In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Train and evaluate a gradient-boosting regressor that predicts a game's
# "rating" from its player and review counts.

# Location of the Steam releases CSV. A raw string keeps every backslash
# literal; the previous literal mixed "\\" with a bare "\S", which is an
# invalid escape sequence that recent Python versions warn about.
DATA_PATH = r"C:\ML_assignment\Steam Releases.csv"

# Columns used as model inputs, and the column being predicted.
FEATURE_COLUMNS = ["peak_players", "positive_reviews", "negative_reviews"]
TARGET_COLUMN = "rating"

# Load the dataset into a DataFrame
df = pd.read_csv(DATA_PATH)

# Select the columns of interest
# NOTE(review): rows with missing values are not filtered out here --
# confirm the CSV has no gaps in these columns before fitting.
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Split the data into training and test sets (80% train / 20% test);
# the fixed random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Instantiate the Gradient Boosting Regressor model
model = GradientBoostingRegressor(
    n_estimators=100,  # Number of boosting stages to perform
    learning_rate=0.1,  # Learning rate shrinks the contribution of each tree
    max_depth=3,  # Maximum depth of the individual trees
    random_state=42,  # Seed used by the random number generator
)

# Fit the model on the training data
model.fit(X_train, y_train)

# Predict the ratings on the held-out test data
y_pred = model.predict(X_test)

# Evaluate the performance of the model on the test set
mse = mean_squared_error(y_test, y_pred)  # Mean squared error
mae = mean_absolute_error(y_test, y_pred)  # Mean absolute error
r2 = r2_score(y_test, y_pred)  # R-squared

# Print the evaluation metrics
print(f"Mean Squared Error: {mse:.5f}")
print(f"Mean Absolute Error: {mae:.2f}")
print(f"R-squared: {r2:.2f}")