In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import RobustScaler

import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import KFold
from google.colab import drive

In [None]:
# Mount Google Drive into the Colab filesystem; the dataset CSV read below
# lives under /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the champion statistics table from the mounted Drive folder.
csv_path = '/content/drive/MyDrive/Colab Notebooks/ChampionStats2.csv'
df = pd.read_csv(csv_path)

In this second approach, the features are normalized to per-game averages by dividing each one by the number of games played (`GP`).

# Data Preprocessing

In [None]:
# Work on an independent (deep) copy so the raw frame `df` is never modified.
champion_stats_cleaned = df.copy(deep=True)

In [None]:
# Percentage columns are expected to hold text with a trailing '%'; strip the
# sign and store the value divided by 100.
#
# The text is always re-derived from the untouched raw frame `df`, which makes
# this cell idempotent: re-running it no longer raises AttributeError from
# `.str` on an already-converted float column, and it never divides by 100 a
# second time. `astype(str)` additionally covers a column that pandas parsed
# as numeric because it contained no '%' sign. Unparseable values become NaN.
columns_to_convert = ['KP', 'CS%P15', 'DMG%', 'GOLD%', 'W%']
for col in columns_to_convert:
    raw_text = df[col].astype(str).str.rstrip('%')
    champion_stats_cleaned[col] = pd.to_numeric(raw_text, errors='coerce') / 100.0

In [None]:
# Model input columns. Each one is coerced to a numeric dtype; any value that
# cannot be parsed becomes NaN.
features = ['KDA', 'KP', 'GD10', 'XPD10', 'CSD10', 'CSPM', 'CS%P15', 'DPM', 'DMG%', 'GOLD%', 'WPM', 'WCPM']
champion_stats_cleaned[features] = champion_stats_cleaned[features].apply(
    pd.to_numeric, errors='coerce'
)

In [None]:
# 'GP' (games played) is the denominator of the per-game normalisation, so it
# has to be numeric; unparseable entries become NaN.
champion_stats_cleaned['GP'] = pd.to_numeric(champion_stats_cleaned['GP'], errors='coerce')

# Drop only the rows that lack a value the model actually consumes (features,
# GP, and the W% target). A bare dropna() would also discard rows whose only
# missing value sits in a column that is never used downstream.
required_cols = features + ['GP', 'W%']
champion_stats_cleaned = champion_stats_cleaned.dropna(subset=required_cols)

In [None]:
# Names of every numeric column; these are the columns aggregated per champion.
numeric_cols = champion_stats_cleaned.select_dtypes(include='number').columns

In [None]:
# Collapse to one row per champion by taking the median of each numeric column;
# 'Champion' is restored as a regular column afterwards.
grouped = (
    champion_stats_cleaned
    .groupby('Champion')[numeric_cols]
    .agg('median')
    .reset_index()
)

In [None]:
# Per-game normalisation: divide every feature column, row by row, by that
# champion's aggregated 'GP' value.
# NOTE: `grouped` is overwritten, so running this cell twice divides twice.
grouped[features] = grouped[features].div(grouped['GP'], axis=0)

In [None]:
# Feature matrix (normalised feature columns) and regression target ('W%').
X = grouped.loc[:, features]
y = grouped.loc[:, 'W%']

# Scaler

The features are scaled with scikit-learn's `RobustScaler`.

In [None]:
# Fit the scaler on the whole feature matrix, then transform that same matrix.
# NOTE(review): the scaler is fitted before any cross-validation split, so it
# sees every row; confirm this is acceptable for the evaluation below.
scaler = RobustScaler()
X_scaled = scaler.fit(X).transform(X)

# Grid Search

In [None]:
# Candidate regressors, keyed by class name; the same keys index `param_grids`.
#
# * AdaBoost's weak learner is passed as `estimator`. The former
#   `base_estimator` keyword was deprecated in scikit-learn 1.2 and removed in
#   1.4, where passing it raises a TypeError.
# * Every model gets a fixed seed so that repeated runs, and the later
#   cross_val_score pass over the same folds, reproduce the same numbers.
MODEL_SEED = 42
models = {
    'GradientBoostingRegressor': GradientBoostingRegressor(random_state=MODEL_SEED),
    'RandomForestRegressor': RandomForestRegressor(random_state=MODEL_SEED),
    'AdaBoostRegressor': AdaBoostRegressor(
        estimator=DecisionTreeRegressor(random_state=MODEL_SEED),
        random_state=MODEL_SEED,
    ),
    'XGBRegressor': XGBRegressor(random_state=MODEL_SEED),
    'DecisionTreeRegressor': DecisionTreeRegressor(random_state=MODEL_SEED)
}

In [None]:
# Hyper-parameter grids searched for each candidate model.
# The outer keys must match the keys of `models`; the inner keys are the
# estimator keyword arguments that GridSearchCV varies.
param_grids = {
    'GradientBoostingRegressor': dict(
        learning_rate=[0.01, 0.1, 0.2],
        max_depth=[3, 4, 5, 6],
        n_estimators=[50, 100, 200],
    ),
    'RandomForestRegressor': dict(
        max_depth=[3, 4, 5, 6],
        n_estimators=[50, 100, 200],
    ),
    'AdaBoostRegressor': dict(
        n_estimators=[50, 100, 200],
        learning_rate=[0.01, 0.1, 0.2],
    ),
    'XGBRegressor': dict(
        learning_rate=[0.01, 0.1, 0.2],
        max_depth=[3, 4, 5, 6],
        n_estimators=[50, 100, 200],
    ),
    'DecisionTreeRegressor': dict(
        max_depth=[3, 4, 5, 6],
        min_samples_split=[2, 5, 10],
        min_samples_leaf=[1, 2, 4],
    ),
}

In [None]:
# Filled by the grid-search loop: model name -> best params / score / estimator.
best_models = dict()

In [None]:
# Exhaustive grid search for every candidate model over one seeded, shuffled
# 5-fold split. `kf` stays a top-level name because later cells reuse it.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for model_name, model in models.items():
    print(f"Training {model_name}...")
    param_grid = param_grids[model_name]
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=kf,
        scoring='neg_root_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(X_scaled, y)

    # The scorer is *negative* RMSE (higher is better), so flip the sign to
    # report a positive RMSE.
    best_params = grid_search.best_params_
    best_score = -grid_search.best_score_
    best_model = grid_search.best_estimator_

    best_models[model_name] = dict(
        best_params=best_params,
        best_score=best_score,
        best_model=best_model,
    )

    print(f"Best Parameters for {model_name}: {best_params}")
    print(f"Best RMSE for {model_name}: {best_score}")

Training GradientBoostingRegressor...
Best Parameters for GradientBoostingRegressor: {'learning_rate': 0.2, 'max_depth': 4, 'n_estimators': 50}
Best RMSE for GradientBoostingRegressor: 0.13414115886428388
Training RandomForestRegressor...
Best Parameters for RandomForestRegressor: {'max_depth': 5, 'n_estimators': 50}
Best RMSE for RandomForestRegressor: 0.132506018984827
Training AdaBoostRegressor...




Best Parameters for AdaBoostRegressor: {'learning_rate': 0.01, 'n_estimators': 50}
Best RMSE for AdaBoostRegressor: 0.13508057155511516
Training XGBRegressor...
Best Parameters for XGBRegressor: {'learning_rate': 0.01, 'max_depth': 5, 'n_estimators': 200}
Best RMSE for XGBRegressor: 0.129233532248452
Training DecisionTreeRegressor...
Best Parameters for DecisionTreeRegressor: {'max_depth': 6, 'min_samples_leaf': 4, 'min_samples_split': 5}
Best RMSE for DecisionTreeRegressor: 0.13399798101395832


In [None]:
# Re-score each tuned estimator with cross-validation on the `kf` splitter
# and report the per-fold RMSE, its mean, and its standard deviation.
for model_name, tuned in best_models.items():
    best_model = tuned['best_model']
    # Scores come back as negative RMSE; negate to get positive RMSE values.
    rmse_scores = -cross_val_score(
        best_model,
        X_scaled,
        y,
        cv=kf,
        scoring='neg_root_mean_squared_error',
    )
    print(f"Cross-Validation RMSE Scores for {model_name}: {rmse_scores}")
    print(f"Mean RMSE for {model_name}: {rmse_scores.mean()}")
    print(f"Standard Deviation of RMSE for {model_name}: {rmse_scores.std()}")

Cross-Validation RMSE Scores for GradientBoostingRegressor: [0.13187753 0.12070632 0.11621634 0.17371299 0.13198788]
Mean RMSE for GradientBoostingRegressor: 0.1349002137299909
Standard Deviation of RMSE for GradientBoostingRegressor: 0.02036960300251874
Cross-Validation RMSE Scores for RandomForestRegressor: [0.18959968 0.11349858 0.11347048 0.15483106 0.11758691]
Mean RMSE for RandomForestRegressor: 0.13779734067162117
Standard Deviation of RMSE for RandomForestRegressor: 0.0302136304020978




Cross-Validation RMSE Scores for AdaBoostRegressor: [0.19634751 0.11193958 0.12241192 0.1421806  0.13399567]
Mean RMSE for AdaBoostRegressor: 0.14137505323315364
Standard Deviation of RMSE for AdaBoostRegressor: 0.029336403413821086
Cross-Validation RMSE Scores for XGBRegressor: [0.1194521  0.1398257  0.119467   0.14827342 0.11914944]
Mean RMSE for XGBRegressor: 0.129233532248452
Standard Deviation of RMSE for XGBRegressor: 0.01238920360320956
Cross-Validation RMSE Scores for DecisionTreeRegressor: [0.13748015 0.10903494 0.13406616 0.15026337 0.13471307]
Mean RMSE for DecisionTreeRegressor: 0.13311153786060984
Standard Deviation of RMSE for DecisionTreeRegressor: 0.013389762358101909


In [None]:
# Wider grid for the follow-up XGBRegressor search: the first-round parameters
# get extra values, and row/column subsampling ratios are added.
param_grid_xgb = dict(
    learning_rate=[0.01, 0.05, 0.1, 0.2],
    max_depth=[3, 4, 5, 6],
    n_estimators=[50, 100, 200, 300],
    subsample=[0.7, 0.8, 0.9, 1.0],
    colsample_bytree=[0.7, 0.8, 0.9, 1.0],
)


In [None]:
# Grid search over the wider XGBoost grid. It reuses the `kf` splitter created
# in the model-comparison cell, so that cell must have been run first.
grid_search_xgb = GridSearchCV(
    estimator=XGBRegressor(),
    param_grid=param_grid_xgb,
    cv=kf,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
)

In [None]:
# Run the search. The grid has 4 values for each of 5 parameters (1024
# combinations); with the 5-fold splitter that is 5120 model fits.
grid_search_xgb.fit(X_scaled, y)

In [None]:
# Pull the winning configuration out of the finished search. The scorer is
# negative RMSE, so the best score is negated to give a positive RMSE.
best_params_xgb, best_model_xgb = (
    grid_search_xgb.best_params_,
    grid_search_xgb.best_estimator_,
)
best_score_xgb = -grid_search_xgb.best_score_

In [None]:
# Report the tuned XGBoost configuration and its cross-validated RMSE.
print(
    f'Best Parameters for XGBRegressor after further tuning: {best_params_xgb}',
    f'Best RMSE for XGBRegressor after further tuning: {best_score_xgb}',
    sep='\n',
)

Best Parameters for XGBRegressor after further tuning: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 200, 'subsample': 0.7}
Best RMSE for XGBRegressor after further tuning: 0.1264045044528733


# Results

Best Parameters for GradientBoostingRegressor: {'learning_rate': 0.2, 'max_depth': 4, 'n_estimators': 50}

Best RMSE for GradientBoostingRegressor: 0.13414115886428388

Best Parameters for RandomForestRegressor: {'max_depth': 5, 'n_estimators': 50}

Best RMSE for RandomForestRegressor: 0.132506018984827

Best Parameters for AdaBoostRegressor: {'learning_rate': 0.01, 'n_estimators': 50}

Best RMSE for AdaBoostRegressor: 0.13508057155511516

Best Parameters for XGBRegressor: {'learning_rate': 0.01, 'max_depth': 5, 'n_estimators': 200}

Best RMSE for XGBRegressor: 0.129233532248452

Best Parameters for DecisionTreeRegressor: {'max_depth': 6, 'min_samples_leaf': 4, 'min_samples_split': 5}

Best RMSE for DecisionTreeRegressor: 0.13399798101395832

Best Parameters for XGBRegressor after further tuning: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 200, 'subsample': 0.7}

Best RMSE for XGBRegressor after further tuning: 0.1264045044528733