In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import RobustScaler

In [None]:
import numpy as np

In [None]:
from sklearn.tree import DecisionTreeRegressor

In [None]:
from sklearn.model_selection import KFold

In [None]:
# Colab-only step: attach Google Drive so files stored there can be read by path.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Raw champion-stats export on the mounted Drive; kept in a named constant so the
# location is easy to spot and change.
CHAMPION_STATS_CSV = '/content/drive/MyDrive/Colab Notebooks/ChampionStats2.csv'
df = pd.read_csv(CHAMPION_STATS_CSV)

This approach uses a completely different table: it contains the champion stats for the last two weeks.

The game developers update the champion stats (damage, HP, attack speed, Ability Power, ability stats, etc.) and buff or nerf champions (make them stronger or weaker) based on a number of factors.

Our target is to predict the win% for any specific champion.

For the first approach we will use 'GP' (games played) as the weight in a weighted average.

The idea is to account for the variability in the number of games played by weighting each statistic according to the number of games.

# Data Preprocessing

In [None]:
# Clean a private deep copy so the raw frame `df` is never modified.
champion_stats_cleaned = df.copy(deep=True)

The columns below are stored with dtype `object` (percentage strings), so we convert them to numeric values.

In [None]:
# Columns that are converted from percentage strings to fractions in the next cell.
columns_to_convert = [
    'KP',
    'CS%P15',
    'DMG%',
    'GOLD%',
    'W%',
]

In [None]:
# Turn percentage strings into 0-1 fractions; unparsable cells become NaN.
#
# The text is read from the raw frame `df` and cast to str before using the
# `.str` accessor. Reading the column that is being overwritten would make the
# cell fail on a second run (the column is float by then and `.str` raises
# AttributeError); sourcing from `df` keeps the cell idempotent.
for col in columns_to_convert:
    percent_text = df[col].astype(str).str.rstrip('%')
    champion_stats_cleaned[col] = pd.to_numeric(percent_text, errors='coerce') / 100.0

In [None]:
# Columns used as model inputs.
features = [
    'KDA', 'KP', 'GD10', 'XPD10', 'CSD10', 'CSPM',
    'CS%P15', 'DPM', 'DMG%', 'GOLD%', 'WPM', 'WCPM',
]

# Coerce every feature column to a numeric dtype in one pass; values that cannot
# be parsed become NaN.
champion_stats_cleaned[features] = champion_stats_cleaned[features].apply(
    pd.to_numeric, errors='coerce'
)

In [None]:
# 'GP' is used as the averaging weight later on, so it must be numeric;
# unparsable entries become NaN.
gp_numeric = pd.to_numeric(champion_stats_cleaned['GP'], errors='coerce')
champion_stats_cleaned['GP'] = gp_numeric

In [None]:
# Drop only the rows that are unusable for modelling: a missing grouping key,
# weight, target or feature. A bare dropna() would also discard rows whose only
# NaN sits in a column the model never reads.
# Rebinding (instead of inplace=True) keeps the cell safe to re-run.
required_columns = ['Champion', 'GP', 'W%'] + features
champion_stats_cleaned = champion_stats_cleaned.dropna(subset=required_columns)

In [None]:
def weighted_average(group, weights_column, feature_columns):
    """Return the weighted mean of each requested column of ``group``.

    Parameters
    ----------
    group : DataFrame-like
        Rows to aggregate; indexed by column name.
    weights_column : str
        Name of the column in ``group`` that supplies the weights.
    feature_columns : list of str
        Columns to average.

    Returns
    -------
    pandas.Series
        One weighted mean per entry of ``feature_columns``, indexed by
        ``feature_columns``.
    """
    weights = group[weights_column]
    # np.average divides by sum(weights), so each value is a true weighted mean.
    means = {
        name: np.average(group[name], weights=weights)
        for name in feature_columns
    }
    return pd.Series(means, index=feature_columns)

In [None]:
# Collapse to one row per champion: every feature and the target 'W%' become a
# GP-weighted mean over that champion's rows.
weighted_columns = features + ['W%']
grouped = (
    champion_stats_cleaned
    .groupby('Champion', group_keys=False)
    .apply(weighted_average, weights_column='GP', feature_columns=weighted_columns)
    .reset_index()
)

In [None]:
# Total games played per champion. The values are attached positionally, which
# relies on this groupby yielding champions in the same order as `grouped`.
gp_totals = champion_stats_cleaned.groupby('Champion')['GP'].sum()
grouped['GP'] = gp_totals.values

In [None]:
# `weighted_average` relies on np.average(..., weights=GP), which already divides
# by the summed weights, so every column in `features` is a finished GP-weighted
# mean on the stat's own scale.
# The per-column division by total GP that used to live here normalised a second
# time: it shrank each stat in proportion to how often the champion was played,
# and it compounded whenever the cell was re-run because it overwrote
# `grouped[col]` in place. It is therefore dropped.
# Summary of the weighted features as a quick sanity check.
grouped[features].describe()

In [None]:
# Design matrix (one row per champion) and regression target (win rate).
X = grouped.loc[:, features]
y = grouped.loc[:, 'W%']

# Scaler

We use `RobustScaler` to scale the features.

In [None]:
# Fit the scaler on the feature matrix, then apply it to the same matrix.
# NOTE(review): the scaler is fitted on all rows before cross-validation, so its
# statistics are shared across folds - confirm this is acceptable.
scaler = RobustScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Grid search

In [None]:
# Candidate regressors, keyed by the name that also indexes `param_grids`.
#
# Every estimator gets a fixed seed so that repeated runs (and the later
# re-scoring of the tuned models) reproduce the same numbers.
# The AdaBoost base learner is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional slot is the base learner in both.
RANDOM_STATE = 42

models = {
    'GradientBoostingRegressor': GradientBoostingRegressor(random_state=RANDOM_STATE),
    'RandomForestRegressor': RandomForestRegressor(random_state=RANDOM_STATE),
    'AdaBoostRegressor': AdaBoostRegressor(
        DecisionTreeRegressor(random_state=RANDOM_STATE),
        random_state=RANDOM_STATE,
    ),
    'XGBRegressor': XGBRegressor(random_state=RANDOM_STATE),
    'DecisionTreeRegressor': DecisionTreeRegressor(random_state=RANDOM_STATE),
}

In [None]:
# Search spaces per model, keyed like `models`. The value lists shared by several
# models are declared once and copied into each grid.
_learning_rates = [0.01, 0.1, 0.2]
_tree_depths = [3, 4, 5, 6]
_ensemble_sizes = [50, 100, 200]

param_grids = {
    'GradientBoostingRegressor': {
        'learning_rate': list(_learning_rates),
        'max_depth': list(_tree_depths),
        'n_estimators': list(_ensemble_sizes),
    },
    'RandomForestRegressor': {
        'max_depth': list(_tree_depths),
        'n_estimators': list(_ensemble_sizes),
    },
    'AdaBoostRegressor': {
        'n_estimators': list(_ensemble_sizes),
        'learning_rate': list(_learning_rates),
    },
    'XGBRegressor': {
        'learning_rate': list(_learning_rates),
        'max_depth': list(_tree_depths),
        'n_estimators': list(_ensemble_sizes),
    },
    'DecisionTreeRegressor': {
        'max_depth': list(_tree_depths),
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
    },
}

In [None]:
# Filled by the grid-search loop: model name -> best params, best score and
# fitted estimator.
best_models = dict()

In [None]:
# One seeded, shuffled 5-fold splitter is shared by every search; `kf` stays
# defined afterwards because the evaluation cell below reuses it.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for model_name, model in models.items():
    print(f"Training {model_name}...")

    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grids[model_name],
        cv=kf,
        scoring='neg_root_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(X_scaled, y)

    best_params = grid_search.best_params_
    # The scorer is negated RMSE, so flip the sign to report a positive RMSE.
    best_score = -grid_search.best_score_

    best_models[model_name] = {
        'best_params': best_params,
        'best_score': best_score,
        'best_model': grid_search.best_estimator_,
    }

    print(f"Best Parameters for {model_name}: {best_params}")
    print(f"Best RMSE for {model_name}: {best_score}")

Training GradientBoostingRegressor...
Best Parameters for GradientBoostingRegressor: {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 200}
Best RMSE for GradientBoostingRegressor: 0.10059718643944131
Training RandomForestRegressor...
Best Parameters for RandomForestRegressor: {'max_depth': 3, 'n_estimators': 100}
Best RMSE for RandomForestRegressor: 0.10612540227462228
Training AdaBoostRegressor...




Best Parameters for AdaBoostRegressor: {'learning_rate': 0.1, 'n_estimators': 100}
Best RMSE for AdaBoostRegressor: 0.11614557963810605
Training XGBRegressor...
Best Parameters for XGBRegressor: {'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 200}
Best RMSE for XGBRegressor: 0.09640847484713663
Training DecisionTreeRegressor...
Best Parameters for DecisionTreeRegressor: {'max_depth': 3, 'min_samples_leaf': 4, 'min_samples_split': 2}
Best RMSE for DecisionTreeRegressor: 0.12034523087328797


In [None]:
# Re-score each tuned estimator with the same splitter `kf` and report the
# per-fold RMSE together with its mean and spread.
for model_name, model_info in best_models.items():
    best_model = model_info['best_model']
    # The scorer yields negated RMSE; negate once so the values are positive.
    rmse_scores = -cross_val_score(
        best_model,
        X_scaled,
        y,
        cv=kf,
        scoring='neg_root_mean_squared_error',
    )
    print(f"Cross-Validation RMSE Scores for {model_name}: {rmse_scores}")
    print(f"Mean RMSE for {model_name}: {rmse_scores.mean()}")
    print(f"Standard Deviation of RMSE for {model_name}: {rmse_scores.std()}")

Cross-Validation RMSE Scores for GradientBoostingRegressor: [0.13434312 0.12687351 0.06519249 0.13371226 0.08191567]
Mean RMSE for GradientBoostingRegressor: 0.10840740828361381
Standard Deviation of RMSE for GradientBoostingRegressor: 0.029063153856441835
Cross-Validation RMSE Scores for RandomForestRegressor: [0.18367305 0.13019605 0.06792064 0.13122828 0.06482235]
Mean RMSE for RandomForestRegressor: 0.1155680744428097
Standard Deviation of RMSE for RandomForestRegressor: 0.04459353098344766




Cross-Validation RMSE Scores for AdaBoostRegressor: [0.21048364 0.16123554 0.05498536 0.13166155 0.10222959]
Mean RMSE for AdaBoostRegressor: 0.13211913741636414
Standard Deviation of RMSE for AdaBoostRegressor: 0.0525965309086866
Cross-Validation RMSE Scores for XGBRegressor: [0.09861313 0.1420368  0.05595791 0.119061   0.06637353]
Mean RMSE for XGBRegressor: 0.09640847484713663
Standard Deviation of RMSE for XGBRegressor: 0.03205709790337392
Cross-Validation RMSE Scores for DecisionTreeRegressor: [0.20870215 0.14150659 0.05912087 0.12616965 0.06622689]
Mean RMSE for DecisionTreeRegressor: 0.12034523087328797
Standard Deviation of RMSE for DecisionTreeRegressor: 0.05470994147314158


In [None]:
# Wider XGBoost search space: finer learning rates, more trees, plus row and
# column subsampling. Both subsampling parameters use the same fraction values.
_sampling_fractions = [0.7, 0.8, 0.9, 1.0]

param_grid_xgb = {
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 4, 5, 6],
    'n_estimators': [50, 100, 200, 300],
    'subsample': list(_sampling_fractions),
    'colsample_bytree': list(_sampling_fractions),
}

In [None]:
# Seeded, shuffled 5-fold splitter for the extended XGBoost search.
_cv_settings = {'n_splits': 5, 'shuffle': True, 'random_state': 42}
kf = KFold(**_cv_settings)

In [None]:
# Exhaustive search over `param_grid_xgb`, scored by negated RMSE on the `kf`
# folds and run on all available cores.
xgb_base = XGBRegressor()
grid_search_xgb = GridSearchCV(
    estimator=xgb_base,
    param_grid=param_grid_xgb,
    cv=kf,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
)

In [None]:
# Run the extended XGBoost search on the scaled features and the win-rate target.
# The grid holds 4*4*4*4*4 = 1024 parameter combinations, each evaluated on 5 folds.
grid_search_xgb.fit(X_scaled, y)

In [None]:
# Pull the winning configuration out of the finished search.
best_model_xgb = grid_search_xgb.best_estimator_
best_params_xgb = grid_search_xgb.best_params_
# best_score_ is negated RMSE; flip the sign to get a positive RMSE.
best_score_xgb = -grid_search_xgb.best_score_

In [None]:
# Report the outcome of the extended XGBoost search, one line per message.
for report_line in (
    f'Best Parameters for XGBRegressor after further tuning: {best_params_xgb}',
    f'Best RMSE for XGBRegressor after further tuning: {best_score_xgb}',
):
    print(report_line)

Best Parameters for XGBRegressor after further tuning: {'colsample_bytree': 1.0, 'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 50, 'subsample': 1.0}
Best RMSE for XGBRegressor after further tuning: 0.0954044318385116


# Result

Best Parameters for GradientBoostingRegressor: {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 200}

Best RMSE for GradientBoostingRegressor: 0.10059718643944131

Best Parameters for RandomForestRegressor: {'max_depth': 3, 'n_estimators': 100}

Best RMSE for RandomForestRegressor: 0.10612540227462228

Best Parameters for AdaBoostRegressor: {'learning_rate': 0.1, 'n_estimators': 100}

Best RMSE for AdaBoostRegressor: 0.11614557963810605

Best Parameters for XGBRegressor: {'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 200}

Best RMSE for XGBRegressor: 0.09640847484713663

Best Parameters for DecisionTreeRegressor: {'max_depth': 3, 'min_samples_leaf': 4, 'min_samples_split': 2}

Best RMSE for DecisionTreeRegressor: 0.12034523087328797

Best Parameters for XGBRegressor after further tuning: {'colsample_bytree': 1.0, 'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 50, 'subsample': 1.0}

Best RMSE for XGBRegressor after further tuning: 0.0954044318385116