# <span style="font-width:bold; font-size: 3rem; color:#1EB182;"> **Champion prediction** </span><span style="font-width:bold; font-size: 3rem; color:#333;">- Batch Inference</span>

## 🗒️ This notebook is divided into the following sections:

1. Download model and batch inference data
2. Make predictions

## <span style='color:#ff5f27'> 📝 Imports

In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.svm import SVC
from xgboost import XGBClassifier
import xgboost as xgb
from util.helper import * 
import util.helper as helpfun


## <span style="color:#ff5f27;"> Get data features</span>

In [2]:
def calculate_champ_variety_score(df):
    df = df.copy()  # Create a copy to avoid warnings
    
    # Create a list of champion columns we want to check
    champ_columns = [
        'most_champ_1', 'most_champ_2', 'most_champ_3',
        '7d_champ_1', '7d_champ_2', '7d_champ_3'
    ]
    
    # Filter to only include columns that exist in the DataFrame
    existing_columns = [col for col in champ_columns if col in df.columns]
    
    # Function to count unique non-NaN values
    def count_unique_champions(row):
        # Get all values that are not NaN
        valid_champions = row[existing_columns].dropna()
        # Count unique values
        return len(set(valid_champions))
    
    # Calculate the score for each row
    df['champ_variety_score'] = df.apply(count_unique_champions, axis=1)
    
    return df

def calculate_playstyle(df):
    df = df.copy()
    
    # Playstyle categorization (0-5)
    conditions = [
        # 0: Assassin/Carry (high kills, high KDA, high kill participation)
        (df['avg_kills'] > df['avg_assists']) & 
        (df['kda_ratio_profile'] > 3) & 
        (df['kill_participation_profile'] > 0.6),
        
        # 1: Support/Utility (high assists, good KDA, high kill participation)
        (df['avg_assists'] > df['avg_kills']) & 
        (df['kda_ratio_profile'] > 2.5) & 
        (df['kill_participation_profile'] > 0.55),
        
        # 2: Tank/Initiator (moderate deaths, high assists, high kill participation)
        (df['avg_deaths'] > 3) & 
        (df['avg_assists'] > df['avg_kills']) & 
        (df['kill_participation_profile'] > 0.5),
        
        # 3: Split-pusher (lower kill participation, good KDA)
        (df['kill_participation_profile'] < 0.5) & 
        (df['kda_ratio_profile'] > 2),
        
        # 4: Aggressive/Fighter (high kills and deaths, high kill participation)
        (df['avg_kills'] > 3) & 
        (df['avg_deaths'] > 4) & 
        (df['kill_participation_profile'] > 0.55)
    ]
    
    values = [0, 1, 2, 3, 4]  # Numeric values for each playstyle
    df['playstyle'] = np.select(conditions, values, default=5)
    
    return df

In [3]:
current_dir = os.getcwd()

# Construct path
data_path = os.path.join(current_dir, 'util', 'data', 'feature_eng_stats.csv')
training_df = pd.read_csv(data_path)

training_df = training_df.dropna(subset=['champion'])

#Clean table and convert string to num, remove future stats
training_df = convert_training_df(training_df)

training_df = calculate_champ_variety_score(training_df)

training_df = calculate_playstyle(training_df)

# Add top 3 highest score features
top_champ_scores = training_df.apply(get_top_champion_scores, n=5, axis=1) #champion variety 5
training_df = pd.concat([training_df, top_champ_scores], axis=1)

# List of columns to remove
date = ['date']
recent_stats = ['total_games', 'wins', 'losses', 'win_rate', 'avg_kills', 'avg_deaths', 'avg_assists', 'kda_ratio_profile', 'kill_participation_profile']
recent_champs = ['most_champ_1', 'most_champ_2']
recent_champs_2 = ['most_champ_3']
recent_champs_stats = ['WR_1', 'W_1', 'L_1', 'KDA_1','WR_2', 'W_2', 'L_2', 'KDA_2', 'WR_3', 'W_3', 'L_3', 'KDA_3']
preferred_roles = ['TOP', 'JUNGLE', 'MID', 'ADC', 'SUPPORT']  
most_roles = ['most_role_1', 'most_role_2']
most_roles_value= ['most_role_1_value', 'most_role_2_value']
season_champ_top3 = ['season_champ_1', 'season_champ_2''season_champ_3']
season_stats = [ 'cs_ssn_1', 'cpm_ssn_1', 'kda_ssn_1', 
    'k_ssn_1', 'd_ssn_1', 'a_ssn_1', 'wr_ssn_1', 'games_ssn_1',  'cs_ssn_2', 
    'cpm_ssn_2', 'kda_ssn_2', 'k_ssn_2', 'd_ssn_2', 'a_ssn_2', 'wr_ssn_2', 'games_ssn_2', 
     'cs_ssn_3', 'cpm_ssn_3', 'kda_ssn_3', 'k_ssn_3', 'd_ssn_3', 'a_ssn_3', 
    'wr_ssn_3', 'games_ssn_3', 'season_champ_4', 'cs_ssn_4', 'cpm_ssn_4', 'kda_ssn_4', 'k_ssn_4', 
    'd_ssn_4', 'a_ssn_4', 'wr_ssn_4', 'games_ssn_4', 'season_champ_5', 'cs_ssn_5', 'cpm_ssn_5', 
    'kda_ssn_5', 'k_ssn_5', 'd_ssn_5', 'a_ssn_5', 'wr_ssn_5', 'games_ssn_5', 'season_champ_6', 
    'cs_ssn_6', 'cpm_ssn_6', 'kda_ssn_6', 'k_ssn_6', 'd_ssn_6', 'a_ssn_6', 'wr_ssn_6', 'games_ssn_6', 
    'season_champ_7', 'cs_ssn_7', 'cpm_ssn_7', 'kda_ssn_7', 'k_ssn_7', 'd_ssn_7', 'a_ssn_7', 
    'wr_ssn_7', 'games_ssn_7']
weekly_stats_1=['7d_champ_1', '7d_total_1',  '7d_WR_1', '7d_champ_2', '7d_total_2',  '7d_WR_2', '7d_champ_3', '7d_total_3', '7d_WR_3']
weekly_stats_2=['7d_W_1', '7d_L_1', '7d_W_2', '7d_L_2','7d_W_3', '7d_L_3']
mastery_stats=['mastery_champ_1', 'm_lv_1', 'mastery_champ_2', 'm_lv_2', 
    'mastery_champ_3', 'm_lv_3', 'mastery_champ_4', 'm_lv_4', 'mastery_champ_5', 'm_lv_5', 
    'mastery_champ_6', 'm_lv_6', 'mastery_champ_7', 'm_lv_7', 'mastery_champ_8', 'm_lv_8', 
    'mastery_champ_9', 'm_lv_9', 'mastery_champ_10', 'm_lv_10', 'mastery_champ_11', 'm_lv_11', 
    'mastery_champ_12', 'm_lv_12', 'mastery_champ_13', 'm_lv_13', 'mastery_champ_14', 'm_lv_14', 
    'mastery_champ_15', 'm_lv_15', 'mastery_champ_16', 'm_lv_16']
champions = [
            "Aatrox", "Ahri", "Akali", "Akshan", "Alistar", "Ambessa", "Amumu", "Anivia", "Annie", "Aphelios", "Ashe", "Aurelion Sol",
            "Aurora", "Azir", "Bard", "Bel'Veth", "Blitzcrank", "Brand", "Braum", "Briar", "Caitlyn", "Camille", "Cassiopeia", "Cho'Gath",
            "Corki", "Darius", "Diana", "Dr. Mundo", "Draven", "Ekko", "Elise", "Evelynn", "Ezreal", "Fiddlesticks", "Fiora", "Fizz", "Galio",
            "Gangplank", "Garen", "Gnar", "Gragas", "Graves", "Gwen", "Hecarim", "Heimerdinger", "Hwei", "Illaoi", "Irelia", "Ivern", "Janna",
            "Jarvan IV", "Jax", "Jayce", "Jhin", "Jinx", "K'Sante", "Kai'Sa", "Kalista", "Karma", "Karthus", "Kassadin", "Katarina", "Kayle",
            "Kayn", "Kennen", "Kha'Zix", "Kindred", "Kled", "Kog'Maw", "LeBlanc", "Lee Sin", "Leona", "Lillia", "Lissandra", "Lucian", "Lulu",
            "Lux", "Malphite", "Malzahar", "Maokai", "Master Yi", "Milio", "Miss Fortune", "Mordekaiser", "Morgana", "Naafiri", "Nami", "Nasus",
            "Nautilus", "Neeko", "Nidalee", "Nilah", "Nocturne", "Nunu & Willump", "Olaf", "Orianna", "Ornn", "Pantheon", "Poppy", "Pyke",
            "Qiyana", "Quinn", "Rakan", "Rammus", "Rek'Sai", "Rell", "Renata Glasc", "Renekton", "Rengar", "Riven", "Rumble", "Ryze", "Samira",
            "Sejuani", "Senna", "Seraphine", "Sett", "Shaco", "Shen", "Shyvana", "Singed", "Sion", "Sivir", "Skarner", "Smolder", "Sona",
            "Soraka", "Swain", "Sylas", "Syndra", "Tahm Kench", "Taliyah", "Talon", "Taric", "Teemo", "Thresh", "Tristana", "Trundle",
            "Tryndamere", "Twisted Fate", "Twitch", "Udyr", "Urgot", "Varus", "Vayne", "Veigar", "Vel'Koz", "Vex", "Vi", "Viego", "Viktor",
            "Vladimir", "Volibear", "Warwick", "Wukong", "Xayah", "Xerath", "Xin Zhao", "Yasuo", "Yone", "Yorick", "Yuumi", "Zac", "Zed",
            "Zeri", "Ziggs", "Zilean", "Zoe", "Zyra"
        ]
champion_score = ['1_champ_score', '2_champ_score', '3_champ_score', '4_champ_score', '5_champ_score']
# Combine all columns to remove into a single list
columns_to_remove = []

# Include categories conditionally
columns_to_remove += date
columns_to_remove += recent_stats
#columns_to_remove += recent_champs
columns_to_remove += recent_champs_2
columns_to_remove += recent_champs_stats
columns_to_remove += preferred_roles
columns_to_remove += most_roles
#columns_to_remove += most_roles_value
#columns_to_remove += season_champ_top3
columns_to_remove += season_stats
columns_to_remove += weekly_stats_1
columns_to_remove += weekly_stats_2
columns_to_remove += mastery_stats
columns_to_remove += champions
columns_to_remove += champion_score

# Drop the specified columns
training_df = training_df.drop(columns=columns_to_remove)

# Display the resulting DataFrame
print(training_df)

       champion  region  team  avg_tier  team_champ1  team_champ2  \
0           130       1     2         1           40           71   
1            19       1     1         2          111           71   
2            37       1     1         2           42           48   
3           125       1     1         2            6           91   
4            96       1     2         2           80           93   
...         ...     ...   ...       ...          ...          ...   
10326       129       2     2         2           29          138   
10327       159       2     2         2          124            2   
10328       160       2     2         1           52          124   
10329       160       2     2         3          110           67   
10330        71       2     2         1           78           46   

       team_champ3  team_champ4  opp_champ1  opp_champ2  ...  season_champ_1  \
0               33          106         129         150  ...           129.0   
1          

## <span style="color:#ff5f27;">🪝 Get model & Prediction</span>

In [4]:
import joblib
import warnings
warnings.filterwarnings('ignore')  # Suppress warnings

# Define the path to your saved models
current_dir = os.getcwd()
save_dir = os.path.join(current_dir, 'model')

# Method 1: Load as Booster (more reliable)
model_path = os.path.join(save_dir, 'champion_predictor.json')
loaded_model = xgb.Booster()
loaded_model.load_model(model_path)

# Load the label encoder
encoder_path = os.path.join(save_dir, 'label_encoder.joblib')
loaded_label_encoder = joblib.load(encoder_path)


In [8]:
def prepare_test_data(training_df, num_rows=1, random=False):
    """
    Prepare test data from training DataFrame
    """
    if random:
        test_data = training_df.sample(n=num_rows)
    else:
        test_data = training_df.head(num_rows)
    
    # Store actual champions
    actual_champions = test_data['champion'].copy()
    
    # Remove the target column for prediction
    test_features = test_data.drop('champion', axis=1)
    
    return test_features, actual_champions

def predict_single_champion(features):
    """
    Make a single champion prediction for given features
    """
    try:
        # Handle missing values
        features = features.fillna(-1)
        
        # Make prediction using predict_proba
        pred_proba = loaded_model.predict_proba(features)
        
        # Get the highest probability prediction
        pred_idx = np.argmax(pred_proba, axis=1)[0]
        confidence = pred_proba[0][pred_idx]
        
        # Get raw prediction
        raw_prediction = loaded_model.predict(features)[0]
        
        # Convert to champion name
        champion = loaded_label_encoder.inverse_transform([raw_prediction])[0]
        
        return champion, confidence
        
    except Exception as e:
        print(f"Error details:")
        print(f"Features shape: {features.shape}")
        print(f"Feature columns: {features.columns.tolist()}")
        print(f"Error message: {str(e)}")
        return None, 0.0

def test_predictions(features, actual_champions=None):
    """
    Test the model with given features
    """
    for idx, row in features.iterrows():
        print(f"\nPrediction for row {idx}:")
        
        # Get single row as DataFrame
        single_row = pd.DataFrame([row])
        
        # Print feature values for debugging
        print("\nFeature values:")
        for col in single_row.columns:
            print(f"{col}: {single_row[col].values[0]}")
        
        # Get prediction
        champion, confidence = predict_single_champion(single_row)
        
        # Print actual champion if provided
        if actual_champions is not None:
            print(f"\nActual Champion: {actual_champions[idx]}")
        
        # Print prediction
        print(f"Predicted Champion: {champion}")
        print(f"Confidence: {confidence:.2f}")
        
        # Check if prediction is correct
        if actual_champions is not None:
            actual = actual_champions[idx]
            if actual == champion:
                print("Correct prediction! ✓")
            else:
                print("Incorrect prediction ✗")

In [10]:
# Test with single row
print("Testing with single row:")
test_features_single, actual_champions_single = prepare_test_data(training_df, num_rows=1)
test_predictions(test_features_single, actual_champions_single)

Testing with single row:

Prediction for row 0:

Feature values:
region: 1.0
team: 2.0
avg_tier: 1.0
team_champ1: 40.0
team_champ2: 71.0
team_champ3: 33.0
team_champ4: 106.0
opp_champ1: 129.0
opp_champ2: 150.0
opp_champ3: 96.0
opp_champ4: 144.0
opp_champ5: 100.0
most_champ_1: 151.0
most_champ_2: 33.0
most_role_1_value: 0.55
most_role_2_value: 0.2
season_champ_1: 129.0
season_champ_2: 46.0
season_champ_3: 160.0
champ_variety_score: 4.0
playstyle: 2.0
1_champ_name: 151.0
2_champ_name: 25.0
3_champ_name: 160.0
4_champ_name: 54.0
5_champ_name: 53.0
Error details:
Features shape: (1, 26)
Feature columns: ['region', 'team', 'avg_tier', 'team_champ1', 'team_champ2', 'team_champ3', 'team_champ4', 'opp_champ1', 'opp_champ2', 'opp_champ3', 'opp_champ4', 'opp_champ5', 'most_champ_1', 'most_champ_2', 'most_role_1_value', 'most_role_2_value', 'season_champ_1', 'season_champ_2', 'season_champ_3', 'champ_variety_score', 'playstyle', '1_champ_name', '2_champ_name', '3_champ_name', '4_champ_name', '5_c

In [15]:

# Test with first 5 rows
print("\nTesting with first 5 rows:")
test_features_five, actual_champions_five = prepare_test_data(training_df, num_rows=5)
test_predictions(test_features_five, actual_champions_five)


Testing with first 5 rows:

Prediction for row 0:
Error during prediction: invalid index to scalar variable.
Actual Champion: 130
Predicted Champion: None
Confidence: 0.00
Incorrect prediction ✗

Prediction for row 1:
Error during prediction: invalid index to scalar variable.
Actual Champion: 19
Predicted Champion: None
Confidence: 0.00
Incorrect prediction ✗

Prediction for row 2:
Error during prediction: invalid index to scalar variable.
Actual Champion: 37
Predicted Champion: None
Confidence: 0.00
Incorrect prediction ✗

Prediction for row 3:
Error during prediction: invalid index to scalar variable.
Actual Champion: 125
Predicted Champion: None
Confidence: 0.00
Incorrect prediction ✗

Prediction for row 4:
Error during prediction: invalid index to scalar variable.
Actual Champion: 96
Predicted Champion: None
Confidence: 0.00
Incorrect prediction ✗
