In [20]:
import json
import os
import warnings

import joblib
import numpy as np
import pandas as pd
import sklearn
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

## Exploratory Data Analysis

In [7]:
# Location of the processed match table, relative to the notebook directory.
TRANSFORMED_DATA_PATH = "../data/processed/transformed_data.csv"

# Read the processed table into the DataFrame used by every later cell.
df = pd.read_csv(TRANSFORMED_DATA_PATH)

In [22]:
# Report the installed scikit-learn version; pickled estimators are
# version-sensitive, so this is worth recording next to the saved pipeline.
print("scikit-learn version: ", sklearn.__version__)

scitkit-learn version:  1.5.1


In [26]:
# NOTE(review): interactive scratch statement. input() waits on stdin, so an
# unattended "Restart & Run All" stalls here, and the answer is only echoed
# back, never used by later cells. Candidate for removal.
print(input('How are you?'))

How are you? good


good


## Pipeline / Basic Modeling

In [13]:
# define input features and target features
input_features = [
    "championId", "matchupChampion", "individualPosition",
    "kills", "deaths", "assists",
    "goldEarned", "totalDamageDealt",
    "totalDamageTaken", "totalHeal", "win"
]
target_features = [
    "Boots_id", "Legendary_1_id", "Legendary_2_id",
    "Keystone", "PrimarySlot1", "PrimarySlot2",
    "PrimarySlot3", "SecondarySlot1", "SecondarySlot2"
]

# input columns that get standardized; everything else in input_features is
# either one-hot encoded (individualPosition) or passed through unchanged
numeric_features = [
    "kills", "deaths", "assists", "goldEarned",
    "totalDamageDealt", "totalDamageTaken", "totalHeal"
]

# one-hot encode individualPosition.
# No level is dropped: combined with handle_unknown='ignore', drop='first'
# would encode an unseen position as all zeros, which is exactly the encoding
# of the dropped first category, so the two could not be told apart.
one_hot_encoder = OneHotEncoder(handle_unknown='ignore')

# scale numerical features
scaler = StandardScaler()

# preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ("position", one_hot_encoder, ["individualPosition"]),
        ("scaling", scaler, numeric_features)
    ],
    remainder='passthrough'  # keep championId, matchupChampion, win as-is
)

# create pipeline for the entire data processing
pipeline = Pipeline([
    ("preprocessor", preprocessor)
])

# fit the pipeline on the input features and show the transformed matrix
X = df[input_features]
pipeline.fit_transform(X)

array([[  1.,   0.,   0., ..., 516.,  75.,   0.],
       [  0.,   1.,   0., ..., 254., 887.,   0.],
       [  0.,   0.,   0., ..., 202.,  81.,   0.],
       ...,
       [  0.,   0.,   1., ..., 777., 103.,   0.],
       [  0.,   0.,   0., ...,  22.,  51.,   0.],
       [  0.,   0.,   0., ..., 147., 902.,   0.]])

In [15]:
pipeline_path = "../models/preprocessing_pipeline.pkl"

# make sure the target directory exists; joblib.dump does not create it and
# would fail with FileNotFoundError on a fresh checkout
os.makedirs(os.path.dirname(pipeline_path), exist_ok=True)

# save the fitted pipeline to a file
joblib.dump(pipeline, pipeline_path)

['../models/preprocessing_pipeline.pkl']

In [43]:
# split the frame into model inputs and the multi-output targets
X, y = df[input_features], df[target_features]

# fit the preprocessing pipeline on the inputs and keep the transformed matrix
X_processed = pipeline.fit_transform(X)

In [19]:
# one random forest (seeded for reproducibility) is cloned per target column
# by the multi-output wrapper
base_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
multi_target_classifier = MultiOutputClassifier(estimator=base_classifier)

# train on the preprocessed inputs against all target columns
multi_target_classifier.fit(X_processed, y)

In [31]:
# load champion, item, and rune datasets
with open("../data/raw/champion_data/champions.json", "r") as f:
    champion_data = json.load(f)["data"]

with open("../data/raw/item_data/items.json", "r") as f:
    item_data = json.load(f)["data"]

with open("../data/raw/runes_data/runes.json", "r") as f:
    rune_data = json.load(f)

# create lookup dictionaries
champion_name_to_id = {v["name"].lower(): int(v["key"]) for k, v in champion_data.items()}
champion_id_to_name = {int(v["key"]): v["name"] for k, v in champion_data.items()}

item_id_to_name = {int(k): v["name"] for k, v in item_data.items()}

# extract rune IDs and names properly from runes data
rune_id_to_name = {}
for style in rune_data:
    # add the main style name
    rune_id_to_name[style["id"]] = style["name"]

    # add individual runes within each style
    for slot in style["slots"]:
        for rune in slot["runes"]:
            rune_id_to_name[rune["id"]] = rune["name"]

In [49]:
def predict_optimal_build(champion_name, matchup_champion_name, df, pipeline, model,
                          categorical_features=("individualPosition",)):
    """
    Predict the optimal item build and runes for the given champion and matchup champion.

    The model input is a single synthetic row summarizing the historical games
    of the requested matchup: numeric features are averaged and categorical
    features take their most frequent value. If the exact matchup has no
    rows, the champion's games against any opponent are used, and if there are
    none of those either, the whole dataset is used; a warning is emitted
    whenever such a fallback happens.

    Relies on the notebook-level lookups ``champion_name_to_id``,
    ``item_id_to_name`` and ``rune_id_to_name`` and on ``target_features``.

    Parameters:
    - champion_name: str, champion name of the player (case-insensitive).
    - matchup_champion_name: str, champion name of the opponent (case-insensitive).
    - df: DataFrame, original DataFrame with historical data.
    - pipeline: fitted preprocessing pipeline used for transforming the features.
    - model: trained MultiOutputClassifier model.
    - categorical_features: iterable of str, columns that must be summarized
      with the mode instead of the mean even if they are stored as numbers.
      Non-numeric columns are always treated as categorical.

    Returns:
    - DataFrame, containing the predicted items and runes (as names).

    Raises:
    - ValueError: if either champion name is unknown, or if ``df`` has no rows.
    """
    # convert champion names to IDs
    champion_id = champion_name_to_id.get(champion_name.lower())
    matchup_champion_id = champion_name_to_id.get(matchup_champion_name.lower())

    if champion_id is None or matchup_champion_id is None:
        raise ValueError(f"Champion name(s) provided are not valid: {champion_name}, {matchup_champion_name}")

    # use exactly the columns the pipeline was fitted on; fall back to
    # "everything that is not a target" when the pipeline does not expose them
    feature_columns = getattr(pipeline, "feature_names_in_", None)
    if feature_columns is None:
        feature_columns = df.columns.difference(target_features)
    feature_columns = list(feature_columns)

    # pick the most specific slice of history that actually contains games
    champion_rows = df[df['championId'] == champion_id]
    matchup_rows = champion_rows[champion_rows['matchupChampion'] == matchup_champion_id]
    if not matchup_rows.empty:
        history = matchup_rows
    else:
        history = champion_rows if not champion_rows.empty else df
        if history.empty:
            raise ValueError("No historical data available to build the model input.")
        scope = "all of this champion's games" if not champion_rows.empty else "the whole dataset"
        warnings.warn(
            f"No historical games for {champion_name} vs {matchup_champion_name}; "
            f"averaging over {scope} instead."
        )
    history = history[feature_columns]

    # averaging only makes sense for numeric features; categorical ones
    # (e.g. the lane position) are summarized by their most frequent value
    forced_categorical = set(categorical_features)
    categorical_columns = [
        col for col in feature_columns
        if col in forced_categorical or not pd.api.types.is_numeric_dtype(history[col])
    ]
    numeric_columns = [col for col in feature_columns if col not in categorical_columns]

    input_data = history[numeric_columns].mean().to_dict()
    for col in categorical_columns:
        most_common = history[col].mode()
        # mode() is empty only when every value in the slice is missing
        input_data[col] = most_common.iloc[0] if not most_common.empty else np.nan

    # override champion-specific fields
    input_data['championId'] = champion_id
    input_data['matchupChampion'] = matchup_champion_id

    # create a single-row DataFrame in the column order the pipeline expects
    input_df = pd.DataFrame([input_data], columns=feature_columns)

    # preprocess the input features using the pipeline
    input_processed = pipeline.transform(input_df)

    # predict the output
    predicted_output = model.predict(input_processed)

    # convert the prediction to a DataFrame for better readability
    predicted_df = pd.DataFrame(predicted_output, columns=target_features)

    # convert IDs to item and rune names for user-friendly output
    for col in ['Boots_id', 'Legendary_1_id', 'Legendary_2_id']:
        predicted_df[col] = predicted_df[col].apply(lambda x: item_id_to_name.get(int(x), "Unknown Item"))

    for col in ['Keystone', 'PrimarySlot1', 'PrimarySlot2', 'PrimarySlot3', 'SecondarySlot1', 'SecondarySlot2']:
        predicted_df[col] = predicted_df[col].apply(lambda x: rune_id_to_name.get(int(x), "Unknown Rune"))

    return predicted_df

# example usage: the player's champion and the opponent it is matched against
champion_name, matchup_champion_name = "Ambessa", "Aatrox"

# run the prediction with the fitted pipeline and the trained multi-output model
predicted_build = predict_optimal_build(
    champion_name,
    matchup_champion_name,
    df,
    pipeline,
    multi_target_classifier,
)

# print results
print(f"Suggested Items and Runes for the given matchup: {champion_name} vs {matchup_champion_name}")
print(predicted_build)

Suggested Items and Runes for the given matchup: Ambessa vs Aatrox
       Boots_id Legendary_1_id Legendary_2_id   Keystone PrimarySlot1  \
0  Unknown Item        Eclipse  Black Cleaver  Conqueror      Triumph   

       PrimarySlot2 PrimarySlot3 SecondarySlot1 SecondarySlot2  
0  Legend: Alacrity   Last Stand    Second Wind     Overgrowth  
