In [42]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [102]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
import pickle
import tensorflow as tf
from tensorflow.keras import Sequential, layers
from tensorflow.keras.layers import Dense



# IMPORT DATA

In [103]:
# Folder scanned for the training CSVs; the loading cell reads every "*.csv"
# found here and derives each dictionary key by stripping "Match_Diff_" and
# ".csv" from the file name. The path is relative to the notebook's working
# directory.
folder_path_train = "../data/Match_Diff"

In [104]:
# Load every training CSV in `folder_path_train` into `data_dict`, keyed by the
# part of the file name left after removing "Match_Diff_" and ".csv".
data_dict = {}

# os.listdir() returns entries in arbitrary, platform-dependent order. Sorting
# makes the dictionary order (and therefore every downstream loop, printed
# result and last-iteration variable) reproducible across machines and runs.
for filename in sorted(os.listdir(folder_path_train)):
    if filename.endswith(".csv"):
        file_path = os.path.join(folder_path_train, filename)

        # Extract the key from the filename
        key = filename.replace("Match_Diff_", "").replace(".csv", "")

        # Read the CSV file into a DataFrame
        df = pd.read_csv(file_path)

        # Delete the first column from the DataFrame
        # (presumably the index column written when the CSV was saved - confirm)
        df = df.drop(columns=df.columns[0])

        # Add the DataFrame to the dictionary with the key
        data_dict[key] = df

# MODEL DATA PREPARATION

In [105]:
from get_json import reduce_columns_train

# Pass each raw frame through reduce_columns_train and store the result
# under the same key it had in `data_dict`.
data_dict_red = {}

for key, df in data_dict.items():
    data_dict_red[key] = reduce_columns_train(df)
    


In [106]:
from clean_preprocess import preprocess_train

# Maps each key to a three-item list: [X_preprop, y, transformer].
# Later cells index this list by position (0, 1, 2), so the order matters.
data_preprop = {}

for key, df in data_dict_red.items():
    # Split the label column from the predictors.
    y = df["target"]
    X = df.drop(columns="target")

    # preprocess_train returns the processed features together with the object
    # it names `transformer` (presumably fitted on X - confirm in clean_preprocess).
    X_preprop, transformer = preprocess_train(X)

    data_preprop[key] = [X_preprop, y, transformer]

In [107]:
# Column names of the preprocessed IRON feature frame (item 0 of its entry);
# the prediction cells use this list to align new data with the training columns.
columns_of_interest = data_preprop["IRON"][0].columns.to_list()
columns_of_interest

['killType_KILL_ACE',
 'killType_KILL_FIRST_BLOOD',
 'killType_KILL_MULTI',
 'minionsKilled',
 'monsterType_AIR_DRAGON',
 'monsterType_CHEMTECH_DRAGON',
 'monsterType_EARTH_DRAGON',
 'monsterType_FIRE_DRAGON',
 'monsterType_HEXTECH_DRAGON',
 'monsterType_RIFTHERALD',
 'monsterType_WATER_DRAGON',
 'totalGold',
 'towerType_INNER_TURRET',
 'towerType_OUTER_TURRET']

# LOGISTIC REGRESSION MODEL

In [108]:
# Look for the best hyperparameters per key with a grid search and keep the
# winning, already-fitted logistic regression in `fitted_models`.

# Set to True to also pickle each tuned model as "<key>_model.pkl" in the
# current working directory. Off by default, so running the cell writes nothing.
EXPORT_MODELS = False

# The search space is identical for every key, so it is built once.
param_grid = {
    'C': [0.1, 1, 10],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear', 'saga']
}

fitted_models = {}

for key, value in data_preprop.items():
    print(key)

    # Each entry is [X_preprop, y, transformer]; only the first two are needed here.
    X_preprop = value[0]
    y = value[1]

    # NOTE(review): X_test / y_test are held out but never scored in this cell;
    # the "Best Score" printed below is the cross-validated accuracy on X_train.
    X_train, X_test, y_train, y_test = train_test_split(X_preprop, y, test_size=0.2, random_state=42)

    model = LogisticRegression(max_iter=5000)

    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring='accuracy',
        cv=5,
        refit=True  # default, spelled out because best_estimator_ below relies on it
    )

    grid_search.fit(X_train, y_train)

    print("Best Hyperparameters: ", grid_search.best_params_)
    print("Best Score: ", grid_search.best_score_)

    # With refit=True the search has already retrained the best configuration
    # on all of X_train, so fitting a second identical model is redundant.
    best_model = grid_search.best_estimator_

    # Save the fitted model in the dictionary using the key as the key
    fitted_models[key] = best_model

    # Optionally export the model as a pickle file (see EXPORT_MODELS above).
    if EXPORT_MODELS:
        filename = key + '_model.pkl'
        with open(filename, 'wb') as file:
            pickle.dump(best_model, file)
        print("Model exported as", filename)

IRON
Best Hyperparameters:  {'C': 0.1, 'penalty': 'l1', 'solver': 'liblinear'}
Best Score:  0.7357476220185917
PLATINUM
Best Hyperparameters:  {'C': 0.1, 'penalty': 'l1', 'solver': 'liblinear'}
Best Score:  0.7182283722275502
SILVER
Best Hyperparameters:  {'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}
Best Score:  0.7126429818155621
GOLD
Best Hyperparameters:  {'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}
Best Score:  0.7137599820730117
BRONZE
Best Hyperparameters:  {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}
Best Score:  0.7025020061905308
CHALLENGER
Best Hyperparameters:  {'C': 0.1, 'penalty': 'l1', 'solver': 'liblinear'}
Best Score:  0.7233133473241978
MASTER
Best Hyperparameters:  {'C': 1, 'penalty': 'l1', 'solver': 'saga'}
Best Score:  0.7052763483570029
GRANDMASTER
Best Hyperparameters:  {'C': 0.1, 'penalty': 'l1', 'solver': 'saga'}
Best Score:  0.7123748584985016
DIAMOND
Best Hyperparameters:  {'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}
Best Score:  0.70438190

# PREDICTION

In [109]:
# Folder holding the input for the match(es) to score; it is handed to
# process_folder in a later cell. The path is relative to the notebook's
# working directory.
pred_folder = "../data/predict"

In [110]:
from get_json import process_folder
from get_diff import calculate_event_differences
from clean_preprocess import preprocess_pred

In [111]:
# Arguments for process_folder.
# NOTE(review): `minute` presumably selects the point in the match timeline to
# snapshot and `look_events` the event types to extract - confirm in get_json.
minute = 10
look_events = [
    "CHAMPION_SPECIAL_KILL",
    "CHAMPION_KILL",
    "ELITE_MONSTER_KILL",
    "BUILDING_KILL",
]
folder_path = pred_folder

# Build the raw frame for the prediction folder and display it.
df = process_folder(folder_path, minute, look_events)
df


Unnamed: 0,jungleMinionsKilled,minionsKilled,totalGold,xp,magicDamageDone,magicDamageDoneToChampions,magicDamageTaken,physicalDamageDone,physicalDamageDoneToChampions,physicalDamageTaken,...,omnivamp,physicalVamp,power,powerMax,powerRegen,spellVamp,kills,killType_KILL_FIRST_BLOOD,matchId,target
0,40,190,13400,17475,31417,2962,3845,54631,4306,9269,...,7,0,2059,3239,264,0,1,1,LA2_1308803773,1
1,40,185,12659,16157,22675,3434,3563,54528,2836,10067,...,0,0,1669,2700,105,0,1,0,LA2_1308803773,0


In [112]:
# Turn the frame returned by process_folder into difference features.
# NOTE(review): judging by the displayed result, the two rows of a match are
# collapsed into a single row per matchId - confirm in get_diff. The warning
# stored with this cell's output points at a DataFrame.append call, presumably
# inside the helper.
df_dif = calculate_event_differences(df)
df_dif

  all_events_diff = all_events_diff.append(resta_filas, ignore_index=True)


Unnamed: 0,matchId,jungleMinionsKilled,minionsKilled,totalGold,xp,magicDamageDone,magicDamageDoneToChampions,magicDamageTaken,physicalDamageDone,physicalDamageDoneToChampions,...,movementSpeed,omnivamp,physicalVamp,power,powerMax,powerRegen,spellVamp,kills,killType_KILL_FIRST_BLOOD,target
0,LA2_1308803773,0,5,741,1318,8742,-472,282,103,1470,...,-45,7,0,390,539,159,0,0,1,1


In [113]:
from get_json import check_and_create_columns

# Remove the match identifier before building the feature frame.
# A non-mutating drop with errors="ignore" keeps this cell idempotent: the
# previous in-place drop raised KeyError when the cell was run a second time,
# because "matchId" had already been removed from df_dif.
df_dif = df_dif.drop(columns="matchId", errors="ignore")

# Align the prediction frame with the training feature list.
# NOTE(review): judging by the displayed result, the helper keeps only the
# columns in columns_of_interest and adds any that are missing - confirm in get_json.
df_cc = check_and_create_columns(df_dif, columns_of_interest)
df_cc

Unnamed: 0,minionsKilled,totalGold,killType_KILL_FIRST_BLOOD,killType_KILL_ACE,killType_KILL_MULTI,monsterType_AIR_DRAGON,monsterType_CHEMTECH_DRAGON,monsterType_EARTH_DRAGON,monsterType_FIRE_DRAGON,monsterType_HEXTECH_DRAGON,monsterType_RIFTHERALD,monsterType_WATER_DRAGON,towerType_INNER_TURRET,towerType_OUTER_TURRET
0,5,741,1,0,0,0,0,0,0,0,0,0,0,0


In [115]:
# Preprocess the prediction row with the transformer stored for IRON
# (item 2 of its data_preprop entry, i.e. the object preprocess_train returned
# for that key), so it goes through the same preprocessing as the IRON
# training features.
X_pred_prep = preprocess_pred(df_cc,data_preprop["IRON"][2])
X_pred_prep

Unnamed: 0,killType_KILL_ACE,killType_KILL_FIRST_BLOOD,killType_KILL_MULTI,minionsKilled,monsterType_AIR_DRAGON,monsterType_CHEMTECH_DRAGON,monsterType_EARTH_DRAGON,monsterType_FIRE_DRAGON,monsterType_HEXTECH_DRAGON,monsterType_RIFTHERALD,monsterType_WATER_DRAGON,totalGold,towerType_INNER_TURRET,towerType_OUTER_TURRET
0,0.0,0.0,0.0,0.155556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.233998,0.0,0.0


In [116]:
# Pick the tuned logistic regression stored under "IRON" by the grid-search
# cell; this matches the IRON transformer used to build X_pred_prep.
model_iron = fitted_models["IRON"]

In [118]:
# Class probabilities for the preprocessed row: one column per class, in the
# order given by model_iron.classes_.
model_iron.predict_proba(X_pred_prep)

array([[0.39874876, 0.60125124]])