<a href="https://colab.research.google.com/github/jovansx/siap-project/blob/develop/siap-script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ydata-profiling
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn import tree
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler
from ydata_profiling import ProfileReport

# Methods

In [2]:
def convert_champion_json_to_map(path='/content/drive/MyDrive/SIAP Data/champion_info.json'):
  """Build a champion id -> champion name lookup from a champion-info JSON file.

  Args:
    path: Location of the JSON file, passed straight to ``pd.read_json``.
      Defaults to the project's Google Drive copy, so existing zero-argument
      calls keep working.

  Returns:
    dict mapping each record's "id" value to its "name" value.
  """
  df_champs = pd.read_json(path)
  # Every element of the "data" column is one champion record that is
  # indexed by "id" and "name". Bracket access is used so the lookup cannot
  # be shadowed by a DataFrame attribute of the same name.
  return {champ["id"]: champ["name"] for champ in df_champs["data"]}

def generate_df2(champ_map, champ_id, champ_name, df1):
  """Swap the id column ``champ_id`` of ``df1`` for a name column ``champ_name``.

  Args:
    champ_map: dict of champion id -> champion name.
    champ_id: Name of the id column in ``df1`` to look up and then remove.
    champ_name: Name of the column that receives the looked-up names.
    df1: Games DataFrame containing ``champ_id``.

  Returns:
    A new DataFrame without ``champ_id`` and with ``champ_name`` appended.
    The merge uses pandas' default join, so rows whose id is missing from
    ``champ_map`` do not appear in the result.
  """
  # Two-column lookup table: (id, name), labelled with the caller's column names.
  lookup = pd.DataFrame(list(champ_map.items()), columns=[champ_id, champ_name])
  with_names = df1.merge(lookup, on=champ_id)
  return with_names.drop(columns=[champ_id])

def merge(champ_map, df1):
  """Replace all ten champion-id columns of ``df1`` with champion-name columns.

  Walks team 1 slots 1-5, then team 2 slots 1-5, turning each
  ``t<team>_champ<slot>id`` column into ``t<team>_champ<slot>name`` via
  ``generate_df2``.

  Args:
    champ_map: dict of champion id -> champion name.
    df1: Games DataFrame holding the ten ``t*_champ*id`` columns.

  Returns:
    The DataFrame produced after the last replacement.
  """
  for team in (1, 2):
    for slot in range(1, 6):
      prefix = 't{}_champ{}'.format(team, slot)
      df1 = generate_df2(champ_map, prefix + 'id', prefix + 'name', df1)
  return df1

def delete_columns(df, columns_deletion):
  """Return a copy of ``df`` without the columns named in ``columns_deletion``.

  Args:
    df: Source DataFrame (left untouched).
    columns_deletion: A column label or list of labels to remove.

  Returns:
    New DataFrame lacking the requested columns.
  """
  trimmed = df.drop(columns=columns_deletion)
  return trimmed

def preprocess_dataframe(df_stats):
  """Collapse champion stats into one averaged row per champion name.

  Args:
    df_stats: DataFrame with at least the columns "Name", "Score" (numeric)
      and "Win %" (strings with a trailing percent sign, e.g. "51.23%").
      A name may occur on several rows. The input is not modified.

  Returns:
    DataFrame with columns "Name", "Score" and "Win": the mean "Score" and
    mean "Win %" (as float) of all rows sharing a name. Rows are sorted by
    name and the index runs from 1 to the number of distinct names.
  """
  # Only these three columns feed the result; work on a copy so the caller's
  # frame keeps its original string "Win %" column.
  stats = df_stats[["Name", "Score", "Win %"]].copy()

  # Strip the percent sign by content rather than by position, so values of
  # any width ("9.5%", "100%") convert correctly.
  stats["Win %"] = stats["Win %"].str.strip().str.rstrip("%").astype(float)

  # One vectorised pass replaces the per-name loop that relied on
  # DataFrame.append, which no longer exists in pandas 2.x.
  averaged = stats.groupby("Name", as_index=False)[["Score", "Win %"]].mean()
  averaged = averaged.rename(columns={"Win %": "Win"})

  averaged.index = range(1, len(averaged) + 1)  # 1-based incremental index
  return averaged

def merge_games_with_stats_helper(df_games, df_stats, old_score, new_score, old_win, new_win, name):
  """Attach one champion slot's score and win-rate columns to the games frame.

  Args:
    df_games: Games DataFrame containing the champion-name column ``name``.
    df_stats: Stats DataFrame with a "Name" column plus the columns currently
      called ``old_score`` and ``old_win``.
    old_score: Current label of the score column in ``df_stats``.
    new_score: Label the score column should carry in the result.
    old_win: Current label of the win-rate column in ``df_stats``.
    new_win: Label the win-rate column should carry in the result.
    name: Column of ``df_games`` holding the champion name to join on.

  Returns:
    Tuple ``(games, stats)``: ``games`` is ``df_games`` joined with the
    renamed stats and without the champion-name column; ``stats`` is
    ``df_stats`` with the two columns renamed, ready for the next slot.
    The join is pandas' default merge, so games whose champion has no stats
    row are not present in ``games``.
  """
  # rename() matches whole labels and returns new frames, so neither input is
  # altered and no other column can be hit by a partial-text match.
  renamed_stats = df_stats.rename(columns={old_score: new_score, old_win: new_win})
  keyed_games = df_games.rename(columns={name: "Name"})

  merged = pd.merge(keyed_games, renamed_stats, on="Name")
  merged = merged.drop(columns=["Name"])  # the join key is no longer needed
  return merged, renamed_stats

def merge_games_with_stats(df_games, df_stats):
  """Join champion stats onto the games frame for all ten champion slots.

  Slots are processed in the order team 1 champions 1-5, then team 2
  champions 1-5. For each slot the stats columns are relabelled from the
  previous slot's names (initially "Score" and "Win") to
  ``<slot>score`` / ``<slot>win`` and merged on the slot's ``<slot>name``
  column by ``merge_games_with_stats_helper``.

  Args:
    df_games: Games DataFrame with the ten ``t*_champ*name`` columns.
    df_stats: Stats DataFrame with columns "Name", "Score" and "Win".

  Returns:
    The games DataFrame after the last slot has been merged.
  """
  prev_score, prev_win = "Score", "Win"
  for team in (1, 2):
    for slot in range(1, 6):
      prefix = "t{}_champ{}".format(team, slot)
      score_col = prefix + "score"
      win_col = prefix + "win"
      df_games, df_stats = merge_games_with_stats_helper(
          df_games, df_stats, prev_score, score_col, prev_win, win_col, prefix + "name")
      # The stats columns now carry this slot's labels; the next slot renames from them.
      prev_score, prev_win = score_col, win_col
  return df_games

def create_scaler(df_games):
  """Fit a ``StandardScaler`` on ``df_games`` and return the fitted scaler.

  Args:
    df_games: Feature data the scaler is fitted on.

  Returns:
    The fitted ``StandardScaler`` instance.
  """
  # A hand-rolled (df - mean) / std variant was tried here previously.
  fitted_scaler = StandardScaler().fit(df_games)
  return fitted_scaler

def extract_y_from_dataframe(df_games):
  """Split ``df_games`` into features and the "winner" target column.

  Args:
    df_games: DataFrame containing a "winner" column (left untouched).

  Returns:
    Tuple ``(features, target)``: ``features`` is ``df_games`` without the
    "winner" column, ``target`` is the "winner" column as a Series.
  """
  target_column = "winner"
  target = df_games[target_column]
  features = df_games.drop(columns=[target_column])
  return features, target


# Main 

In [26]:
# Read data
# Both files are loaded from the mounted Google Drive; the stats CSV is semicolon-delimited.
df_games = pd.read_csv('/content/drive/MyDrive/SIAP Data/games.csv')
df_stats = pd.read_csv('/content/drive/MyDrive/SIAP Data/League of Legends Champion Stats 12.1.csv', delimiter=';')

# Replace champion ids with names
# (each t*_champ*id column becomes the matching t*_champ*name column)
champ_map = convert_champion_json_to_map()
df_games = merge(champ_map, df_games)

# Preprocess data of df_stats
# (reduces the stats to one row per champion name with averaged "Score" and "Win")
df_stats = preprocess_dataframe(df_stats)

# Merge read data into single dataframe
# (adds t*_champ*score / t*_champ*win columns and removes the t*_champ*name columns)
df_games = merge_games_with_stats(df_games, df_stats)

# Leave chosen columns
# Target ("winner"), the ten per-champion win-rate columns, and two columns from games.csv.
df_games = df_games[["winner", "t1_champ1win", "t2_champ1win", "t1_champ2win", "t2_champ2win", "t1_champ3win", "t2_champ3win", "t1_champ4win", "t2_champ4win", "t1_champ5win", "t2_champ5win",
                     "firstBlood", "firstTower"]]

# Shuffle data in dataframe
# NOTE(review): no random_state is passed, so the row order differs on every run.
df_games = df_games.sample(frac = 1)

# Remove duplicated rows, then extract y values ("winner") into a separate
# object and drop that column from the feature dataframe
df_games = df_games.drop_duplicates()
df_games_x, df_games_y = extract_y_from_dataframe(df_games)

# Analyzing data
# (optional ydata-profiling report, currently disabled)
# prof = ProfileReport(df_games_x)
# prof.to_file(output_file='output_after_dropping_dragon.html')

# Split data to train and test
# 70 % train / 30 % test.
# NOTE(review): no random_state is passed here either, so the split and the
# scores printed below are not reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(df_games_x, df_games_y, test_size = 0.30)

# Create Standard scaler based on X_train -> transform X_train and X_test using that scaler
# The scaler is fitted on the training part only and then applied to both parts.
scaler = create_scaler(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Create models
# All four estimators are created with their default hyper-parameters.
random_forest_classifier = RandomForestClassifier() 
linear_svc = LinearSVC()
logistic_regression = LogisticRegression()
decision_tree_classifier = tree.DecisionTreeClassifier()

# Train models using X_train and y_train
random_forest_classifier.fit(X_train, y_train)
linear_svc.fit(X_train, y_train)
logistic_regression.fit(X_train, y_train)
decision_tree_classifier.fit(X_train, y_train)

# Predict values
# y_pred_1..4 follow the model order above: random forest, linear SVC,
# logistic regression, decision tree.
y_pred_1 = random_forest_classifier.predict(X_test)
y_pred_2 = linear_svc.predict(X_test)
y_pred_3 = logistic_regression.predict(X_test)
y_pred_4 = decision_tree_classifier.predict(X_test)

# Report the F1 score of every model on the held-out test part.
# NOTE(review): f1_score is called with its default arguments; this presumes
# "winner" is a two-class label — confirm against games.csv.
print("F1 SCORE OF THE RandomForestClassifier: ", metrics.f1_score(y_test, y_pred_1))
print("F1 SCORE OF THE LinearSVC: ", metrics.f1_score(y_test, y_pred_2))
print("F1 SCORE OF THE LogisticRegression: ", metrics.f1_score(y_test, y_pred_3))
print("F1 SCORE OF THE DecisionTreeClassifier: ", metrics.f1_score(y_test, y_pred_4))


F1 SCORE OF THE RandomForestClassifier:  0.7071473750790639
F1 SCORE OF THE LinearSVC:  0.7128376251337909
F1 SCORE OF THE LogisticRegression:  0.7128912400025191
F1 SCORE OF THE DecisionTreeClassifier:  0.5978448001020212
