# Import Statements

In [17]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score
import pandas as pd 
import os 

# Load the files and save them as a pandas dataframe 

In [18]:
processed_data_directory = './data_by_league'

# files = os.listdir(processed_data_directory)

file_path = os.path.join(processed_data_directory, 'LCK.csv')

df = pd.read_csv(file_path)
df

Unnamed: 0,team,gold_diff,gold_diff_14min,elder_dragon_kills,baron_kills,dragon_kills,herald_kills,team_KD_ratio,game_length,objectives_stolen_away,first_turret_kill,second_turret_kill,third_turret_kill,baron_powerplay,first_blood,result,file_name
0,KT,11528,1812,0,2,2,1,2.090909,35.987300,0,1037306.0,1948408.0,2115845.0,965.0,False,1,./games/ESPORTSTMNT01_3410111_cleaned.json
1,LSB,-11528,-1812,0,0,3,1,0.478261,35.987300,0,1036206.0,,,0.0,True,0,./games/ESPORTSTMNT01_3410111_cleaned.json
2,KT,12007,2545,0,1,3,0,4.250000,24.753417,0,931593.0,1359309.0,1451427.0,3608.0,True,1,./games/ESPORTSTMNT01_3412825_cleaned.json
3,T1,-12007,-2545,0,0,0,2,0.235294,24.753417,0,1251860.0,,,0.0,False,0,./games/ESPORTSTMNT01_3412825_cleaned.json
4,DRX,7624,278,0,2,2,2,1.600000,33.767400,0,1006762.0,1753906.0,1995268.0,698.0,True,1,./games/ESPORTSTMNT01_3386968_cleaned.json
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
488,DRX,-13779,-265,0,0,0,1,0.142857,29.366433,0,1314063.0,,,0.0,False,0,./games/ESPORTSTMNT02_3223073_cleaned.json
489,HLE,4426,-962,0,1,2,0,4.500000,32.320933,0,1672912.0,1776652.0,1903868.0,4077.0,True,1,./games/ESPORTSTMNT01_3375723_cleaned.json
490,KDF,-4426,962,0,0,2,2,0.222222,32.320933,0,1014505.0,,,0.0,False,0,./games/ESPORTSTMNT01_3375723_cleaned.json
491,HLE,4416,22,0,2,2,1,2.000000,44.130517,0,1009437.0,2530562.0,2620661.0,558.0,False,1,./games/ESPORTSTMNT03_3168168_cleaned.json


# Split the dataset into training and testing sets with an 80/20 split 

In [19]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Create the RFCModel class which holds the model, dataframe, and the teams of each league with their respective winrates  

In [20]:
class RFCModel():
    
    def __init__(self, league):
        self.clf = None 
        self.model_accuracy = None 
        self.league = league
        self.teams = {}
        self.df = None 
        self.df_no_teams = None 
        
    def calculate_rankings(self):
        
        teams = self.df['team'].unique()
        for team in teams:
            # team_all_matches = pd.DataFrame(columns=self.df_no_teams.columns)
            # team_all_matches.columns = team_all_matches.columns.astype(str) 
            wins, losses = 0, 0
            
            for index, row in self.df.iterrows():
                if row['team'] == team:
                    new_row = (self.df_no_teams.iloc[index].drop(labels=['result']).values.reshape(1,-1))
                    predictions = self.clf.predict(new_row)
                    if predictions:
                        wins += 1 
                    else:
                        losses += 1 
            self.teams[team] = wins / (wins + losses) * 100 

        
    def create_model(self):
        processed_data_directory = './data_by_league'
        file_name = self.league + ".csv"
        file_path  = os.path.join(processed_data_directory, file_name)
        self.df = pd.read_csv(file_path)
        self.df.dropna(inplace=True, ignore_index=True)
        self.df_no_teams = self.df.drop(columns=['team', 'elder_dragon_kills', 'game_length', 'second_turret_kill', 'third_turret_kill', 'first_blood', 'file_name'], axis=1)
        
        x = self.df_no_teams.drop(columns=['result'])
        y = self.df_no_teams['result']
        
        X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
        # print(f"training data: {X_train}")
        
        # Create a Random Forest Classifier
        self.clf = RandomForestClassifier(random_state=42)
        
        # Train the model
        self.clf.fit(X_train, y_train)
        
        # Make predictions on the test set
        y_pred = self.clf.predict(X_test)
        
        # Evaluate the model
        accuracy = accuracy_score(y_test, y_pred)
        
        self.model_accuracy = accuracy

        

In [21]:
leagues = ["LCS", "LEC", "CBLOL", "LCK", "LJL", "LLA", "LPL", "PCS", "VCS"]

lck = RFCModel("LCK")
lck.create_model()
lck.calculate_rankings()



In [22]:
lck.teams

{'KT': 100.0,
 'DRX': 84.21052631578947,
 'NS': 92.85714285714286,
 'LSB': 82.35294117647058,
 'GEN': 100.0,
 'BRO': 88.23529411764706,
 'HLE': 90.9090909090909,
 'DK': 96.0,
 'T1': 90.47619047619048,
 'KDF': 100.0}

In [25]:
def assign_points(teams_dict, sorted_keys, league): 
    returning_dict = {}
    curr_score = 10 
    
    # Arbitrary additional scores based on perceived strength of the entire region 
    if league == 'LPL':
        curr_score += 4 
    elif league == 'LCK':
        curr_score += 3
    elif league == 'LEC':
        curr_score += 2 
    elif league == 'LCS':
        curr_score += 1 
    
    
    prev_win_rate = teams_dict[list(teams_dict.keys())[0]] 
    for team in sorted_keys:
        win_rate = teams_dict[team]        
        if prev_win_rate == win_rate:
            returning_dict[team] = curr_score

        else:
            prev_win_rate = win_rate 
            curr_score -= 1     
            returning_dict[team] = curr_score
            
    return returning_dict
        
        
        

In [27]:
a = sorted(lck.teams, key=lambda x: lck.teams[x], reverse=True)
print(assign_points(lck.teams, a, 'LCK'))

{'KT': 13, 'GEN': 13, 'KDF': 13, 'DK': 12, 'NS': 11, 'HLE': 10, 'T1': 9, 'BRO': 8, 'DRX': 7, 'LSB': 6}
