Henry Ho 304723723

# League Rank Predictor

Uses Riot Developer's api V3 (https://developer.riotgames.com/) and seeded data to predict the "highest achieved season tier" for each player using the data from matches as features.

Features include:

Labels include:
- UNRANKED
- BRONZE
- SILVER
- GOLD
- PLATINUM
- DIAMOND
- MASTERS
- CHALLENGER

In [1]:
# Import pandas
import pandas as pd

## Create Dataframe using Riot's seeded data

In [2]:
# Creating empty dataframe
stats_df = pd.DataFrame()

### Parse seeded data

In [3]:
# Importing JSON
import json

# Function gets stats from seeded data and returns a dataframe
def get_stats(*files):
    df = pd.DataFrame()
    
    if len(files) == 1:
        df = parse_json(files)
    elif len(files) > 1:
        for file in files:
            df = df.append(parse_json(file), ignore_index=True)
            
    return df
        
# Function that parses json file into a dataframe
def parse_json(file):
    df = pd.DataFrame(columns = ['win', 'role', 'kills', 'deaths', 'assists', 'gameDuration', 'goldEarned', 'totalMinionsKilled', 'damageDealtToObjectives', 'totalDamageDealt', 'totalDamageTaken', 'wardsPlaced', 'highestAchievedSeasonTier'])
    matches = pd.read_json(path_or_buf=file)
    
    for match in matches['matches']:
        for participant in match['participants']:
            
            df = df.append({'win': participant['stats']['win'],
                            'role': participant['timeline']['role'],
                            'kills': participant['stats']['kills'],
                            'deaths': participant['stats']['deaths'],
                            'assists': participant['stats']['assists'],
                            'gameDuration': match['gameDuration'],
                            'goldEarned': participant['stats']['goldEarned'],
                            'totalMinionsKilled': participant['stats']['totalMinionsKilled'],
                            'damageDealtToObjectives': participant['stats']['damageDealtToObjectives'],
                            'totalDamageDealt': participant['stats']['totalDamageDealt'],
                            'totalDamageTaken': participant['stats']['totalDamageTaken'],
                            'wardsPlaced': participant['stats']['wardsPlaced'],
                            'highestAchievedSeasonTier': participant['highestAchievedSeasonTier']}, ignore_index=True)

    return df

In [4]:
# datasets = ['datasets/matches1.json', 'datasets/matches2.json', 'datasets/matches3.json', 
#             'datasets/matches4.json', 'datasets/matches5.json', 'datasets/matches6.json', 
#             'datasets/matches7.json', 'datasets/matches8.json', 'datasets/matches9.json', 
#             'datasets/matches10.json']

datasets = ['datasets/matches1.json', 'datasets/matches2.json', 'datasets/matches3.json',
            'datasets/matches4.json', 'datasets/matches5.json', 'datasets/matches6.json', 
            'datasets/matches7.json', 'datasets/matches8.json', 'datasets/matches9.json',
            'datasets/matches10.json']
stats_df = get_stats(*datasets)
print(stats_df)

        win         role kills deaths assists gameDuration goldEarned  \
0      True         SOLO    17      5      25         3509      26248   
1      True    DUO_CARRY    24     14      15         3509      28906   
2      True  DUO_SUPPORT     8     17      22         3509      18845   
3      True         NONE     7      7      29         3509      19404   
4      True         SOLO    10     10      20         3509      20805   
5     False         NONE     8     15      12         3509      19921   
6     False         SOLO    14     13      12         3509      22644   
7     False  DUO_SUPPORT    11     15      16         3509      20659   
8     False    DUO_CARRY     8     12      18         3509      22493   
9     False         SOLO    12     11      13         3509      22368   
10    False         SOLO     3      8      14         3105      17936   
11    False  DUO_SUPPORT     8     10      16         3105      19876   
12    False    DUO_CARRY     9     12      14      

In [5]:
print(stats_df.columns[:-1])

Index(['win', 'role', 'kills', 'deaths', 'assists', 'gameDuration',
       'goldEarned', 'totalMinionsKilled', 'damageDealtToObjectives',
       'totalDamageDealt', 'totalDamageTaken', 'wardsPlaced'],
      dtype='object')


# KNN, Decision Tree, Logistic Regression, Random Forest without One Hot Encoding

In [6]:
feature_cols = ['kills', 'deaths', 'gameDuration', 'goldEarned', 'totalMinionsKilled', 'damageDealtToObjectives', 'totalDamageDealt', 'wardsPlaced']
X = stats_df[feature_cols]

y = stats_df['highestAchievedSeasonTier']


In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=3)


In [8]:
# KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Instantiating an "object" of KNeighborsClassifier "class" with k=5:
k = 5
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
y_predict_knn = knn.predict(X_test)


accuracy_knn = accuracy_score(y_test, y_predict_knn)

print(accuracy_knn)

0.235666666667


In [9]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier

# "my_decisiontree" is instantiated as an "object" of DecisionTreeCLassifier "class"

my_decisiontree = DecisionTreeClassifier()
my_decisiontree.fit(X_train, y_train)
y_predict_dt = my_decisiontree.predict(X_test)
accuracy_dt = accuracy_score(y_test, y_predict_dt)

print(accuracy_dt)

0.234


In [10]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression

# "my_decisiontree" is instantiated as an "object" of DecisionTreeCLassifier "class"

my_logreg = LogisticRegression()
my_logreg.fit(X_train, y_train)
y_predict_lr = my_logreg.predict(X_test)
accuracy_lr = accuracy_score(y_test, y_predict_lr)

print(accuracy_lr)

0.293666666667


In [11]:
#Random Forest
from sklearn.ensemble import RandomForestClassifier
my_RandomForest = RandomForestClassifier(n_estimators = 19, bootstrap = True, random_state=2)

my_RandomForest.fit(X_train, y_train)

y_predict_rf = my_RandomForest.predict(X_test)

accuracy_rf = accuracy_score(y_test, y_predict_rf)

print(accuracy_rf)

0.258333333333


# Accuracy of KNN, Decision Tree, Logistic Regression, Random Forest

In [12]:
print("KNN:                 ", accuracy_knn)
print("Decision Tree:       ", accuracy_dt)
print("Logistic Regression: ", accuracy_lr)
print("Random Forest:       ", accuracy_rf)

KNN:                  0.235666666667
Decision Tree:        0.234
Logistic Regression:  0.293666666667
Random Forest:        0.258333333333


# One Hot Encoding using KNN, Decision Tree, Logistic Regression

In [13]:
feature_cols = ['kills', 'deaths', 'gameDuration', 'goldEarned', 'totalMinionsKilled', 'damageDealtToObjectives', 'totalDamageDealt', 'wardsPlaced']
X = stats_df[feature_cols]

encoded_stats_df = pd.get_dummies(stats_df, columns = ['win', 'role'])

cols = encoded_stats_df.columns.tolist()
cols.append(cols.pop(cols.index('highestAchievedSeasonTier')))
encoded_stats_df = encoded_stats_df.reindex(columns=cols)

print(stats_df.head())
print('\n')
print(encoded_stats_df.head())


    win         role kills deaths assists gameDuration goldEarned  \
0  True         SOLO    17      5      25         3509      26248   
1  True    DUO_CARRY    24     14      15         3509      28906   
2  True  DUO_SUPPORT     8     17      22         3509      18845   
3  True         NONE     7      7      29         3509      19404   
4  True         SOLO    10     10      20         3509      20805   

  totalMinionsKilled damageDealtToObjectives totalDamageDealt  \
0                373                   17163           335249   
1                403                   34924           495365   
2                 68                    4738            99302   
3                 86                    8750           232614   
4                287                    8416           247818   

  totalDamageTaken wardsPlaced highestAchievedSeasonTier  
0            82758          27                  UNRANKED  
1            53588          15                      GOLD  
2            4559

In [14]:

X = encoded_stats_df[feature_cols]
y = encoded_stats_df['highestAchievedSeasonTier']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=3)

print(X_test.head())
print('\n')
print(y_test.head())


     kills deaths gameDuration goldEarned totalMinionsKilled  \
5876     0      5         1718       7087                  7   
6555    10     10         1544      10390                139   
1448     0      7         1521       6155                 47   
3351     5      3         1805      11262                 18   
231      1      8         2499      15142                 56   

     damageDealtToObjectives totalDamageDealt wardsPlaced  
5876                     531            10099          26  
6555                       0            85144           8  
1448                    1256            29577          15  
3351                    7396            30787          25  
231                     6516           254831          16  


5876    UNRANKED
6555    PLATINUM
1448        GOLD
3351        GOLD
231         GOLD
Name: highestAchievedSeasonTier, dtype: object


In [15]:
#KNN
k = 5
knn = KNeighborsClassifier(n_neighbors=k)

# Training only on the training set using the method "fit" 
# of the object along with training dataset and labels to train the model.
knn.fit(X_train, y_train)

# Testing on the testing set:
y_predict_knn = knn.predict(X_test)

# KNN Accuracy Evaluation
accuracy_knn = accuracy_score(y_test, y_predict_knn)

print(accuracy_knn)

0.221


In [16]:
# Decision Tree

my_decisiontree = DecisionTreeClassifier()

# Training only on the training set using the method "fit" 
# of the object along with training dataset and labels to train the model.
my_decisiontree.fit(X_train, y_train)

# Testing on the testing set:
y_predict_dt = my_decisiontree.predict(X_test)

# Decision Tree Accuracy Evaluation
accuracy_dt = accuracy_score(y_test, y_predict_dt)

print(accuracy_dt)

0.23525


In [17]:
# Logistic Regression

# "my_decisiontree" is instantiated as an "object" of DecisionTreeCLassifier "class"

my_logreg = LogisticRegression()

# Training only on the training set using the method "fit" 
# of the object along with training dataset and labels to train the model.
my_logreg.fit(X_train, y_train)

# Testing on the testing set:
y_predict_lr = my_logreg.predict(X_test)

# Logistic Regression Accuracy Evaluation
accuracy_lr = accuracy_score(y_test, y_predict_lr)

print(accuracy_lr)

0.296


In [18]:
#Random Forest
my_RandomForest = RandomForestClassifier(n_estimators = 19, bootstrap = True, random_state=2)

my_RandomForest.fit(X_train, y_train)

y_predict_rf = my_RandomForest.predict(X_test)

accuracy_rf = accuracy_score(y_test, y_predict_rf)

print(accuracy_rf)

0.25925


# Accuracy of KNN, Decision Tree, Logistic Regression, Random Forest

In [19]:
print("KNN:                 ", accuracy_knn)
print("Decision Tree:       ", accuracy_dt)
print("Logistic Regression: ", accuracy_lr)
print("Random Forest:       ", accuracy_rf)

KNN:                  0.221
Decision Tree:        0.23525
Logistic Regression:  0.296
Random Forest:        0.25925
