# In [None]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from efficient_apriori import apriori as apriori2

# Notes regarding data and names
# In results, 1 stands for a blue team win and 0 for a blue team loss; if the teams are merged, it just stands for win and loss
# When using "bot" as a name, it refers to "adc" and "support", as they are usually in the "bot lane" in the game

# Load every preprocessed csv file into its own pandas DataFrame.
# dataRaw: the csv data preprocessed (id, gameid, league, blueteam and redteam columns deleted)
# dataMerged: preprocessed so blue team and red team champions are merged into a single column per role (5 total)
# botRaw: preprocessed to only include the "adc", "support" and "result" columns
# damwon: only the matches where DAMWON Gaming was playing
# The other damwon csv files were preprocessed to include only the champions DAMWON played,
# the champions the enemy played, DAMWON's bot champions, and the enemy bot champions
# (the first row of each file is used as the header, which is the read_csv default)
dataRaw = pd.read_csv("dataRaw.csv")
dataMerged = pd.read_csv("dataMerged.csv")
botRaw = pd.read_csv("botRaw.csv")
botMatch = pd.read_csv("botmatch.csv")
damwon = pd.read_csv("damwon.csv")
damwonChampions = pd.read_csv("damwonChampions.csv")
damwonEnemy = pd.read_csv("damwonEnemy.csv")
damwonBot = pd.read_csv("damwonBot.csv")

# Report the size of the merged dataset and whether any cell in it is missing
total_games = len(dataMerged['top'])
print("Total amount of games in raw dataset: ", total_games)
has_missing_values = dataMerged.isna().values.any()
print("Data has null/empty values" if has_missing_values else "Data has no null/empty values")
    
# Keep only champions that were played often enough in a role to be worth analysing.
# The comparison is strict: a champion is kept when its count is GREATER than the threshold.
def _keep_frequent(frame, columns, threshold):
    """Return ``frame`` without the rows whose value in ``columns`` is too rare.

    The columns are processed one after another, so each count is taken on the
    frame already reduced by the previous columns.

    frame: DataFrame to filter (not modified, a filtered frame is returned)
    columns: column names to filter on, in order
    threshold: a value is kept only if it occurs more than ``threshold`` times
    """
    for column in columns:
        frame = frame.groupby(column).filter(lambda group: len(group) > threshold)
    return frame


dataMerged = _keep_frequent(dataMerged, ['top', 'jungle', 'mid', 'adc', 'support'], 50)

dataRaw = _keep_frequent(
    dataRaw,
    ['bluetop', 'bluejungle', 'bluemid', 'blueadc', 'bluesupport',
     'redtop', 'redjungle', 'redmid', 'redadc', 'redsupport'],
    10,
)
botRaw = _keep_frequent(botRaw, ['adc', 'support'], 50)

# NOTE(review): 'bluesupp' is spelled differently from 'redsupport' and from dataRaw's
# 'bluesupport' - confirm it matches the header of botmatch.csv
botMatch = _keep_frequent(botMatch, ['blueadc', 'bluesupp', 'redadc', 'redsupport'], 50)

# DAMWON's own picks: one independently filtered frame per role
damwontop = _keep_frequent(damwon[['result', 'damwontop', 'enemytop']], ['damwontop'], 2)
damwonjungle = _keep_frequent(damwonChampions, ['damwonjungle'], 2)
damwonmid = _keep_frequent(damwonChampions, ['damwonmid'], 2)
damwonadc = _keep_frequent(damwonChampions, ['damwonadc'], 2)
damwonsupport = _keep_frequent(damwonChampions, ['damwonsupport'], 2)

damwonBot = _keep_frequent(damwonBot, ['damwonadc', 'damwonsupport'], 4)

# Champions picked against DAMWON: one independently filtered frame per role.
# Each frame is now stored under the name of the role it was filtered on
# (the jungle filter used to be stored as "support" and the mid filter as "jungle").
damwonEnemytop = _keep_frequent(damwonEnemy, ['enemytop'], 2)
damwonEnemyjungle = _keep_frequent(damwonEnemy, ['enemyjungle'], 2)
damwonEnemymid = _keep_frequent(damwonEnemy, ['enemymid'], 2)
damwonEnemyadc = _keep_frequent(damwonEnemy, ['enemyadc'], 2)
damwonEnemysupport = _keep_frequent(damwonEnemy, ['enemysupport'], 2)
print("Total amount of games after preprocessing only champions in a specific role with a count of 50 matches: ", len(dataMerged['top']))
print()

# Share of games in which a champion was played, per role.
# For every role the five most played champions are rendered as text.
pick_share = {
    role: dataMerged[role].value_counts(normalize=True).head(5).to_string(dtype=False)
    for role in ('top', 'jungle', 'mid', 'adc', 'support')
}
topPercent = pick_share['top']
junglePercent = pick_share['jungle']
midPercent = pick_share['mid']
adcPercent = pick_share['adc']
supPercent = pick_share['support']
# Absolute number of games per top champion, as a two-column frame
topselect = dataMerged['top'].value_counts().reset_index()

# Print the most popular champions as a share of total games.
# Only the top lane is printed; junglePercent, midPercent, adcPercent and supPercent
# hold the same information for the other roles and can be printed the same way.
print(f"Most popular top champions \n{topPercent}")

# Share of games won per champion (mean of 'result'), five best top champions.
# The same expression works for the other roles by grouping on
# 'jungle', 'mid', 'adc' or 'support' instead of 'top'.
top_winrate = dataMerged.groupby('top')['result'].mean()
topWin = top_winrate.sort_values(ascending=False).head(5).to_string(dtype=False, header=False)
print(f"Best top champions by winrate\n{topWin}")

# Graph the 5 most played champions for top lane.
# The bars are drawn straight from the value_counts() Series (index = champion,
# values = number of games). Going through reset_index() and the 'index' / 'top'
# column names only works on pandas < 2.0, where those columns were named that way.
most_played_top = dataMerged['top'].value_counts().head(5)
fig, ax = plt.subplots(1)
plt.bar(most_played_top.index, most_played_top.values)
plt.title('Most Played in Top')
plt.ylabel('frequency')
plt.show()

# Graph the win rate (mean of 'result') of the 5 best top champions as a bar graph
topGraph = dataMerged.groupby('top')['result'].mean().sort_values(ascending=False).head(5).reset_index()
fig, ax = plt.subplots(1)
plt.bar(topGraph['top'], topGraph['result'])
# A win ratio cannot exceed 1, so pin the upper end of the axis there
ax.set_ylim(top=1)
plt.ylabel('win ratio')
plt.title('Best win rate in Top')
plt.show()
# Print the five top champions with the lowest win rate (mean of 'result').
# The same expression works for the other roles by grouping on
# 'jungle', 'mid', 'adc' or 'support' instead of 'top'.
# NOTE(review): the message mentions a minimum of 10 games, while dataMerged is
# filtered earlier with len(x) > 50 per role - confirm which number is intended.
top_winrate_ascending = dataMerged.groupby('top')['result'].mean().sort_values(ascending=True)
topWin = top_winrate_ascending.head(5).to_string(dtype=False, header=False)
print(f"Worst top champions by winrate with a minimum of 10 games\n{topWin}")
print()
# Graph a heatmap of the win rate of adc (marksman) and support combinations.
# Rarely played champions are removed first so the grid has fewer rows/columns and
# stays readable: an adc is kept with at least 900 games, a support with at least
# 550 games (supports are counted after the adc filter has been applied).
newBot = botRaw

adc_games = newBot['adc'].value_counts()
rare_adcs = adc_games[adc_games < 900].index
# isin() checks all rows in one pass instead of rebuilding the lookup set for every row
newBot = newBot[~newBot['adc'].isin(rare_adcs)]

support_games = newBot['support'].value_counts()
rare_supports = support_games[support_games < 550].index
newBot = newBot[~newBot['support'].isin(rare_supports)]

# Mean of 'result' per (support, adc) pair = win rate of the pairing.
# The string 'mean' is used because passing np.mean as aggfunc is deprecated in recent pandas.
df_heatmap = newBot.pivot_table(values='result', index='support', columns='adc', aggfunc='mean')
sn.heatmap(df_heatmap, annot=True)
plt.xlabel('marksman')
plt.ylabel('support')
plt.title('Win rate of marksman and support champions')
plt.show()

# Classify the data with K-nearest Neighbors
# Convert categorical data to numerical with pandas.get_dummies

print("K-nearest neighbors classifier")
# Every column except the label is one-hot encoded. A list comprehension (instead of a
# set difference) keeps the column order identical from run to run.
dummy_cols = [column for column in dataRaw.columns if column != 'result']
features = pd.get_dummies(dataRaw, columns=dummy_cols)
# Split the data into features and labels.
# get_dummies leaves the un-encoded 'result' column in the frame, placed before the
# dummy columns, so slicing off the last column would keep the label inside X and
# drop a real feature instead. Remove the label by name.
X = features.drop(columns='result').values
y = features['result']
# Split data into training (80%) and test (20%) sets; fixed seed for repeatable results
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=27)
# Run the model
KNN_model = KNeighborsClassifier(n_neighbors=5)
KNN_model.fit(X_train, y_train)
KNN_prediction = KNN_model.predict(X_test)
# scikit-learn metrics take (y_true, y_pred); swapping them exchanges precision and recall in the report
print("Accuracy KNN: ", accuracy_score(y_test, KNN_prediction))
print(classification_report(y_test, KNN_prediction))
   
# Transactions for the apriori algorithm: one list per match holding every botMatch
# column as a string. The list is cached in botmatch.txt so it does not have to be
# rebuilt on every run; it is only built (and saved) when the cache file is missing.
# NOTE: pickle.load can execute arbitrary code - only load a cache file you created yourself.
# NOTE(review): the cache is not refreshed when botmatch.csv or the filters change;
# delete botmatch.txt to force a rebuild.
try:
    with open("botmatch.txt", "rb") as fp:
        records2 = pickle.load(fp)
except FileNotFoundError:
    # .values is evaluated once here instead of once per cell
    records2 = [[str(value) for value in row] for row in botMatch.values]
    with open("botmatch.txt", "wb") as fp:
        pickle.dump(records2, fp)
print("\nApriori algorithm for bot lane matchups")
# efficient apriori test
# generating rules on adc and support combinations with at least 0.2% support
itemsets, rules = apriori2(records2, min_support=0.002, min_confidence=0.51)
# keep rules with exactly 5 items, 4 on the left hand side and 1 on the right hand side,
# where the right hand side is the match result ('0' or '1')
rules_rhs = [
    rule for rule in rules
    if len(rule.lhs) == 4 and len(rule.rhs) == 1 and rule.rhs[0] in ('0', '1')
]

# sort by lift (highest first) and print
for rule in sorted(rules_rhs, key=lambda rule: rule.lift, reverse=True):
    print(rule)
    
print()
# Overview of DAMWON's games: distinct values per column and number of games
print("DAMWON number of unique champion picks per role")
unique_per_column = damwon.nunique()
print(unique_per_column.to_string(dtype=False))
print("Total amount of games DAMWOM Gaming played in 2020: ", len(damwon['result']))

# DAMWON's win rate (mean of 'result') per top champion they played: five best, then five worst
damwon_top_rate = damwontop.groupby('damwontop')['result'].mean()
topWin = damwon_top_rate.sort_values(ascending=False).head(5).to_string(dtype=False, header=False)
print(f"Best DAMWON top champions by winrate\n{topWin}")
topWin = damwon_top_rate.sort_values(ascending=True).head(5).to_string(dtype=False, header=False)
print(f"Worst DAMWON top champions by winrate\n{topWin}")

# DAMWON's win rate per ENEMY top champion: a high value means the pick went badly for the enemy
enemy_top_rate = damwonEnemytop.groupby('enemytop')['result'].mean()
topWin = enemy_top_rate.sort_values(ascending=False).head(5).to_string(dtype=False, header=False)
print(f"Worst top champion to pick against DAMWON by winrate (DAMWON's winrate against the champion)\n{topWin}")
topWin = enemy_top_rate.sort_values(ascending=True).head(5).to_string(dtype=False, header=False)
print(f"Best top champion to pick against DAMWON by winrate (DAMWON's winrate against the champion)\n{topWin}")

# Graph the 6 most played champions for top lane by DAMWON.
# The bars are drawn straight from the value_counts() Series (index = champion,
# values = number of games). Going through reset_index() and the 'index' column
# name only works on pandas < 2.0.
damwon_top_games = damwontop['damwontop'].value_counts().head(6)
fig, ax = plt.subplots(1)
plt.bar(damwon_top_games.index, damwon_top_games.values)
plt.title('Most Played in Top by DAMWON')
plt.ylabel('frequency')
plt.show()

# Unique values for DAMWON's top champions and enemy top champions
print(damwontop.nunique())

# Graph the win rate of DAMWON's 5 best top champions as a bar graph
topGraph = damwontop.groupby('damwontop')['result'].mean().sort_values(ascending=False).head(5).reset_index()
fig, ax = plt.subplots(1)
plt.bar(topGraph['damwontop'], topGraph['result'])
ax.set_ylim(top=1)
plt.title('DAMWON\'s Top Winrate')
plt.show()

# Counting specific champion count. These were DAMWON's highest win rate top champions
champs = ['Poppy', 'Camille', 'Kayle', 'Wukong', 'Akali']
# The champion names are in the first column of topselect; its name depends on the
# pandas version ('index' before 2.0), so select it by position rather than by name
champion_column = topselect.columns[0]
print(topselect.loc[topselect[champion_column].isin(champs)])

# Graph the win rate of DAMWON's 5 worst top champions as a bar graph
# NOTE(review): this chart has the same title as the best-win-rate chart above
topGraph = damwontop.groupby('damwontop')['result'].mean().sort_values(ascending=True).head(5).reset_index()
fig, ax = plt.subplots(1)
plt.bar(topGraph['damwontop'], topGraph['result'])
ax.set_ylim(top=1)
plt.title('DAMWON\'s Top Winrate')
plt.show()

# Graph a heatmap of the win rate of DAMWON's adc and support combinations.
# A champion is kept when it has at least 5 games in its role (supports are counted
# after the adc filter has been applied).
# Because of the low match count, heatmaps are not ideal

newBot = damwonBot
adc_games = newBot['damwonadc'].value_counts()
rare_adcs = adc_games[adc_games < 5].index
# isin() checks all rows in one pass instead of rebuilding the lookup set for every row
newBot = newBot[~newBot['damwonadc'].isin(rare_adcs)]
support_games = newBot['damwonsupport'].value_counts()
rare_supports = support_games[support_games < 5].index
newBot = newBot[~newBot['damwonsupport'].isin(rare_supports)]

# Mean of 'result' per (adc, support) pair = win rate of the pairing.
# The string 'mean' is used because passing np.mean as aggfunc is deprecated in recent pandas.
df_heatmap = newBot.pivot_table(values='result', index='damwonadc', columns='damwonsupport', aggfunc='mean')
sn.heatmap(df_heatmap, annot=True)
plt.xlabel('supports')
plt.ylabel('marksman')
plt.title('Win rate of DAMWON\'s marksman and support champions ')
plt.show()

# Transactions for the apriori algorithm: one list per match holding every damwonBot
# column as a string. The list is cached in damwonBot.txt so it does not have to be
# rebuilt on every run; it is only built (and saved) when the cache file is missing.
# NOTE: pickle.load can execute arbitrary code - only load a cache file you created yourself.
# NOTE(review): the cache is not refreshed when damwonBot.csv or the filters change;
# delete damwonBot.txt to force a rebuild.
try:
    with open("damwonBot.txt", "rb") as fp:
        records2 = pickle.load(fp)
except FileNotFoundError:
    records2 = [[str(value) for value in row] for row in damwonBot.values]
    with open("damwonBot.txt", "wb") as fp:
        pickle.dump(records2, fp)
print("\nApriori algorithm for DAMWON's bot lane")
# efficient apriori test
# generating rules on adc and support combinations with at least 2% support
itemsets, rules = apriori2(records2, min_support=0.02, min_confidence=0.51)
# keep rules with exactly 3 items, 2 on the left hand side and 1 on the right hand side,
# where the right hand side is the match result ('0' or '1')
rules_rhs = [
    rule for rule in rules
    if len(rule.lhs) == 2 and len(rule.rhs) == 1 and rule.rhs[0] in ('0', '1')
]

# sort by support (highest first) and print
# NOTE(review): the bot lane matchup rules earlier in this file are sorted by lift - confirm support is intended here
for rule in sorted(rules_rhs, key=lambda rule: rule.support, reverse=True):
    print(rule)