In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from itertools import combinations
import csv
import networkx as nx
from networkx.algorithms.community.modularity_max import greedy_modularity_communities

In [2]:
def processData(file, queue_ids=(400, 420, 430, 440)):
    """Load a raw match CSV and return one row per team.

    The file is read without a header row and with its first column as the
    index, so the remaining columns are addressed by integer labels.  Rows
    are kept only when column 7 holds one of ``queue_ids``.  From the columns
    that follow, every third column (starting with the second) is selected
    together with columns 22 and 37; the first six selected columns form one
    team block and the remaining ones the other.
    NOTE(review): this presumably corresponds to five champion picks followed
    by that team's win flag -- confirm against the data export format.

    Parameters
    ----------
    file : str, path object or file-like
        Anything accepted by ``pandas.read_csv``.
    queue_ids : iterable of int, optional
        Values of column 7 to keep.  The default reproduces the four values
        that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ``Top``, ``Jungle``, ``Middle``, ``Bot``, ``Support`` and
        ``Win`` (cast to ``int``).  Rows of the first team block are stacked
        above those of the second, with a fresh 0..n-1 index.
    """
    raw = pd.read_csv(file, header=None, index_col=0)

    # A single vectorised membership test replaces a chain of ``==`` / ``|``.
    in_queue = raw[7].isin(list(queue_ids))
    trimmed = raw[in_queue].iloc[:, 7:]

    # Index.union returns the labels in sorted order, which is what lets the
    # positional split below separate the two team blocks.
    wanted = trimmed.columns[1::3].union([22, 37])
    selected = trimmed[wanted]

    columnNames = ["Top", "Jungle", "Middle", "Bot", "Support", "Win"]
    team_frames = []
    for block_cols in (slice(None, 6), slice(6, None)):
        # Copy so that renaming never touches (or warns about) the parent frame.
        team = selected.iloc[:, block_cols].copy()
        team.columns = columnNames
        team_frames.append(team)

    stacked = pd.concat(team_frames).reset_index(drop=True)
    stacked['Win'] = stacked['Win'].astype(int)

    return stacked

In [3]:
def update_champ_counts(champ_counts, teams):
    """Add one to the tally of every entry in ``teams``.

    ``champ_counts`` is updated in place and nothing is returned.  An entry
    that occurs several times in ``teams`` is counted once per occurrence.
    """
    for champion in teams:
        champ_counts[champion] = champ_counts[champion] + 1

In [4]:
def update_champ_pair_counts (champ_pair_counts, teams):
    """Tally every unordered pair of ``teams`` under both key orders.

    For each 2-combination ``(a, b)`` of ``teams`` the counters stored under
    ``(a, b)`` and ``(b, a)`` are each increased by one.  ``champ_pair_counts``
    is updated in place and nothing is returned.
    """
    for first, second in combinations(teams, 2):
        # Forward key first, then the mirrored key.
        for ordered_pair in ((first, second), (second, first)):
            champ_pair_counts[ordered_pair] += 1

In [5]:
def update_champ_pair_wins(champ_pair_wins, teams):
    """Credit one win to every champion pair of a winning team.

    Parameters
    ----------
    champ_pair_wins : mapping of (champion, champion) -> int
        Updated in place; must supply a default for unseen keys (for example
        ``collections.defaultdict(int)``).
    teams : sequence
        One team row: the champions in positions 0-4 and the win flag in
        position 5.

    Nothing is returned.  When the win flag equals 1, the counters for
    ``(a, b)`` and ``(b, a)`` are increased for every pair of champions.

    Raises
    ------
    IndexError
        If ``teams`` has between two and five entries, i.e. pairs exist but
        the win flag is missing.
    """
    if len(teams) < 2:
        return  # no pair can be formed from fewer than two entries

    # The win flag does not change per pair, so test it once up front.
    if teams[5] == 1:
        # Pair only the five champions; the win flag in position 5 is not a
        # champion and must not end up inside a pair key.
        for a, b in combinations(teams[:5], 2):
            champ_pair_wins[(a, b)] += 1
            champ_pair_wins[(b, a)] += 1

In [6]:
def filter_rules(champ_pair_counts, champ_counts, conf_threshold, size):
    """Compute confidence rules and lifts for champion pairs.

    Parameters
    ----------
    champ_pair_counts : mapping of (a, b) -> int
        Number of teams containing both ``a`` and ``b``.
    champ_counts : mapping of champion -> int
        Number of teams containing each champion.
    conf_threshold : float
        Minimum confidence, in percent, for a rule to be kept.
    size : int
        Total number of teams, used to turn counts into probabilities.

    Returns
    -------
    (rules, lifts) : tuple of dict
        ``rules[(a, b)]`` is the confidence of ``a -> b`` in percent
        (``count(a, b) / count(a) * 100``), present only when that value is at
        least ``conf_threshold``.  ``lifts`` holds the (symmetric) lift under
        both key orders for every pair whose ``a -> b`` confidence passed the
        threshold and whose lift is greater than 1.
    """
    rules = {}
    lifts = {}
    for (a, b), pair_count in champ_pair_counts.items():
        conf_ab = pair_count / champ_counts[a] * 100
        conf_ba = pair_count / champ_counts[b] * 100
        lift_ab = (pair_count / size) / (champ_counts[a] / size * champ_counts[b] / size)
        if conf_ab >= conf_threshold:
            rules[(a, b)] = conf_ab
            # Confidence is directional: the reverse rule has to clear the
            # threshold on its own value, otherwise a frequent ``b`` would be
            # reported as implying a rare ``a``.
            if conf_ba >= conf_threshold:
                rules[(b, a)] = conf_ba
            if lift_ab > 1:
                lifts[(a, b)] = lift_ab
                lifts[(b, a)] = lift_ab
    return rules, lifts

In [7]:
def filter_supports(champ_pair_counts, allTeams, sup_threshold):
    """Return the pairs whose support reaches ``sup_threshold``.

    Support is the pair count divided by ``len(allTeams)``, expressed in
    percent.  The result maps each qualifying pair key to that percentage,
    in the iteration order of ``champ_pair_counts``.
    """
    support_by_pair = (
        (pair, count / len(allTeams) * 100)
        for pair, count in champ_pair_counts.items()
    )
    return {
        pair: support
        for pair, support in support_by_pair
        if support >= sup_threshold
    }

In [8]:
def mine_rules(allTeams, withWins, sup_threshold, conf_threshold):
    """Run the counting passes and derive supports, rules and lifts.

    Parameters
    ----------
    allTeams : sized iterable of team rows
        Rows passed to the champion and champion-pair counters.
    withWins : iterable of team rows
        Rows passed to ``update_champ_pair_wins``.
    sup_threshold : float
        Forwarded to ``filter_supports``.
    conf_threshold : float
        Forwarded to ``filter_rules``.

    Returns
    -------
    tuple
        ``(supports, rules, lifts, champ_pair_counts, champ_pair_wins)``.
    """
    pair_tally = defaultdict(int)
    single_tally = defaultdict(int)
    pair_win_tally = defaultdict(int)
    n_teams = len(allTeams)

    # Pass 1: how often each pair and each single champion appears.
    for roster in allTeams:
        update_champ_pair_counts(pair_tally, roster)
        update_champ_counts(single_tally, roster)

    # Pass 2: how often each pair appears on a winning team.
    for roster_with_flag in withWins:
        update_champ_pair_wins(pair_win_tally, roster_with_flag)

    rules, lifts = filter_rules(pair_tally, single_tally, conf_threshold, n_teams)
    supports = filter_supports(pair_tally, allTeams, sup_threshold)

    return supports, rules, lifts, pair_tally, pair_win_tally

In [9]:
def filterGraph(z1,z4,z5, winRates):
    """Keep the supports of pairs whose win rate exceeds ``winRates``.

    Parameters
    ----------
    z1 : mapping of pair -> support value
    z4 : mapping of pair -> appearance count
    z5 : mapping of pair -> win count (looked up for every key of ``z4``)
    winRates : float
        A pair is kept only if ``z5[pair] / z4[pair]`` is strictly greater
        than this value.

    Returns
    -------
    dict
        Maps each surviving pair, with its two members in sorted order, to
        the support of the first matching key met in ``z1``.
    """
    win_rate_by_pair = {pair: z5[pair] / z4[pair] for pair in z4.keys()}
    strong_pairs = {
        pair for pair, rate in win_rate_by_pair.items() if rate > winRates
    }

    finalDict = {}
    for pair, support in z1.items():
        if pair not in strong_pairs:
            continue
        # (a, b) and (b, a) collapse onto one key; the first one seen wins.
        canonical = tuple(sorted(pair))
        finalDict.setdefault(canonical, support)
    return finalDict

In [10]:
def createEdges(finalDict):
    """Flatten ``{(source, target): value}`` into ``[source, target, value]`` rows.

    Rows follow the iteration order of ``finalDict``.
    """
    return [[pair[0], pair[1], weight] for pair, weight in finalDict.items()]

In [11]:
def createNodes(finalDict, finalList):
    """Build ``[node, group]`` rows with one row per distinct pair member.

    Parameters
    ----------
    finalDict : mapping of (node, node) -> value
        Only its keys are used, to collect the nodes in first-seen order.
    finalList : iterable of ``[source, target, weight]``
        Edges added to an undirected ``networkx`` graph, with the third
        entry stored as the ``weight`` edge attribute.

    Returns
    -------
    list of [node, group]
        ``group`` is the 1-based position of the node's community in the
        result of ``greedy_modularity_communities``, or 0 when that community
        has a single member or the node belongs to no community.

    NOTE(review): ``greedy_modularity_communities`` is called without a
    ``weight`` argument, so the stored edge weights may not influence the
    communities -- confirm against the installed networkx version.
    """
    # Every endpoint starts in group 0; dict order keeps first appearance.
    group_of = {}
    for pair, _ in finalDict.items():
        group_of.setdefault(pair[0], 0)
        group_of.setdefault(pair[1], 0)

    graph = nx.Graph()
    for edge in finalList:
        graph.add_edge(edge[0], edge[1], weight=edge[2])

    communities = sorted(greedy_modularity_communities(graph))

    # Walk the communities in order so that, should a node appear in more than
    # one, the last community containing it decides its group.
    for position, members in enumerate(communities, start=1):
        label = 0 if len(members) == 1 else position
        for node in members:
            if node in group_of:
                group_of[node] = label

    return [[node, label] for node, label in group_of.items()]

In [12]:
def runAlgos(file, winRate, sup_threshold=0.1, conf_threshold=5):
    """Run the whole pipeline for one match file and return graph tables.

    Parameters
    ----------
    file : str, path object or file-like
        Input passed to ``processData``.
    winRate : float
        Minimum pair win rate (wins / appearances) passed to ``filterGraph``;
        only pairs strictly above it are kept.
    sup_threshold : float, optional
        Minimum pair support in percent, passed to ``mine_rules``.
        Defaults to 0.1.
    conf_threshold : float, optional
        Minimum rule confidence in percent, passed to ``mine_rules``.
        Defaults to 5.

    Returns
    -------
    (finalNodes, finalEdges) : tuple of list
        ``finalNodes`` holds ``[node, group]`` rows and ``finalEdges`` holds
        ``[source, target, value]`` rows.
    """
    team_rows = processData(file)
    champions_only = team_rows.drop(columns=['Win']).to_numpy()
    champions_and_win = team_rows.to_numpy()

    # The rules and lifts are computed by mine_rules but not needed here.
    supports, _rules, _lifts, pair_counts, pair_wins = mine_rules(
        champions_only, champions_and_win, sup_threshold, conf_threshold
    )

    finalDict = filterGraph(supports, pair_counts, pair_wins, winRate)
    finalEdges = createEdges(finalDict)
    finalNodes = createNodes(finalDict, finalEdges)

    return finalNodes, finalEdges

In [13]:
# Build the node and edge tables for one match export, keeping only champion
# pairs whose win rate is strictly above 0.55.
file = 'data/matchdata_eun1.csv'
finalNodes, finalEdges = runAlgos(file, 0.55)

In [14]:
# Preview the first ten node rows; each row is [node id, community group].
finalNodes[:10]

[['Diana', 7],
 ['Viego', 10],
 ['Seraphine', 7],
 ['Graves', 0],
 ['Malphite', 23],
 ['Lux', 16],
 ['Soraka', 0],
 ['Vex', 30],
 ['Vayne', 15],
 ['Nocturne', 2]]

In [15]:
# Preview the first ten edge rows; each row is [source, target, value].
finalEdges[:10]

[['Diana', 'Viego', 0.114944478847671],
 ['Diana', 'Seraphine', 0.11367203074234622],
 ['Graves', 'Malphite', 0.20995393737858725],
 ['Lux', 'Soraka', 0.12639651179559394],
 ['Soraka', 'Vex', 0.17941518285079272],
 ['Soraka', 'Vayne', 0.28502837559274874],
 ['Nocturne', 'Tristana', 0.18832231958806614],
 ['Maokai', 'MissFortune', 0.11706522568987894],
 ['Jhin', 'Xerath', 0.38979326959782157],
 ['Lux', 'Xerath', 0.17178049421884412]]

In [None]:
## uncomment when you want to write the csv
## be sure to create a unique name for the files
## e.g. edges_eun1.csv and nodes_eun1.csv

#headers = ['source', 'target', 'value']
#with open("edges.csv", "w", newline="") as e:
#    writer = csv.writer(e)
#    writer.writerow(headers)
#    writer.writerows(finalEdges)

In [None]:
#headers = ['id','group']
#with open('nodes.csv','w',newline='') as n:
#    writer = csv.writer(n)
#    writer.writerow(headers)
#    writer.writerows(finalNodes)