In [1]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori
import json
from pandas.io.json import json_normalize
from mlxtend.frequent_patterns import association_rules
from tqdm import tqdm
import networkx as nx
import flask
import seaborn as sns
%matplotlib inline

In [26]:
# Month of game records to analyse; it selects the input file here and is
# reused at the end of the notebook to name the exported graph file.
month = '2017-08'
# Parse the month's JSON dump. The context manager closes the file handle as
# soon as parsing finishes instead of leaving it to the garbage collector.
with open('../datasets/' + month + '.json') as gamesFile:
    data = json.load(gamesFile)

In [27]:
# Flatten the records stored under the 'games' key into a DataFrame and
# preview the first rows (one row per game record).
rawDf = json_normalize(data['games'])
rawDf.head()

Unnamed: 0,added,card_history,coin,duration,hero,hero_deck,id,legend,mode,note,opponent,opponent_deck,rank,region,result,user_hash
0,2017-08-01T12:08:00Z,"[{'player': 'opponent', 'turn': 1, 'card': {'i...",True,323.0,Hunter,Midrange,85806542,,ranked,,Shaman,Token,,Europe,win,21995B902C82AC4F0ECBEC23A3CB54AF
1,2017-08-01T12:02:29Z,"[{'player': 'opponent', 'turn': 1, 'card': {'i...",True,311.0,Hunter,Face,85806025,,ranked,,Rogue,Miracle,,Europe,win,21995B902C82AC4F0ECBEC23A3CB54AF
2,2017-08-01T11:57:15Z,"[{'player': 'me', 'turn': 1, 'card': {'id': 'C...",False,207.0,Hunter,Midrange,85805533,,ranked,,Hunter,Midrange,,Europe,win,21995B902C82AC4F0ECBEC23A3CB54AF
3,2017-08-01T11:53:42Z,"[{'player': 'me', 'turn': 1, 'card': {'id': 'C...",False,148.0,Hunter,,85805219,,ranked,,Warrior,,,Europe,win,21995B902C82AC4F0ECBEC23A3CB54AF
4,2017-08-01T11:51:05Z,"[{'player': 'me', 'turn': 1, 'card': {'id': 'C...",True,230.0,Hunter,Midrange,85804959,,ranked,,Warrior,Pirate,15.0,Europe,win,21995B902C82AC4F0ECBEC23A3CB54AF


In [28]:
# Build one {card name: 1} presence dict per game and per side.
# A card that appears several times in a game is still recorded once.
meCards = []
opponentCards = []
for history in rawDf['card_history']:
    # Fresh presence maps for this game, keyed by the action's 'player' value.
    playedBy = {'me': {}, 'opponent': {}}
    for play in history:
        cardName = str(play['card']['name'])
        playedBy[play['player']][cardName] = 1

    meCards.append(playedBy['me'])
    opponentCards.append(playedBy['opponent'])

In [29]:
# One indicator row per (game, side): the player's rows first, then the
# opponent's. Both frames use the bare card names as column labels, so the
# columns line up on shared cards when the frames are stacked.
meDf = pd.DataFrame(meCards)
opponentDf = pd.DataFrame(opponentCards)
# pd.concat stacks the rows the same way DataFrame.append did; append was
# deprecated in pandas 1.4 and removed in pandas 2.0.
df = pd.concat([meDf, opponentDf])
# A card missing from a row's dict comes out as NaN; store it as 0.
df = df.fillna(0).astype('int')
df.head()

Unnamed: 0,"""Little Friend""",A Light in the Darkness,AFK,Aberrant Berserker,Abominable Bowman,Abomination,Abusive Sergeant,Abyssal Enforcer,Acherus Veteran,Acidic Swamp Ooze,...,"Y'Shaarj, Rage Unbound","Yogg-Saron, Hope's End",Young Dragonhawk,Young Priestess,Youthful Brewmaster,Ysera,Ysera Awakens,Zealous Initiate,Zombie Chow,Zoobot
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
def FilterCards(dfCards, cardDict):
    """Return dfCards restricted to the columns whose label is in cardDict.

    Parameters
    ----------
    dfCards : pandas.DataFrame
        Frame whose column labels are tested for membership.
    cardDict : container
        Anything supporting ``in``; for a dict, membership is by key.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the retained columns in their original order.
        dfCards itself is not modified.
    """
    # One boolean per column, applied in a single selection. Dropping the
    # rejected columns one at a time rebuilds the frame on every drop and
    # raises KeyError when a rejected label occurs more than once.
    keep = [col in cardDict for col in dfCards.columns]
    return dfCards.loc[:, keep]

In [31]:
# Load the collectible-card table and map each card name to its class
collectableCards = pd.read_json('../datasets/collectibleCards.json')
cardClass = dict(zip(collectableCards['name'], collectableCards['cardClass']))

# Keep only the columns of df whose label is a collectible card name
df = FilterCards(df, cardClass)

In [32]:
def CrossProbabilities1(df):
    """Compute the support of every unordered pair of columns by brute force.

    For each pair of distinct column positions (earlier column first) the two
    columns are combined with a bitwise AND, summed, and divided by the number
    of rows. With 0/1 indicator columns that is the fraction of rows in which
    both columns are 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Integer indicator frame, one column per item.

    Returns
    -------
    pandas.DataFrame
        Two columns: 'itemsets' (a ``(col1, col2)`` tuple) and 'support'.
    """
    labels = df.columns
    nRows = df.shape[0]
    nLabels = len(labels)
    support = {}

    # Outer loop carries the progress bar; inner loop visits later columns only
    for first in tqdm(range(nLabels - 1)):
        left = labels[first]
        for second in range(first + 1, nLabels):
            right = labels[second]
            support[(left, right)] = (df[left] & df[right]).sum() / nRows

    # Dict keys become the index; move them back into a regular column
    result = pd.DataFrame.from_dict(support, orient='index').reset_index()
    result.columns = ['itemsets', 'support']
    return result
    
    
def CrossProbabilities2(df):
    """Compute the support of item pairs with mlxtend's ``apriori``.

    Parameters
    ----------
    df : pandas.DataFrame
        Indicator frame handed straight to ``apriori``; its column labels are
        used as item names (``use_colnames=True``).

    Returns
    -------
    pandas.DataFrame
        The ``apriori`` rows whose itemset holds more than one item, ordered
        by ascending 'support' and re-indexed from 0.
    """
    # NOTE(review): min_support=0 asks for every pair, including pairs that
    # never co-occur. Recent mlxtend releases appear to reject non-positive
    # min_support; confirm against the installed version.
    frequentSets = apriori(df, min_support=0, use_colnames=True, max_len=2)

    # max_len=2 caps the itemset size; drop the itemsets of length 1 or less.
    isPair = frequentSets['itemsets'].apply(len) > 1
    # sort_values returns a new frame, so its result has to be kept. The
    # previous code called it and discarded the result, leaving the rows
    # unsorted.
    probabilities = frequentSets[isPair].sort_values('support')
    return probabilities.reset_index(drop=True)

In [None]:
# Pairwise supports for all remaining cards; pairs whose support is exactly 0
# are dropped in the same expression.
probabilities = CrossProbabilities2(df).loc[lambda pairs: pairs['support'] != 0]

In [None]:
# CSS-style rgb() colour string for each lowercase card-class name
colorDict = dict(
    warlock='rgb(92,60,97)',
    mage='rgb(86,100,139)',
    hunter='rgb(26,96,46)',
    priest='rgb(167,173,179)',
    druid='rgb(88,57,35)',
    warrior='rgb(134,45,36)',
    rogue='rgb(56,57,61)',
    shaman='rgb(57,68,111)',
    paladin='rgb(181,123,56)',
    neutral='rgb(130,107,97)',
)

# colorDict = {'warlock':'purple', 
#              'mage':'lightblue',
#              'hunter':'green',
#              'priest':'white',
#              'druid':'lightbrown',
#              'warrior':'red',
#              'rogue':'darkgrey',
#              'shaman':'darkblue',
#              'paladin':'yellow',
#              'neutral':'darkbrown'}

In [None]:
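# Keeps only the pairs whose support is strictly greater than minSupport,
# then adds the n best-supported remaining pairs as edges.
# cardClass is a dict mapping each card name to its class; when it is given,
# every node is tagged with its class and a colour looked up in colorDict.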

def ProbabilitiesToGraph(df, cardClass=None, colorDict=None, minSupport=0, n=50):
    """Build an undirected graph from the best-supported itemsets.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'support' column and an 'itemsets' column; every retained
        itemset is handed to networkx as one edge.
    cardClass : dict, optional
        Card name -> class name. When given, each node gets a 'class'
        attribute.
    colorDict : dict, optional
        Lowercase class name -> colour. When given together with cardClass,
        each node also gets a 'color' attribute.
    minSupport : number
        Rows whose support is not strictly greater than this are discarded.
    n : int
        Maximum number of rows (edges) kept, highest support first.

    Returns
    -------
    networkx.Graph
        Every node carries a 'degree' attribute plus the optional attributes
        described above.

    Raises
    ------
    KeyError
        If cardClass is given but lacks a node's name, or colorDict is given
        but lacks the node's lowercased class.
    """
    g = nx.Graph()

    # Strictly-greater support filter, then the n highest supports
    strongest = df[df['support'] > minSupport]
    strongest = strongest.sort_values('support', ascending=False).head(n)

    g.add_edges_from(strongest['itemsets'])

    # Iterate over a snapshot so node data is not written while walking g.
    for card in list(g):
        attrs = {'degree': g.degree(card)}
        if cardClass is not None:
            attrs['class'] = cardClass[card]
            # colorDict defaults to None; skip colouring in that case rather
            # than fail on subscripting None.
            if colorDict is not None:
                attrs['color'] = colorDict[cardClass[card].lower()]
        # Re-adding an existing node updates its attributes. This works
        # across networkx releases, whereas the `Graph.node` accessor was
        # removed in networkx 2.4.
        g.add_node(card, **attrs)

    return g

In [None]:
# Build the graph from at most the 5000 best-supported card pairs, tagging
# each node with its class and colour, then render it inline.
g = ProbabilitiesToGraph(probabilities, cardClass=cardClass, colorDict=colorDict, n=5000)
nx.draw(g)

In [None]:
# Convert the graph to networkx's node-link dict so it can be dumped as JSON.
d = nx.readwrite.json_graph.node_link_data(g)
# Write under ../graphVisualization/graphData/, named after the month. The
# with-block flushes and closes the file deterministically; an inline open()
# leaves that to the garbage collector.
with open('../graphVisualization/graphData/' +
          month + '-graph.json', 'w') as graphFile:
    json.dump(d, graphFile, indent=2)