# Build graph for card ids

The idea here is to

1. Load the previously ETL-processed outgoing and incoming nodes and edges (as dataframes)
2. Build outgoing and incoming graphs for each card id.
3. Maybe build a composed graph (in and out) for each card_id

Next:
4. Work with the graphs of a specific deck

At the end, store it in a pickle to avoid parsing everything again next time, which takes a long time.

**DESIRED RESULT**:
result = {card_id1: {outgoing: graph_from_text, incoming: graph_from_attributes},
          card_id2: {outgoing: graph_from_text, incoming: graph_from_attributes}
          }

In [None]:
import json
import pandas as pd
import re
from collections import defaultdict
from IPython.display import clear_output

# Params

In [None]:
from sqlalchemy import create_engine
# SQLAlchemy engine for the local `mtg` PostgreSQL database.
# NOTE(review): user, password, host and port are hard-coded in the URL.
engine = create_engine('postgresql+psycopg2://mtg:mtg@localhost:5432/mtg')
# Opens a connection (presumably as a connectivity check); the returned
# connection object is not stored, so it is never explicitly closed here.
engine.connect()

# Create dataframe of cards

In [None]:
# Load the node and edge dataframes of both directions (outgoing / incoming)
# from their pickle files.
out_nodes = pd.read_pickle('./pickles/cards_outgoing_nodes.pkl')
out_edges = pd.read_pickle('./pickles/cards_outgoing_edges.pkl')
in_nodes = pd.read_pickle('./pickles/cards_incoming_nodes.pkl')
in_edges = pd.read_pickle('./pickles/cards_incoming_edges.pkl')
# NOTE(review): `cards_df` is not defined in the visible part of this notebook;
# the sampling line below looks like a leftover.
# cards_df = cards_df.sample(200)

In [None]:
# Keep only the rows whose `type` is 'entity', separately for each direction.
ent_out_nodes = out_nodes.loc[out_nodes['type'].eq('entity')]
ent_in_nodes = in_nodes.loc[in_nodes['type'].eq('entity')]

In [None]:
# Cards that share a name only need their graph built once, so keep a single
# 'card' row per card_name.
unique_cards = (
    out_nodes.loc[out_nodes['type'].eq('card')]
    .drop_duplicates(subset=['card_name'])
)

In [None]:
# Card ids to build graphs for (one per unique card name). The commented-out
# `.sample(11)` presumably limits the run to a small sample while testing.
ids_to_process = unique_cards['card_id']#.sample(11)

# Helper functions

In [None]:
# Split dataframelist
import collections
def splitDataFrameList(df,target_column,separator=None):
    '''
    Explode a column of iterables so that every element gets its own row.

    Adapted from https://gist.github.com/jlln/338b4b0b55bd6984f883

    df = dataframe to split,
    target_column = the column containing the values to split
    separator = optional symbol; when given, string cells are split on it
                before being expanded. Without it, cells are iterated as-is
                (so a plain string is expanded character by character).
    returns: a dataframe with each entry for the target column separated, with each element moved into a new row.
    The values in the other columns are duplicated across the newly divided rows.
    Cells that are not iterable (e.g. None/NaN) produce a single row holding NaN,
    and an empty iterable produces no row at all. An empty input yields an empty
    dataframe with the same columns.
    '''
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in collections.abc.
    from collections.abc import Iterable

    def splitListToRows(row,row_accumulator,target_column,separator):
        cell = row[target_column]
        if separator is not None and isinstance(cell, str):
            cell = cell.split(separator)
        if isinstance(cell, Iterable):
            for element in cell:
                new_row = row.to_dict()
                new_row[target_column] = element
                row_accumulator.append(new_row)
        else:
            new_row = row.to_dict()
            # float('nan') replaces `pd.np.nan`; the `pd.np` alias was removed in pandas 2.0.
            new_row[target_column] = float('nan')
            row_accumulator.append(new_row)

    new_rows = []
    # Explicit iteration instead of df.apply(axis=1): apply may invoke the
    # function on a dummy row when the frame is empty, adding a spurious row.
    for _, row in df.iterrows():
        splitListToRows(row, new_rows, target_column, separator)

    if not new_rows:
        return pd.DataFrame(columns=df.columns)
    return pd.DataFrame(new_rows)

In [None]:
# Create hashable dict
from collections import OrderedDict
import hashlib
class HashableDict(OrderedDict):
    """OrderedDict that can be hashed and can produce a SHA-256 fingerprint."""

    def __hash__(self):
        # Hash the (key, value) pairs in sorted order.
        ordered_pairs = sorted(self.items())
        return hash(tuple(ordered_pairs))

    def hexdigext(self):
        """Return the SHA-256 hex digest of all keys and values concatenated as strings.

        Pairs are taken in the dict's own iteration order, key first, then value.
        """
        pieces = [str(part) for pair in self.items() for part in pair]
        payload = ''.join(pieces).encode()
        return hashlib.sha256(payload).hexdigest()

In [None]:
# Make defaultdict which depends on its key
# Source: https://www.reddit.com/r/Python/comments/27crqg/making_defaultdict_create_defaults_that_are_a/
from collections import defaultdict
class key_dependent_dict(defaultdict):
    """defaultdict variant whose default value is computed from the missing key."""

    def __init__(self, f_of_x):
        # The base class gets no factory; defaults come from __missing__ below.
        super().__init__(None)
        self.f_of_x = f_of_x

    def __missing__(self, key):
        # Compute the default from the key, store it, then hand it back.
        value = self.f_of_x(key)
        self[key] = value
        return value
    
def entity_key_hash(key):
    """Return the SHA-256 hex digest of the single-entry mapping {'entity': key}."""
    wrapped = HashableDict({'entity': key})
    return wrapped.hexdigext()

In [None]:
# Styling tables for drawing a graph to png.
# `shapes`, `colors` and `styles` are only referenced from commented-out lines
# in draw_graph within the visible part of this notebook.
# NOTE(review): 'exagon' looks like a typo for 'hexagon' -- confirm before relying on it.
shapes = ['box', 'polygon', 'ellipse', 'oval', 'circle', 'egg', 'triangle', 'exagon', 'star']
colors = ['blue', 'black', 'red', '#db8625', 'green', 'gray', 'cyan', '#ed125b']
styles = ['filled', 'rounded', 'rounded, filled', 'dashed', 'dotted, bold']

# Fill/outline colour per entity type; draw_graph looks a node's
# `entity_node_ent_type` attribute up in this mapping.
entities_colors = {
    'PLAYER': '#FF6E6E',
    'ZONE': '#F5D300',
    'ACTION': '#1ADA00',
    'MANA': '#00DA84',
    'SUBTYPE': '#0DE5E5',
    'TYPE': '#0513F0',
    'SUPERTYPE': '#8D0BCA',
    'ABILITY': '#cc3300',
    'COLOR': '#666633',
    'STEP': '#E0E0F8'
}

def draw_graph(G, filename='test.png'):
    """Render graph ``G`` to a PNG file through pydot and return it as an IPython image.

    Every node is labelled with its ``label`` attribute ('none' when absent).
    Nodes carrying an ``entity_node_ent_type`` attribute are drawn as filled
    hexagons coloured from ``entities_colors``; nodes whose ``type`` attribute
    is '"card"' are drawn as filled stars. Edges are labelled with their
    ``label`` attribute ('NO-LABEL' when absent).

    G: graph accepted by ``nx.drawing.nx_pydot.to_pydot``.
    filename: path of the PNG file to write.
    returns: ``IPython.display.Image`` built from ``filename``.
    raises: KeyError when an entity type is missing from ``entities_colors``.
    """
    pdot = nx.drawing.nx_pydot.to_pydot(G)

    for node in pdot.get_nodes():
        attrs = node.get_attributes()
        node.set_label(str(attrs.get('label', 'none')))

        # Default to None rather than `pd.np.nan`: the `pd.np` alias was removed
        # in pandas 2.0, and pd.isnull(None) is True as well.
        entity_node_ent_type = attrs.get('entity_node_ent_type')
        if not pd.isnull(entity_node_ent_type):
            # The attribute value is wrapped in double quotes; strip them for the lookup.
            color = entities_colors[entity_node_ent_type.strip('"')]
            node.set_fillcolor(color)
            node.set_color(color)
            node.set_shape('hexagon')
            node.set_style('filled')

        # The type value is compared including its surrounding double quotes.
        if attrs.get('type') == '"card"':
            node.set_fillcolor('#999966')
            node.set_shape('star')
            node.set_style('filled')

    for edge in pdot.get_edges():
        edge.set_label(edge.get_attributes().get('label', 'NO-LABEL'))

    png_path = filename
    pdot.write_png(png_path)

    from IPython.display import Image
    return Image(png_path)

# Build graph with Networkx

In [None]:
import networkx as nx

In [None]:
def eliminate_and_wrap_in_quotes(text):
    """Return ``str(text)`` with every double quote removed, wrapped in one pair of double quotes."""
    unquoted = str(text).replace('"', '')
    return f'"{unquoted}"'

In [None]:
import pickle

In [None]:
# Build the outgoing and incoming graph of every card id and pickle each pair
# to its own file, skipping cards whose file already exists.
import os.path


def _build_card_graph(card_id, nodes, edges, entity_nodes):
    """Build the directed graph of one card from node/edge dataframes.

    card_id: value matched against ``nodes['card_id']``.
    nodes: dataframe with at least ``card_id`` and ``node_id`` columns.
    edges: dataframe with ``source`` and ``target`` columns; every other
           column becomes an edge attribute.
    entity_nodes: entity rows (with a ``node_id`` column); those touched by
                  the card's edges contribute node attributes too.
    returns: ``nx.DiGraph`` whose node attributes are the non-null column
             values, stripped of double quotes and wrapped in double quotes.
    """
    card_nodes = nodes[nodes['card_id'] == card_id]
    touches_card = (edges['source'].isin(card_nodes['node_id'])
                    | edges['target'].isin(card_nodes['node_id']))
    card_edges = edges[touches_card]

    # Entity nodes that appear at either end of one of the card's edges.
    linked = (entity_nodes['node_id'].isin(card_edges['source'])
              | entity_nodes['node_id'].isin(card_edges['target']))
    all_nodes = pd.concat([card_nodes, entity_nodes[linked]], sort=False)

    edge_attr = [col for col in card_edges.columns if col not in ('source', 'target')]
    graph = nx.from_pandas_edgelist(card_edges,
                                    source='source',
                                    target='target',
                                    edge_attr=edge_attr,
                                    create_using=nx.DiGraph())

    # One pass per column is enough: the columns are the same whatever the
    # node `type`, so repeating this once per type only redid identical work.
    for node_attr in all_nodes.columns:
        if node_attr == 'node_id':
            continue
        known = all_nodes[[node_attr, 'node_id']].dropna()
        quoted = known[node_attr].apply(eliminate_and_wrap_in_quotes)
        values_by_node = pd.Series(quoted.values, index=known['node_id'].values).to_dict()
        nx.set_node_attributes(G=graph, values=values_by_node, name=node_attr)

    return graph


# `result` only ever holds the most recently built card, to keep memory flat;
# the full output lives in the per-card pickle files.
result = {}
cards_graph_dir = './pickles/card_graphs/'
# Create the output folder up front so the first dump cannot fail on a missing directory.
os.makedirs(cards_graph_dir, exist_ok=True)

for i, card_id in enumerate(ids_to_process):

    path_to_graph_file = cards_graph_dir+card_id
    if os.path.isfile(path_to_graph_file):
        # Already built in a previous run.
        continue

    if not i%100:
        clear_output()
    elif not i%10:
        print('{0}/{1} cards processed'.format(i, ids_to_process.shape[0]))

    G = _build_card_graph(card_id, out_nodes, out_edges, ent_out_nodes)
    H = _build_card_graph(card_id, in_nodes, in_edges, ent_in_nodes)

    # Replace (rather than clear afterwards) so the last built card is still
    # available in `result` once the loop has finished.
    result = {card_id: {'outgoing': G, 'incoming': H}}

    # Context manager guarantees the file is flushed and closed.
    with open(path_to_graph_file, 'wb') as graph_file:
        pickle.dump(result[card_id], graph_file)

In [None]:
# The build loop does not keep finished cards in `result`, so read the graphs
# of the last card id back from its pickle file (written by this notebook).
with open(cards_graph_dir + card_id, 'rb') as graph_file:
    incoming_card_graphs = pickle.load(graph_file)
draw_graph(incoming_card_graphs['incoming'])

In [None]:
# The build loop does not keep finished cards in `result`, so read the graphs
# of the last card id back from its pickle file (written by this notebook).
with open(cards_graph_dir + card_id, 'rb') as graph_file:
    outgoing_card_graphs = pickle.load(graph_file)
draw_graph(outgoing_card_graphs['outgoing'])