# A 3d visualization of a random sample of NHL games this season. 
## This notebook represents several iterations and refinements to produce an original digraph dataset, which, as far as a cursory literature review suggests, has never been built from hockey data. This merely scratches the surface of what is possible with graphs, network analysis, and the combination of d3 and Three.js seen in the excellent https://github.com/vasturiano/3d-force-graph

In [1]:
import requests
import json
import plotly.offline as py
import plotly.graph_objs as go
import random
import networkx as nx
from networkx.readwrite import json_graph
from node2vec import Node2Vec


In [58]:
# Gets the team roster to fill out the nodes for the desired season, sets up the api dictionary for easy use
def get_raw_nhl_data(season_year):
    """Download every team's roster and assemble the API lookup dictionary.

    Args:
        season_year: Season identifier. It is only interpolated into the
            ``'year'`` entry of the returned dictionary; the roster
            requests issued here do not include it.

    Returns:
        A ``(nhl_roster, api_dict)`` tuple: the list of decoded roster
        responses (one per team, in the order the teams endpoint lists
        them) and the dictionary of URL fragments used to build requests.
    """
    teams_endpoint = r'https://statsapi.web.nhl.com/api/v1/teams'
    roster_query = r'?expand=team.roster'
    api_dict = {
        'entry_url': r'https://statsapi.web.nhl.com/api/v1/',
        'section': r'game',
        'year': f'{season_year}',
        'reg_season': r'02',
        'options': r'/feed/live',
        'team_url': teams_endpoint,
        'roster': roster_query,
    }

    # One request for the list of teams, then one roster request per team.
    teams_payload = requests.get(teams_endpoint).json()
    team_ids = [(club['id'], club['name']) for club in teams_payload['teams']]

    nhl_roster = []
    for team_id, _team_name in team_ids:
        roster_response = requests.get(f'{teams_endpoint}/{team_id}{roster_query}')
        nhl_roster.append(roster_response.json())
    return nhl_roster, api_dict


# These work well for igraph and Plotly. If I want to use d3, I might need a better format.
# Gets the roster for every team. Each player is a node. The returned table is the node list that index_edges and format_nodes consume.
def get_hockey_players(nhl_roster):
    """Flatten the per-team roster responses into one player lookup table.

    Args:
        nhl_roster: Sequence of roster responses. Each one is read at
            ``['teams'][0]`` for the team ``'name'`` and at
            ``['teams'][0]['roster']['roster']`` for its player entries.

    Returns:
        dict keyed by each entry's ``person['id']``, mapping to
        ``{'index', 'fullName', 'team'}``. ``index`` counts roster entries
        across all teams in order; if an id appears more than once, the
        later entry (and its index) replaces the earlier one.
    """
    players = {}
    position = 0
    for response in nhl_roster:
        club = response['teams'][0]
        for entry in club['roster']['roster']:
            person = entry['person']
            record = {
                'index': position,
                'fullName': f"{person['fullName']}",
                'team': f"{club['name']}",
            }
            players[person['id']] = record
            position += 1
    return players

# Gets a random sample of the season's regular-season games
def get_hockey_games(api_params, sample_size):
    """Download the live feed for a random sample of regular-season games.

    Args:
        api_params: Dictionary providing the ``'entry_url'``, ``'section'``,
            ``'year'``, ``'reg_season'`` and ``'options'`` URL fragments.
        sample_size: How many of the game numbers 0001-1271 to draw.

    Returns:
        List of decoded JSON feeds for the sampled games whose request
        returned HTTP status 200; any other response is dropped.

    Raises:
        ValueError: Raised by ``random.sample`` when ``sample_size`` is
            negative or exceeds the 1271 available game numbers.
    """
    # Game numbers are zero-padded to four digits: '0001' ... '1271'.
    game_numbers = [f'{number:04d}' for number in range(1, 1272)]
    # All the games makes a messy visualization, so only a random subset is used.
    sampled_numbers = random.sample(game_numbers, sample_size)

    game_ids = []
    for number in sampled_numbers:
        game_ids.append(f'{api_params["year"]}{api_params["reg_season"]}{number}')

    feed_urls = []
    for game_id in game_ids:
        feed_urls.append(
            f'{api_params["entry_url"]}{api_params["section"]}/{game_id}{api_params["options"]}'
        )

    # Issue every request first, then keep only the successful responses.
    responses = [requests.get(f'{url}') for url in feed_urls]
    all_games = []
    for response in responses:
        if response.status_code == 200:
            all_games.append(response.json())
    return all_games
    
# attaches an arbitrary index to each player. Used in edge/node ids
def index_nodes(player_list):
    """Assign a running index to every player listed in a game feed.

    The feed is taken from the ``player_list`` argument; the function does
    not depend on any variable defined outside of it.

    Args:
        player_list: Decoded game-feed dictionary. Its
            ``['gameData']['players']`` mapping is read, and every player
            record in it must provide ``'id'``, ``'fullName'`` and
            ``'currentTeam'['id']``.

    Returns:
        dict keyed by player ``'id'``, mapping to
        ``{'index', 'fullName', 'team'}`` where ``index`` is the player's
        position in the feed's player mapping and ``team`` is the current
        team id.

    Raises:
        KeyError: If the feed or one of its player records lacks one of the
            keys listed above.
    """
    players = player_list['gameData']['players']
    return {
        info['id']: {
            'index': position,
            'fullName': info['fullName'],
            'team': info['currentTeam']['id'],
        }
        for position, info in enumerate(players.values())
    }

# creates the edge relationships between each node. returns a tuple. Also returns edges with players names for node2vec.
def index_edges(game_list, node_list):
    """Turn two-player plays into edges between known players.

    Args:
        game_list: Iterable of game feeds; each is read at
            ``['liveData']['plays']['allPlays']``.
        node_list: Mapping of player id -> record containing ``'index'``.

    Returns:
        A ``(ml_edges, links)`` tuple of equally long lists. ``links`` holds
        ``(source_index, target_index, attrs)`` and ``ml_edges`` holds
        ``(source_fullName, target_fullName, attrs)``, where ``attrs`` is
        ``{'event_type', 'time_stamp'}``. Plays without exactly two players,
        or involving a player missing from ``node_list``, are skipped.
    """
    name_edges = []
    index_edges_out = []
    for feed in game_list:
        for event in feed['liveData']['plays']['allPlays']:
            # Only plays that list exactly two participants become edges.
            if 'players' not in event or len(event['players']) != 2:
                continue
            participants = event['players']

            # Both participants must be known nodes.
            source = participants[0]['player']
            if source['id'] not in node_list:
                continue
            target = participants[1]['player']
            if target['id'] not in node_list:
                continue

            index_edges_out.append((
                node_list[source['id']]['index'],
                node_list[target['id']]['index'],
                {'event_type': event['result']['event'], 'time_stamp': event['about']['dateTime']},
            ))
            # A separate attribute dict is built so the two lists share no objects.
            name_edges.append((
                source['fullName'],
                target['fullName'],
                {'event_type': event['result']['event'], 'time_stamp': event['about']['dateTime']},
            ))
    return (name_edges, index_edges_out)

# These functions format the node and link lists into the proper source, target, and attribute fields we need for use in forcegraph3d.js
def format_nodes(node_list):
    """Convert the player lookup table into a list of graph node records.

    Args:
        node_list: Mapping whose values contain ``'fullName'``, ``'team'``
            (both strings) and ``'index'``.

    Returns:
        List of ``{'name', 'team', 'id'}`` dictionaries in the mapping's
        iteration order, with all spaces stripped from ``name`` and
        ``team`` and ``id`` taken from the record's ``'index'``.
    """
    formatted = []
    for record in node_list.values():
        formatted.append({
            'name': record['fullName'].replace(" ", ""),
            'team': record['team'].replace(" ", ""),
            'id': record['index'],
        })
    return formatted

def format_links(link_list):
    """Convert edge tuples into link records.

    Args:
        link_list: Iterable of ``(source, target, attrs)`` sequences where
            ``attrs`` contains ``'event_type'`` and ``'time_stamp'``.

    Returns:
        List of ``{'source', 'target', 'data': {'type', 'time_stamp'}}``
        dictionaries, one per input edge and in the same order.
    """
    formatted = []
    for link in link_list:
        attributes = link[2]
        formatted.append({
            'source': link[0],
            'target': link[1],
            'data': {
                'type': attributes['event_type'],
                'time_stamp': attributes['time_stamp'],
            },
        })
    return formatted

def network_file(node_list, edge_list):
    """Bundle nodes and links into a single node-link dictionary.

    Args:
        node_list: Value stored under the ``'nodes'`` key.
        edge_list: Value stored under the ``'links'`` key.

    Returns:
        ``{'nodes': node_list, 'links': edge_list}``; the inputs are stored
        as-is, not copied.
    """
    return dict(nodes=node_list, links=edge_list)

# This wraps up all of the above into a neat package for ready to go visualization and analysis
def create_data(season_year, sample_size,
                output_path=r'C:\Users\Juan\bootcamp\projects\final\js\nhlpbp.json'):
    """Build the player/play network for a sample of games and save it as JSON.

    Fetches the rosters, draws a random sample of games, converts two-player
    plays into edges, and writes the resulting node-link dictionary to disk.

    Args:
        season_year: Season identifier passed to ``get_raw_nhl_data``.
        sample_size: Number of games to sample, passed to
            ``get_hockey_games``.
        output_path: Destination of the JSON file. Defaults to the location
            the notebook originally wrote to; pass another path to run on a
            different machine.

    Returns:
        A ``(ml_links, network_params)`` tuple: the name-based edge list
        from ``index_edges`` and the ``{'nodes', 'links'}`` dictionary that
        was written to ``output_path``.
    """
    roster, entry_points = get_raw_nhl_data(season_year)
    nhl_roster = get_hockey_players(roster)
    sample_games = get_hockey_games(entry_points, sample_size)
    ml_links, viz_links = index_edges(sample_games, nhl_roster)

    network_params = network_file(format_nodes(nhl_roster), format_links(viz_links))

    # The file is only written, never read back, so plain write mode suffices.
    with open(output_path, 'w') as jsondump:
        json.dump(network_params, jsondump, indent=4)
    return (ml_links, network_params)

In [59]:
# Build the network from 64 randomly sampled games of the 2019 season;
# the call also writes the node-link JSON file to disk.
ml_links, network_params = create_data(season_year=2019, sample_size=64)

In [None]:
# Rebuild the node-link dictionary as a NetworkX graph, requesting a directed
# multigraph (presumably so repeated plays between the same two players stay
# separate, oriented edges — confirm against the NetworkX version in use).
GX = json_graph.node_link_graph(network_params, directed=True, multigraph=True)
# The result of this expression is not stored anywhere.
GX.nodes()
# Set up node2vec on the graph with 32 dimensions, walk_length=8,
# num_walks=512 and 2 workers.
node2vec = Node2Vec(GX, dimensions=32, walk_length=8, num_walks=512, workers=2) 

In [61]:
# Fit the node2vec embedding model.
# NOTE(review): window and min_count are presumably forwarded to the underlying
# gensim Word2Vec — confirm against the installed node2vec version.
model = node2vec.fit(window=10, min_count=1)

In [None]:
# Print the node records most similar to node '731' in the embedding space.
# The query goes through the model's KeyedVectors (`model.wv`): calling
# `most_similar` directly on the Word2Vec model is deprecated in gensim 3.x
# and removed in gensim 4.
for node, _ in model.wv.most_similar('731'):
    # Keys come back as strings, hence int(); this assumes a node's id equals
    # its position in the 'nodes' list.
    print(network_params['nodes'][int(node)])

## Node Creation:
The first step is to create each game's nodes, which are just the players. So let's get a list of the players and their attributes (full_name, team).

## Play Nodes:
We might want nodes to be play classes, shot on goal, throway, hits, etc.. We'll see if it is necessary in a moment.


## Edge Creation:
Now that we have nodes, we need to populate a list of edges. In this case an edge represents a play that involves 2 players (may extend to three players in the case of assists). With NetworkX we can define an edge as a three-tuple (node_1, node_2, {attribute_dictionary}), so we are looking at (p_id_1, p_id_2, {}).

### Edge Attributes:
What kind of things should the edges represent? type of play, outcome, timestamp, x,y coords? Keep it simple to start. Type of play and timestamp

In [None]:
# import igraph as ig 

# N = len(nodes_indexed)
# L = len(edges_indexed)
# # edge = [link[:2] for link in links]
# threeGee = ig.Graph(edges_indexed, directed=True)
# threeGee.write_graphml('season_graph.graphml')

In [None]:
# threeGee.get_edgelist()

In [None]:
# labels=[nodes_indexed[node]['fullName'] for node in nodes_indexed]
# group = [nodes_indexed[node]['team'] for node in nodes_indexed]
# labels

In [None]:
# d3layt = threeGee.layout_auto(dim=3)

# Plotly: Now it is time to implement the 3D graph network



In [None]:
# Xn=[d3layt[k][0] for k in range(N)]# x-coordinates of nodes
# Yn=[d3layt[k][1] for k in range(N)]# y-coordinates
# Zn=[d3layt[k][2] for k in range(N)]# z-coordinates
# Xe=[]
# Ye=[]
# Ze=[]
# for e in edges_indexed:
#     Xe+=[d3layt[e[0]][0],d3layt[e[1]][0], None]# x-coordinates of edge ends
#     Ye+=[d3layt[e[0]][1],d3layt[e[1]][1], None]
#     Ze+=[d3layt[e[0]][2],d3layt[e[1]][2], None]

In [None]:

# trace1=go.Scatter3d(x=Xe,
#                y=Ye,
#                z=Ze,
#                mode='lines',
#                line=dict(color='rgb(125,125,125)', width=1),
#                hoverinfo='none'
#                )

# trace2=go.Scatter3d(x=Xn,
#                y=Yn,
#                z=Zn,
#                mode='markers',
#                name='actors',
#                marker=dict(symbol='circle',
#                              size=6,
#                             #  color=group,
#                              colorscale='jet',
#                              line=dict(color='rgb(50,50,50)', width=0.01)
#                              ),
#                 text=labels,
#                 hoverinfo='text'
#                )

# axis=dict(showbackground=False,
#           showline=False,
#           zeroline=False,
#           showgrid=False,
#           showticklabels=False,
#           title=''
#           )

# layout = go.Layout(
#          title="Network of Interactions in a Hockey Game:(3D visualization)",
#          width=1000,
#          height=1000,
#          showlegend=False,
#          scene=dict(
#              xaxis=dict(axis),
#              yaxis=dict(axis),
#              zaxis=dict(axis),
#         ),
#      margin=dict(
#         t=100
#     ),
#     hovermode='closest',
#     annotations=[
#            dict(
#            showarrow=False,
#             text="NHL API",
#             xref='paper',
#             yref='paper',
#             x=0,
#             y=0.1,
#             xanchor='left',
#             yanchor='bottom',
#             font=dict(
#             size=14
#             )
#             )
#         ],    )