Data downloaded from https://www.kaggle.com/datasets/nathanlauga/nba-games?select=games.csv, originally from Sports Data LLC (https://www.sports-reference.com/)

In [1]:
import csv
import pandas as pd
import numpy as np

## Processing Data

In [2]:
# Read the two raw tables: the team lookup and the per-game results.
team_df = pd.read_csv("data/nba/teams.csv")
big_game_df = pd.read_csv("data/nba/games.csv")

In [3]:
# Keep only the columns the rankings need; .copy() detaches the result from
# the raw frame so later edits never touch big_game_df.
game_columns = ['SEASON', 'HOME_TEAM_ID', 'VISITOR_TEAM_ID', 'PTS_home', 'PTS_away']
game_df = big_game_df.loc[:, game_columns].copy()

In [4]:
# Restrict the analysis to a single season and drop every row that has a
# missing value in any of the selected columns.
# The result is built as a new frame instead of calling dropna(inplace=True)
# on the filtered selection: in-place mutation of a selection is the pattern
# pandas' SettingWithCopyWarning exists to flag, and it gains nothing here.
TARGET_SEASON = 2021
game_df = game_df[game_df['SEASON'] == TARGET_SEASON].dropna()
print(game_df.shape)

(1076, 5)


### team_names

In [5]:
# Every team ID that appears as either the home or the visiting side.
# sorted() pins the order of the list: a team's position in `teams` is used
# as its node index when the graph is built and to line up team_names with
# the computed ratings, so it must not depend on set iteration order.
home_ids = set(game_df['HOME_TEAM_ID'].tolist())
visitor_ids = set(game_df['VISITOR_TEAM_ID'].tolist())
teams = sorted(home_ids | visitor_ids)

In [6]:
# Nickname of each team, in the same order as `teams`.
# The first matching row of team_df is used for every ID.
team_names = [
    str(team_df.loc[team_df['TEAM_ID'] == team_id, 'NICKNAME'].iloc[0])
    for team_id in teams
]
print(team_names)

['Hawks', 'Celtics', 'Cavaliers', 'Pelicans', 'Bulls', 'Mavericks', 'Nuggets', 'Warriors', 'Rockets', 'Clippers', 'Lakers', 'Heat', 'Bucks', 'Timberwolves', 'Nets', 'Knicks', 'Magic', 'Pacers', '76ers', 'Suns', 'Trail Blazers', 'Kings', 'Spurs', 'Thunder', 'Raptors', 'Jazz', 'Grizzlies', 'Wizards', 'Pistons', 'Hornets']


## Naive Ranking

In [7]:
# Naive rating ingredients: for every team, the running total of its point
# margin (own points minus opponent points) and the number of games played.
scores = dict.fromkeys(teams, 0)
games_played = dict.fromkeys(teams, 0)
for _, row in game_df.iterrows():
    home_id, away_id = row['HOME_TEAM_ID'], row['VISITOR_TEAM_ID']
    home_margin = int(row['PTS_home']) - int(row['PTS_away'])
    # One side's gain is the other side's loss.
    scores[home_id] += home_margin
    scores[away_id] -= home_margin
    games_played[home_id] += 1
    games_played[away_id] += 1

In [8]:
# Average point margin per game, in the insertion order of `scores`
# (which is the order of `teams`).
naive_r = [scores[team_id] / games_played[team_id] for team_id in scores]

In [9]:
# Table of teams ordered from best to worst naive rating.
naive_rank_df = (
    pd.DataFrame({'team': team_names, 'r': naive_r})
    .sort_values(by=['r'], ascending=False)
    .reset_index(drop=True)
)
print(naive_rank_df)

             team         r
0            Suns  8.281690
1        Warriors  6.794521
2            Jazz  6.014085
3       Grizzlies  5.270270
4         Celtics  5.125000
5            Heat  4.546667
6       Mavericks  4.253521
7           Bucks  3.465753
8    Timberwolves  3.178082
9           Bulls  2.718310
10      Cavaliers  2.402778
11        Nuggets  2.315068
12          76ers  1.884058
13        Raptors  1.625000
14          Hawks  0.785714
15          Spurs  0.136986
16           Nets -0.056338
17         Knicks -0.309859
18       Clippers -1.383562
19        Hornets -1.472222
20         Pacers -2.541667
21       Pelicans -2.690141
22         Lakers -3.152778
23        Wizards -3.157143
24          Kings -4.342466
25  Trail Blazers -7.257143
26          Magic -7.388889
27        Thunder -7.857143
28        Pistons -8.830986
29        Rockets -9.239437


## Building the graph, matrices, and vectors

In [10]:
# Initialise the graph containers: edges, neg_divergence, f and W.
# One edge (i, j) with i < j for every unordered pair of nodes; a node is a
# position in `teams`.
num_nodes = len(teams)
edges = [(i, j) for i in range(num_nodes) for j in range(i + 1, num_nodes)]
num_edges = len(edges)

# Zero-filled placeholders that later cells populate.
neg_divergence = np.zeros((num_edges, num_nodes))
f = np.zeros(num_edges)
W = np.zeros((num_edges, num_edges))

In [11]:
# Sanity check: with one edge per unordered pair of nodes, the count should
# equal num_nodes * (num_nodes - 1) / 2.
print(len(edges))

435


In [12]:
# f, w, W
# f[k] is the mean point difference on edge k = (i, j) with i < j, oriented
#      as (points of team j) - (points of team i); 0 if the pair never met.
# w[k] is the number of games played on edge k.
# W    is the diagonal matrix with w on its diagonal.

# Position lookups built once, so the per-game loop does not have to search
# the `teams` and `edges` lists linearly for every row.
team_pos = {team_id: pos for pos, team_id in enumerate(teams)}
edge_pos = {edge: pos for pos, edge in enumerate(edges)}

w = np.zeros(len(edges))
f = np.zeros(len(edges))
for _, game in game_df.iterrows():
    home_index = team_pos[game['HOME_TEAM_ID']]
    opp_index = team_pos[game['VISITOR_TEAM_ID']]
    diff = int(game['PTS_away']) - int(game['PTS_home'])
    if home_index < opp_index:
        edge = (home_index, opp_index)
    else:
        # Edges are stored with the smaller index first; flip the sign so the
        # difference still reads (larger-index team) - (smaller-index team).
        diff = -diff
        edge = (opp_index, home_index)
    i = edge_pos[edge]
    w[i] += 1
    f[i] += diff

# Turn summed differences into per-game averages; edges with no games keep 0.
played = w != 0
f[played] /= w[played]
W = np.diag(w)
print(f)


[ -2.          -0.5         -3.           6.          -8.5
  13.5         14.           6.           3.5          4.
   3.8         -6.         -11.5          8.5         10.66666667
 -11.25       -12.           9.33333333  -2.5          5.
 -11.5         -1.5        -12.          -3.33333333  15.
 -18.           0.33333333  -7.5         -9.66666667  -4.66666667
 -11.           6.           3.          -6.           4.
 -14.           6.          -3.5         -8.66666667  -6.33333333
   5.         -11.25        -6.         -10.2         -0.33333333
  -8.           3.         -12.         -53.           5.
  -6.           0.5          7.         -13.          -5.66666667
  -7.          -5.5          0.5          7.4        -18.
 -12.          14.5        -12.         -13.          12.
  -8.33333333 -12.33333333  -6.           2.66666667  -9.5
 -13.          -5.66666667   8.           7.          -8.
  -7.5         -9.          -6.         -18.          -9.5
   7.5         -0.25        -

In [13]:
# w[k] counts the games played on edge k; a 0 marks a pair of teams with no
# game in the filtered data.
print(w)

[4. 4. 1. 4. 2. 2. 1. 1. 2. 2. 5. 3. 2. 2. 3. 4. 2. 3. 2. 1. 2. 2. 1. 3.
 2. 2. 3. 2. 3. 3. 2. 2. 1. 1. 1. 2. 2. 2. 3. 3. 1. 4. 4. 5. 3. 4. 2. 2.
 1. 2. 1. 4. 1. 1. 3. 4. 4. 2. 5. 1. 1. 2. 2. 1. 1. 3. 3. 2. 3. 2. 1. 6.
 2. 2. 2. 2. 2. 2. 3. 2. 2. 4. 3. 4. 2. 4. 4. 2. 3. 3. 1. 2. 2. 5. 2. 2.
 3. 2. 2. 3. 1. 3. 2. 3. 2. 5. 3. 2. 2. 1. 2. 2. 2. 2. 1. 2. 3. 2. 1. 3.
 3. 4. 4. 4. 1. 2. 1. 2. 2. 3. 1. 3. 2. 4. 2. 3. 4. 3. 5. 2. 2. 2. 2. 1.
 2. 2. 2. 1. 3. 2. 4. 3. 4. 2. 4. 4. 1. 1. 2. 5. 3. 4. 1. 2. 2. 4. 2. 2.
 2. 1. 1. 2. 4. 4. 3. 5. 2. 4. 3. 1. 2. 1. 3. 4. 5. 1. 2. 4. 2. 2. 1. 2.
 2. 3. 5. 3. 2. 3. 2. 3. 3. 0. 2. 2. 3. 4. 3. 2. 2. 1. 2. 2. 1. 2. 3. 2.
 2. 4. 4. 2. 4. 3. 2. 2. 2. 4. 2. 1. 5. 2. 2. 2. 2. 1. 3. 4. 4. 3. 2. 1.
 1. 4. 2. 1. 2. 2. 2. 3. 3. 2. 2. 2. 1. 4. 4. 5. 4. 3. 0. 2. 4. 1. 2. 2.
 4. 2. 3. 2. 3. 3. 3. 2. 2. 1. 3. 1. 3. 2. 2. 4. 3. 4. 1. 4. 4. 4. 4. 2.
 2. 2. 1. 2. 3. 3. 2. 2. 2. 3. 4. 3. 2. 2. 2. 2. 2. 4. 3. 1. 4. 1. 4. 4.
 1. 2. 2. 2. 3. 2. 5. 2. 1. 2. 2. 2. 4. 1. 1. 4. 3.

In [14]:
# neg_divergence, edges x nodes
# Row k describes edge k = (tail, head): -1 in the tail column, +1 in the
# head column, so (neg_divergence @ r)[k] == r[head] - r[tail].
for row, (tail, head) in enumerate(edges):
    neg_divergence[row, tail] = -1
    neg_divergence[row, head] = 1
print(neg_divergence)

[[-1.  1.  0. ...  0.  0.  0.]
 [-1.  0.  1. ...  0.  0.  0.]
 [-1.  0.  0. ...  0.  0.  0.]
 ...
 [ 0.  0.  0. ... -1.  1.  0.]
 [ 0.  0.  0. ... -1.  0.  1.]
 [ 0.  0.  0. ...  0. -1.  1.]]


In [15]:
# Weighted least-squares ratings from the normal equations
#   (A^T W A) r = A^T W f,   with A = neg_divergence.
# Every row of A sums to zero, so A^T W A is singular (constant vectors lie in
# its null space); the pseudo-inverse is used to pick a solution.
incidence_t = neg_divergence.T
right_side = incidence_t @ (W @ f)
left_side = (incidence_t @ W) @ neg_divergence
r = np.linalg.pinv(left_side) @ right_side
print(r)

[ 1.28246727  4.43656461  2.36435915 -2.18359708  2.35165521  4.22564431
  2.10209339  6.49239783 -8.7491817  -1.22854766 -3.19226628  4.33541596
  3.16428958  2.89402519  0.10105    -0.30682556 -6.8426367  -2.25186342
  1.76721406  7.55631855 -6.19993212 -4.23642858  0.0457767  -7.30698196
  1.70241609  5.60959399  4.82941923 -3.2634901  -8.19956173 -1.29938825]


In [16]:
# Table of teams ordered from best to worst least-squares rating.
rank_df = (
    pd.DataFrame({'team': team_names, 'r': r})
    .sort_values(by=['r'], ascending=False)
    .reset_index(drop=True)
)
print(rank_df)

#rank_df.to_csv('data/hodge_ranking.csv')

             team         r
0            Suns  7.556319
1        Warriors  6.492398
2            Jazz  5.609594
3       Grizzlies  4.829419
4         Celtics  4.436565
5            Heat  4.335416
6       Mavericks  4.225644
7           Bucks  3.164290
8    Timberwolves  2.894025
9       Cavaliers  2.364359
10          Bulls  2.351655
11        Nuggets  2.102093
12          76ers  1.767214
13        Raptors  1.702416
14          Hawks  1.282467
15           Nets  0.101050
16          Spurs  0.045777
17         Knicks -0.306826
18       Clippers -1.228548
19        Hornets -1.299388
20       Pelicans -2.183597
21         Pacers -2.251863
22         Lakers -3.192266
23        Wizards -3.263490
24          Kings -4.236429
25  Trail Blazers -6.199932
26          Magic -6.842637
27        Thunder -7.306982
28        Pistons -8.199562
29        Rockets -8.749182
