# Import packages

In [1]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option("display.max_rows", 100)

# Model classes

In [2]:
# Extra variance term shared by the model below: Team.predict adds it to the two
# skill variances, and Team._importance uses its square root as the scale of the
# Normal placed on the skill difference.
S_E = 10/4

class Skill:
    """Gaussian belief about a skill level, stored as a mean and a variance.

    Attributes are plain and mutable: ``mu`` is the mean, ``var`` the variance.
    """

    def __init__(self, mu=20, var=10):
        # The defaults act as the shared starting belief for a new instance.
        self.mu, self.var = mu, var


class Team:
    """A named team whose Gaussian skill belief is updated from match results.

    ``skill`` is any object with mutable ``mu`` (mean) and ``var`` (variance)
    attributes. The module-level constant ``S_E`` is used as an additional
    variance on the skill difference of a single match.
    """

    def __init__(self, name, skill):
        self.name = name
        self.skill = skill

    def predict(self, another):
        """Return the probability that this team beats ``another``.

        The match outcome is modelled as a draw from
        ``N(mu_self - mu_other, var_self + var_other + S_E)``; the win
        probability is the mass of that distribution above zero.
        """
        # sf(0) is 1 - cdf(0) evaluated directly, which avoids cancellation
        # when the probability is close to zero.
        return stats.norm.sf(0,
                             loc=self.skill.mu - another.skill.mu,
                             scale=(self.skill.var + another.skill.var + S_E) ** 0.5)

    def update(self, another, result, num_samples=3000, drops=1000, random_state=None):
        """Update both teams' skills in place from one observed result.

        Importance sampling with the current beliefs as proposal: draw
        ``num_samples`` independent skill pairs from the two Gaussians, weight
        each pair by the likelihood of ``result`` (see ``_importance``), ignore
        the first ``drops`` pairs, and moment-match a Gaussian to the weighted
        remainder for each team.

        Args:
            another: the opposing ``Team``; its skill is updated as well.
            result: positive if this team won; anything else counts as a loss.
            num_samples: number of skill pairs to draw.
            drops: number of leading pairs to ignore. The draws are
                independent, so this is not a burn-in; it only reduces the
                number of pairs that are used.
            random_state: ``None`` to use NumPy's global random state, an int
                seed, or a NumPy ``Generator`` / ``RandomState`` instance.

        Raises:
            ValueError: if no pair would remain after dropping.
        """
        if num_samples <= 0 or drops >= num_samples:
            raise ValueError(
                f"need at least one sample after dropping: "
                f"num_samples={num_samples}, drops={drops}")

        # An int seed must be turned into ONE generator; handing the same int to
        # both rvs calls would give both teams the identical random stream.
        rng = random_state
        if isinstance(rng, (int, np.integer)):
            rng = np.random.default_rng(rng)

        s1 = stats.norm.rvs(self.skill.mu, self.skill.var ** 0.5,
                            size=num_samples, random_state=rng)
        s2 = stats.norm.rvs(another.skill.mu, another.skill.var ** 0.5,
                            size=num_samples, random_state=rng)
        # Every pair carries a genuine likelihood weight (no placeholder weight
        # for the first sample), so drops=0 is valid as well.
        weights = self._importance(s1, s2, result)

        s1, s2, weights = s1[drops:], s2[drops:], weights[drops:]
        self.skill.mu, self.skill.var = self._approx_gaussian(s1, weights)
        another.skill.mu, another.skill.var = self._approx_gaussian(s2, weights)

    @staticmethod
    def _approx_gaussian(samples, importances):
        """Return the weighted mean and weighted (population) variance of ``samples``.

        ``np.average`` raises ``ZeroDivisionError`` when the weights sum to zero.
        """
        mu = np.average(samples, weights=importances)
        var = np.average((samples - mu) ** 2, weights=importances)
        return mu, var

    @staticmethod
    def _importance(s1, s2, y=1):
        """Likelihood of outcome ``y`` given skills ``s1`` and ``s2``.

        With ``d ~ N(s1 - s2, S_E)``, returns ``P(d > 0)`` when ``y > 0`` and
        ``P(d <= 0)`` otherwise. Accepts scalars or equally shaped arrays.
        """
        diff = np.subtract(s1, s2)
        scale = S_E ** 0.5
        if y > 0:
            return stats.norm.sf(0, loc=diff, scale=scale)
        return stats.norm.cdf(0, loc=diff, scale=scale)

# Data preparation and exploration

In [3]:
# Raw data: read the match list from a CSV next to the notebook; the first row
# of the file is used as the column header. The bare `nba` displays the frame.
nba=pd.read_csv('./nba-2021-UTC.csv', sep=',', header=0)
nba

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result
0,1,1,19/10/2021 23:30,Fiserv Forum,Milwaukee Bucks,Brooklyn Nets,127 - 104
1,2,1,20/10/2021 02:00,STAPLES Center,Los Angeles Lakers,Golden State Warriors,114 - 121
2,3,1,20/10/2021 23:00,Spectrum Center,Charlotte Hornets,Indiana Pacers,123 - 122
3,4,1,20/10/2021 23:00,Little Caesars Arena,Detroit Pistons,Chicago Bulls,88 - 94
4,5,1,20/10/2021 23:30,Madison Square Garden,New York Knicks,Boston Celtics,138 - 134
...,...,...,...,...,...,...,...
1225,1220,24,11/04/2022 01:30,Ball Arena,Denver Nuggets,Los Angeles Lakers,141 - 146
1226,1222,24,11/04/2022 01:30,Crypto.com Arena,LA Clippers,Oklahoma City Thunder,138 - 88
1227,1225,24,11/04/2022 01:30,Smoothie King Center,New Orleans Pelicans,Golden State Warriors,107 - 128
1228,1229,24,11/04/2022 01:30,Footprint Center,Phoenix Suns,Sacramento Kings,109 - 116


In [4]:
# Summary of the home-team column: row count, number of distinct teams, and the
# most frequent value with its frequency.
nba['Home Team'].describe()

count                1230
unique                 30
top       New York Knicks
freq                   41
Name: Home Team, dtype: object

In [5]:
# Split "Result" (e.g. "127 - 104") into the first and second score.
# The scores are parsed to integers once, so later comparisons are numeric
# rather than string-based; spacing around the dash is tolerated. A value that
# does not match the pattern yields NaN and makes astype(int) fail loudly.
parsed_scores = nba['Result'].str.extract(r'^\s*(\d+)\s*-\s*(\d+)\s*$').astype(int)
nba['Score1'] = parsed_scores[0]
nba['Score2'] = parsed_scores[1]
nba

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result,Score1,Score2
0,1,1,19/10/2021 23:30,Fiserv Forum,Milwaukee Bucks,Brooklyn Nets,127 - 104,127,104
1,2,1,20/10/2021 02:00,STAPLES Center,Los Angeles Lakers,Golden State Warriors,114 - 121,114,121
2,3,1,20/10/2021 23:00,Spectrum Center,Charlotte Hornets,Indiana Pacers,123 - 122,123,122
3,4,1,20/10/2021 23:00,Little Caesars Arena,Detroit Pistons,Chicago Bulls,88 - 94,88,94
4,5,1,20/10/2021 23:30,Madison Square Garden,New York Knicks,Boston Celtics,138 - 134,138,134
...,...,...,...,...,...,...,...,...,...
1225,1220,24,11/04/2022 01:30,Ball Arena,Denver Nuggets,Los Angeles Lakers,141 - 146,141,146
1226,1222,24,11/04/2022 01:30,Crypto.com Arena,LA Clippers,Oklahoma City Thunder,138 - 88,138,88
1227,1225,24,11/04/2022 01:30,Smoothie King Center,New Orleans Pelicans,Golden State Warriors,107 - 128,107,128
1228,1229,24,11/04/2022 01:30,Footprint Center,Phoenix Suns,Sacramento Kings,109 - 116,109,116


In [6]:
# Select the rows whose two score columns are equal, i.e. drawn matches.
nba.loc[nba['Score1']==nba['Score2']]
# The selection shown below is empty: the dataset contains no draws.

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result,Score1,Score2


# Training and prediction

In [7]:
# One Team with a default Skill per club name. Home teams come first, in order
# of first appearance; any team that only ever appears as the away side is
# appended, so looking up an away team by name cannot raise KeyError.
team_names = pd.concat([nba['Home Team'], nba['Away Team']]).unique()
teams = {name: Team(name, Skill()) for name in team_names}

In [8]:
num_predictions = 0
correct_predictions = 0

# Walk the matches in frame order: predict from the current skills, score the
# prediction against the actual result, then update both teams.
# iterrows() yields each row directly, so this does not rely on the index being 0..n-1.
for num, match in nba.iterrows():
    # load match
    team1 = teams[match['Home Team']]
    team2 = teams[match['Away Team']]
    # int() accepts the scores whether the columns hold strings or integers.
    score1, score2 = int(match['Score1']), int(match['Score2'])

    # skip draws
    if score1 == score2:
        continue

    num_predictions += 1
    result = 1 if score1 > score2 else -1
    # Predict once, before the update, and reuse the value for printing and scoring.
    win_rate = team1.predict(team2)
    print(f"Before match: {team1.name}: {round(team1.skill.mu, 2)}-{round(team1.skill.var, 2)} vs "
          f"{team2.name}: {round(team2.skill.mu, 2)}-{round(team2.skill.var, 2)}")
    print(f"Team 1 rate: {round(win_rate, 2)} ==================== Result: {result}")

    prediction = 1 if win_rate >= 0.5 else -1

    correct_predictions += prediction == result
    # update skills
    team1.update(team2, result)
    print(f"After match: {team1.name}: {round(team1.skill.mu, 2)}-{round(team1.skill.var, 2)} vs "
          f"{team2.name}: {round(team2.skill.mu, 2)}-{round(team2.skill.var, 2)}")

    print("=======================================================")

Before match: Milwaukee Bucks: 20-10 vs Brooklyn Nets: 20-10
After match: Milwaukee Bucks: 21.61-6.82 vs Brooklyn Nets: 18.26-7.15
Before match: Los Angeles Lakers: 20-10 vs Golden State Warriors: 20-10
After match: Los Angeles Lakers: 18.28-7.1 vs Golden State Warriors: 21.68-7.33
Before match: Charlotte Hornets: 20-10 vs Indiana Pacers: 20-10
After match: Charlotte Hornets: 21.68-7.36 vs Indiana Pacers: 18.41-6.68
Before match: Detroit Pistons: 20-10 vs Chicago Bulls: 20-10
After match: Detroit Pistons: 18.29-7.44 vs Chicago Bulls: 21.67-7.91
Before match: New York Knicks: 20-10 vs Boston Celtics: 20-10
After match: New York Knicks: 21.71-7.38 vs Boston Celtics: 18.28-7.3
Before match: Toronto Raptors: 20-10 vs Washington Wizards: 20-10
After match: Toronto Raptors: 18.35-7.13 vs Washington Wizards: 21.91-7.25
Before match: Memphis Grizzlies: 20-10 vs Cleveland Cavaliers: 20-10
After match: Memphis Grizzlies: 21.58-7.18 vs Cleveland Cavaliers: 18.28-6.92
Before match: Minnesota Timbe

In [9]:
# team1 / team2 are still bound to the last match handled by the loop above, so
# this is the post-update win probability for that final pairing.
team1.name, team2.name, team1.predict(team2)

('Portland Trail Blazers', 'Utah Jazz', 0.2356068213023631)

In [10]:
# Share of matches whose winner was predicted correctly (each prediction was
# made before the skills were updated with that match).
correct_predictions/num_predictions

0.6146341463414634

In [11]:
# Collect every team's current skill estimate into one table, one row per team,
# in the iteration order of the `teams` dict.
skills = pd.DataFrame(
    [(name, team.skill.mu, team.skill.var) for name, team in teams.items()],
    columns=['Team', 'Mean', 'Varience'],
)

In [12]:
# Rank teams by estimated mean skill, strongest first (returns a sorted copy;
# `skills` itself is left in its original order).
skills.sort_values(by='Mean', ascending=False)

Unnamed: 0,Team,Mean,Varience
11,Phoenix Suns,21.482903,0.076017
6,Memphis Grizzlies,20.843001,0.041839
14,Miami Heat,20.709592,0.063413
20,Philadelphia 76ers,20.702265,0.055078
0,Milwaukee Bucks,20.697746,0.055999
29,Dallas Mavericks,20.695532,0.048098
19,Boston Celtics,20.692081,0.048961
5,Toronto Raptors,20.573828,0.05867
15,Golden State Warriors,20.499249,0.087582
7,Minnesota Timberwolves,20.465094,0.079569


In [13]:
# Average of the per-team skill means, taken over however many teams are in the table.
skills['Mean'].mean()

20.071348001339164

In [14]:
# Average of the per-team skill variances, taken over however many teams are in the table.
skills['Varience'].mean()

0.060799217724144504

In [15]:
# use the previous model to run again
num_predictions = 0
correct_predictions = 0

for num in nba.index:
    #load match
    match=nba[num:num+1]
    team1 = teams[match['Home Team'].item()]
    team2 = teams[match['Away Team'].item()]

    # skip draws
    if match['Score1'].astype('int').item() == match['Score2'].astype('int').item():
        continue

    num_predictions += 1
    result = 1 if match['Score1'].astype('int').item() > match['Score2'].astype('int').item() else -1
    print(f"Before match: {team1.name}: {round(team1.skill.mu, 2)}-{round(team1.skill.var, 2)} vs "
          f"{team2.name}: {round(team2.skill.mu, 2)}-{round(team2.skill.var, 2)}")
    print(f"Team 1 rate: {round(team1.predict(team2), 2)} ==================== Result: {result}")

    prediction = 1 if team1.predict(team2) >= 0.5 else -1

    correct_predictions += prediction == result
    # update skills
    team1.update(team2, result)
    print(f"After match: {team1.name}: {round(team1.skill.mu, 2)}-{round(team1.skill.var, 2)} vs "
          f"{team2.name}: {round(team2.skill.mu, 2)}-{round(team2.skill.var, 2)}")

    print("=======================================================")

Before match: Milwaukee Bucks: 20.7-0.06 vs Brooklyn Nets: 20.26-0.05
After match: Milwaukee Bucks: 20.72-0.06 vs Brooklyn Nets: 20.23-0.05
Before match: Los Angeles Lakers: 19.49-0.06 vs Golden State Warriors: 20.5-0.09
After match: Los Angeles Lakers: 19.48-0.06 vs Golden State Warriors: 20.52-0.09
Before match: Charlotte Hornets: 20.19-0.05 vs Indiana Pacers: 19.25-0.05
After match: Charlotte Hornets: 20.2-0.05 vs Indiana Pacers: 19.24-0.05
Before match: Detroit Pistons: 19.16-0.07 vs Chicago Bulls: 20.29-0.09
After match: Detroit Pistons: 19.14-0.07 vs Chicago Bulls: 20.33-0.09
Before match: New York Knicks: 19.82-0.06 vs Boston Celtics: 20.69-0.05
After match: New York Knicks: 19.85-0.05 vs Boston Celtics: 20.66-0.05
Before match: Toronto Raptors: 20.57-0.06 vs Washington Wizards: 19.67-0.05
After match: Toronto Raptors: 20.53-0.06 vs Washington Wizards: 19.71-0.05
Before match: Memphis Grizzlies: 20.84-0.04 vs Cleveland Cavaliers: 20.26-0.07
After match: Memphis Grizzlies: 20.86-

In [16]:
# Share of correctly predicted winners in the most recent pass over the matches.
correct_predictions/num_predictions

0.6552845528455284