In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import torch
import time

curr_directory = os.getcwd()

# ---------------------------------------------------------------------------
# Create index for all players + roster dictionaries
# ---------------------------------------------------------------------------
# Player roster information (copied over from player_roster.ipynb)
teams = ['BOS','BRK','NYK','PHI','TOR','CHI','CLE','DET','IND','MIL','ATL','CHO','MIA','ORL','WAS',
         'DEN','MIN','OKC','POR','UTA','GSW','LAC','LAL','PHO','SAC','DAL','HOU','MEM','NOP','SAS']

# Dictionary of rosters: roster[team][year] is a list of player names.
# Ex. The roster of Boston Celtics players for the 2019-2020 season can be
# accessed using roster['BOS']['2019'].
# It does not include any players/rookies for which there is no season data.
roster = {team: {} for team in teams}

# Set of every player that appears on any roster (no repeats)
all_players = set()

roster_dir = os.path.join(curr_directory, 'data_sets/player_roster')

# Sorted so the files are processed in the same order on every run.
for filename in sorted(os.listdir(roster_dir)):
    roster_path = os.path.join(roster_dir, filename)
    # Skip hidden entries and sub-directories (e.g. .DS_Store,
    # .ipynb_checkpoints) that pd.read_csv cannot parse as a roster file.
    if filename.startswith('.') or not os.path.isfile(roster_path):
        continue

    data = pd.read_csv(roster_path)
    # The first four characters of the file name are used as the season key.
    year = filename[0:4]

    for team in teams:
        # Keep only the text before the first backslash of each 'Player' value.
        names = [raw.split('\\', 1)[0] for raw in data.loc[data['Tm'] == team, 'Player']]
        # dict.fromkeys removes duplicates while keeping first-seen order.
        roster[team][year] = list(dict.fromkeys(names))
        all_players.update(roster[team][year])

num_players = len(all_players)

# Player dictionary that maps every player to a column index.
# The names are sorted first: iterating a set of strings directly gives an
# order that can change from one interpreter run to the next, which would make
# a saved theta meaningless in a later session.
player_index = {name: idx for idx, name in enumerate(sorted(all_players))}
#
#
#
# Data Processing
#
#
#
# Game data from the 2014-2015 season to the 2017-2018 season
game_data = pd.read_csv(os.path.join(curr_directory,'data_sets/nba.games.stats.csv'))

# Sort all rows by Date
game_data = game_data.sort_values(by=['Date'])

# Rows are now in date order. game_results holds the score differential from
# the point of view of the first team of each (Team, Opponent) pair in `teams`.
game_results = np.array(list(game_data['TeamPoints'] - game_data['OpponentPoints']))
teams = np.array(list(zip(game_data.Team, game_data.Opponent)))
dates = np.array(list(game_data['Date']))

# Visit the dates in sorted order. Iterating a plain set() here would visit
# them in arbitrary order, so the de-duplicated arrays below would no longer
# be chronological even though later cells slice them by season offset.
# NOTE(review): assumes Date strings sort chronologically (e.g. YYYY-MM-DD) --
# the sort_values call above relies on the same assumption.
unique_dates = sorted(set(dates))

# Index of the second listing of every game that appears twice on a day
# (once as (A, B) and once as (B, A)).
repeat_indexes = []

for date in unique_dates:
    same_day = np.where(dates == date)[0]
    for pos, i in enumerate(same_day):
        # Only look at later rows of the same day (j > i).
        for j in same_day[pos + 1:]:
            if np.array_equal(np.flip(teams[j], axis=0), teams[i]):
                repeat_indexes.append(j)

# Make new unique game results, teams and dates arrays
unique_game_results = game_results[repeat_indexes]
unique_teams = teams[repeat_indexes]
unique_dates = dates[repeat_indexes]
#
#
#
# Game data from the 2018-2019 season and the 2019-2020 season
game_data_2018, game_data_2019 = (
    pd.read_csv(os.path.join(curr_directory, season_csv))
    for season_csv in ('data_sets/game_data_2018_2019.csv', 'data_sets/game_data_2019_2020.csv')
)

# Drop the first four characters of every Date value (the day-of-week prefix)
for season_frame in (game_data_2018, game_data_2019):
    season_frame['Date'] = season_frame['Date'].str.slice(start=4)

# Date conversion functions
def monthToNum(shortMonth):
    """Return the zero-padded month number ('01'..'12') for a month abbreviation.

    shortMonth must be one of the three-letter English abbreviations
    'Jan' .. 'Dec' (case-sensitive); any other value raises KeyError.
    """
    abbreviations = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    number_by_abbreviation = {
        abbreviation: '{:02d}'.format(position)
        for position, abbreviation in enumerate(abbreviations, start=1)
    }
    return number_by_abbreviation[shortMonth]

def convert_dates(dataframe):
    """Rewrite dataframe['Date'] in place from 'Mon D YYYY' to 'YYYY-MM-DD'.

    Each value is expected to be a string made of a three-letter month
    abbreviation, a one- or two-digit day and a four-digit year separated by
    whitespace (e.g. 'Oct 6 2018' or 'Oct 16 2018'). The fields are split on
    whitespace rather than read from fixed character offsets, and the column
    is iterated by value rather than by label, so the function also works on
    frames whose index is not 0..n-1 and on days padded with an extra space.

    Returns None; the 'Date' column of the passed frame is replaced.
    Raises ValueError if a value does not split into exactly three fields and
    KeyError (from monthToNum) if the month abbreviation is unknown.
    """
    def _to_iso(raw_date):
        month_abbreviation, day, year = raw_date.split()
        # zfill pads single-digit days to two characters ('6' -> '06').
        return year + '-' + monthToNum(month_abbreviation) + '-' + day.zfill(2)

    dataframe['Date'] = [_to_iso(raw_date) for raw_date in dataframe['Date']]

# Bring both newer seasons to the same date format as the older data
for _season_frame in (game_data_2018, game_data_2019):
    convert_dates(_season_frame)


def _visitor_home_arrays(frame):
    """Return (score differential, (Visitor, Home) pairs, dates) arrays for a season frame."""
    differential = np.array(list(frame['Visitor PTS'] - frame['Home PTS']))
    matchups = np.array(list(zip(frame.Visitor, frame.Home)))
    game_days = np.array(list(frame['Date']))
    return differential, matchups, game_days


game_results_2018, teams_2018, dates_2018 = _visitor_home_arrays(game_data_2018)
game_results_2019, teams_2019, dates_2019 = _visitor_home_arrays(game_data_2019)

# ---------------------------------------------------------------------------
# Combine all data into one dataset (older seasons first, then 2018, then 2019)
# ---------------------------------------------------------------------------
teams_all = np.concatenate((unique_teams, teams_2018, teams_2019), axis=0)
dates_all = np.concatenate((unique_dates, dates_2018, dates_2019), axis=0)
results_all = np.concatenate((unique_game_results, game_results_2018, game_results_2019), axis=0)

In [None]:
class QuadraticRegression:
    """Binary classifier that scores a game with a quadratic form of its features.

    A game is encoded as a 0/1 vector ``x`` (see ``x_for_game`` /
    ``process_data``) and ``predict`` returns ``sigmoid(x @ theta @ x.T)``,
    read as the probability that the first team of the pair wins. ``theta`` is
    fitted by gradient descent followed by a structural projection after every
    step (see ``fit`` and ``project``).

    ``process_data``, ``x_for_game`` and ``playoff_prediction`` read the
    module-level names ``num_players``, ``roster``, ``player_index`` and
    ``curr_directory``.
    """

    def __init__(self, step_size=1e-5, max_iter=200, eps=1e-3, batch_size=32, theta=None,
                 verbose=True):
        """Store the hyper-parameters.

        step_size:  multiplier applied to the gradient at every update.
        max_iter:   maximum number of updates performed by ``fit``.
        eps:        ``fit`` stops once the norm of an update drops below this.
        batch_size: stride used by ``pytorch_batch_gradient``.
        theta:      optional initial parameter matrix; ``fit`` creates a zero
                    matrix when it is None.
        verbose:    when True (default) ``fit`` prints its progress.
        """
        self.theta = theta
        self.batch_size = batch_size
        self.step_size = step_size
        self.max_iter = max_iter
        self.eps = eps
        self.verbose = verbose
        self.error_list = []    # norm of every update applied by fit
        self.training_acc = []  # training accuracy after every full-batch update
        self.dev_acc = []       # dev accuracy after every full-batch update

    @staticmethod
    def _block_slices(side):
        """Return the (first-half, second-half) index slices used for the blocks of theta.

        NOTE(review): each slice stops one index short of its half
        (``0:side/2 - 1`` and ``side/2:side - 1``), so row/column
        ``side/2 - 1`` and ``side - 1`` are never part of a block. This is kept
        exactly as it was because changing it would alter every fitted model --
        confirm whether dropping those two indices is intended.
        """
        return slice(0, side // 2 - 1), slice(side // 2, side - 1)

    def getSAS(self):
        """Return copies of the four blocks of theta as (S, S2, A, A2).

        S is the top-left block, S2 the bottom-right, A the top-right and A2
        the bottom-left. The block boundaries are derived from theta's size
        with the same rule ``project`` uses.
        """
        theta = np.asarray(self.theta)
        first, second = self._block_slices(theta.shape[0])
        # Top left
        S = np.array(theta[first, first])
        # Bottom right
        S2 = np.array(theta[second, second])
        # Top right
        A = np.array(theta[first, second])
        # Bottom left
        A2 = np.array(theta[second, first])

        return S, S2, A, A2

    def predict(self, x):
        """Return sigmoid(x @ theta @ x.T) for a feature vector x."""
        z = x@self.theta@x.T
        return self.sigmoid(z)

    def sigmoid(self, z):
        """Logistic function 1 / (1 + exp(-z))."""
        return 1.0 / (1. + np.exp(-z))

    def loss_function_t(self, theta_t, x, y):
        """Return the cross-entropy style loss as a torch scalar (differentiable in theta_t).

        NOTE(review): for a batch ``x`` of shape (n, d) the expression
        ``x @ theta_t @ x.T`` is an (n, n) matrix, so with ``y`` of shape
        (n, 1) the sum also contains the cross terms between different games,
        not only the n per-game values on the diagonal. This is kept unchanged
        because the step sizes used with this class were chosen against this
        loss -- confirm whether the per-game form was intended.
        """
        EPS = 1e-8  # keeps log() finite when p reaches 0 or 1
        x = torch.as_tensor(x)
        y = torch.as_tensor(y)
        p = torch.sigmoid(x @ theta_t @ x.T)
        return -1.*((y * torch.log(p + EPS) + (1-y) * torch.log(1 - p + EPS)).sum())

    def pytorch_gradient(self, x, y):
        """Return d(loss)/d(theta) at the current theta for the whole data set (numpy array)."""
        theta_t = torch.tensor(self.theta, requires_grad=True)
        self.loss_function_t(theta_t, x, y).backward()
        return theta_t.grad.numpy()

    def pytorch_batch_gradient(self, x_teams, y_teams, index):
        """Return d(loss)/d(theta) for the rows index, index + batch_size, index + 2*batch_size, ..."""
        x = x_teams[index::self.batch_size]
        y = y_teams[index::self.batch_size]

        theta_t = torch.tensor(self.theta, requires_grad=True)
        self.loss_function_t(theta_t, x, y).backward()
        return theta_t.grad.numpy()

    def gradBatchLossFunction(self, x_teams, y_teams):
        """Sum x.T@x@theta@x.T@x - y*x.T@x over every row of x_teams (hand-written gradient)."""
        update = 0
        theta = np.matrix(self.theta)

        for i in range(x_teams.shape[0]):
            x = np.matrix(x_teams[i, :])
            # .item() replaces np.asscalar, which no longer exists in NumPy.
            y = np.asarray(y_teams[i]).item()
            update += x.T@x@theta@x.T@x - y*x.T@x

        return update

    def gradminiBatchLossFunction(self, x_teams, y_teams, batch_size, index):
        """Same sum as gradBatchLossFunction over batch_size rows starting at index (wrapping around)."""
        update = 0
        theta = np.matrix(self.theta)
        n_rows = x_teams.shape[0]

        for i in range(batch_size):
            row = int((i+index) % n_rows)
            x = np.matrix(x_teams[row, :])
            y = np.asarray(y_teams[row]).item()
            update += x.T@x@theta@x.T@x - y*x.T@x

        return update

    def fit(self, x, y, dev_x=None, dev_y=None, mini=False):
        """Fit theta by projected gradient descent and plot the accuracy curves.

        x, y:         training features and labels as returned by process_data.
        dev_x, dev_y: optional dev features and labels; when both are given the
                      dev accuracy is recorded after every full-batch update.
        mini:         when True, every update uses pytorch_batch_gradient on one
                      strided subset of the data instead of the whole data set
                      (no accuracies are recorded in this mode).

        The loop stops after max_iter updates, when the norm of an update falls
        below eps, or when it reaches 1000000. For the 2-D update matrix,
        ``np.linalg.norm(..., 2)`` is the largest singular value.
        """
        iterations = 0
        abs_error = 1
        ind = 0
        track_dev = dev_x is not None and dev_y is not None

        if self.theta is None:
            # One row/column of theta per feature column of x.
            n_features = np.shape(x)[1]
            self.theta = np.zeros((n_features, n_features))

        while iterations < self.max_iter and self.eps <= abs_error < 1000000:
            if mini:
                gradient = self.pytorch_batch_gradient(x, y, ind)
                # Cycle through the batch_size disjoint strided subsets.
                ind = (ind + 1) % self.batch_size
            else:
                gradient = self.pytorch_gradient(x, y)

            error = self.step_size*gradient
            abs_error = np.linalg.norm(error, 2)
            self.error_list.append(abs_error)

            self.theta = self.project(self.theta - error)
            iterations += 1

            if mini:
                if self.verbose:
                    print('Error {}: {}'.format(iterations, abs_error))
                continue

            train_accuracy = self.training_predict(x, y)
            self.training_acc.append(train_accuracy)
            if track_dev:
                dev_accuracy = self.training_predict(dev_x, dev_y)
                self.dev_acc.append(dev_accuracy)

            if self.verbose:
                print('Error {}: {}'.format(iterations, abs_error))
                print('Training Accuracy: {}'.format(train_accuracy))
                if track_dev:
                    print('Dev Accuracy: {}'.format(dev_accuracy))

        if self.verbose:
            print('Convergence!')

        # Newer Matplotlib releases ship this style as 'seaborn-v0_8-darkgrid';
        # use whichever name the installed version lists.
        for style_name in ('seaborn-darkgrid', 'seaborn-v0_8-darkgrid'):
            if style_name in plt.style.available:
                plt.style.use(style_name)
                break
        plt.plot(self.training_acc, color = 'firebrick', label='Training Accuracy')
        plt.plot(self.dev_acc, color = 'teal', label='Dev Accuracy')
        plt.xlabel('Iterations')
        plt.ylabel('Accuracy')
        plt.legend()  # without this the labels above are never displayed
        plt.show()

    def process_data(self, teams, dates, results):
        """Build the feature matrix and label vector for a set of games.

        teams:   array of (first team, second team) pairs, one row per game.
        dates:   game dates, indexed like teams (see x_for_game for the format).
        results: score differential of the first team, indexed like teams.

        Returns (x, y): x has one row per game and 2*num_players columns;
        y has shape (num_games, 1) with 1 where results > 0 and 0 otherwise.
        """
        num_games = teams.shape[0]

        # To access x for the 0th game -- x[0, :]
        x = np.zeros((num_games, 2*num_players))
        for i in range(num_games):
            first, second = self.x_for_game(teams[i], dates[i])
            # First team's indicators fill the first half, second team's the second half.
            x[i, :] = np.vstack((first, second))[:, 0]

        # y = 1 if the first team won (positive differential), else 0
        y = (np.asarray(results)[:num_games] > 0).astype(float).reshape(num_games, 1)

        return x, y

    def x_for_game(self, teams, date):
        """Return the two (num_players, 1) roster indicator vectors for one game.

        teams: pair of team keys into the module-level ``roster``.
        date:  string whose characters 0-3 are the year and 5-6 the month.

        Games played in a month before September are looked up under the
        previous calendar year's roster.
        """
        x_1 = np.zeros((num_players, 1))
        x_2 = np.zeros((num_players, 1))

        if int(date[5:7]) < 9:
            year = str(int(date[0:4]) - 1)
        else:
            year = date[0:4]

        for indicator, team in ((x_1, teams[0]), (x_2, teams[1])):
            for player in roster[team][year]:
                indicator[player_index[player]] = 1

        return x_1, x_2

    def add_intercept(self, x):
        """Return a copy of x with a leading column of ones."""
        new_x = np.zeros((x.shape[0], x.shape[1] + 1))
        new_x[:, 0] = 1
        new_x[:, 1:] = x

        return new_x

    def symmetrize(self, m):
        """Return the symmetric part 0.5*(m + m.T) of a square matrix as a new array."""
        m = np.asarray(m)
        return 0.5*(m + m.T)

    def antisymmetrize(self, m):
        """Replace a square matrix by its antisymmetric part 0.5*(m - m.T).

        An ndarray argument is overwritten in place and returned (this also
        works on a slice view); any other input yields a new array.
        """
        skew = 0.5*(np.asarray(m) - np.asarray(m).T)
        if isinstance(m, np.ndarray):
            m[...] = skew
            return m
        return skew

    def project(self, m):
        """Return a new matrix of m's shape whose four blocks have the required structure.

        The diagonal blocks are symmetrised and forced to be the negative of
        each other; the off-diagonal blocks are antisymmetrised and then
        combined according to the branch below. Entries outside the blocks
        (see _block_slices) are zero in the result. m itself is not modified.
        """
        m = np.array(m)  # work on a copy: antisymmetrize writes into the blocks
        side = m.shape[0]
        first, second = self._block_slices(side)

        S = self.symmetrize(m[first, first])
        S_minus = self.symmetrize(m[second, second])

        A = self.antisymmetrize(m[first, second])
        A_minus = self.antisymmetrize(m[second, first])
        S_new = (S - S_minus)/2
        S_minus_new = (S_minus - S)/2

        if np.allclose(A, -1*A_minus, 1e-10, 1e-10):
            A_new = A
            A_minus_new = A_minus
        elif np.linalg.norm(A.T - A_minus, 2) < np.linalg.norm(A - A_minus, 2):
            A_new = 0.5*(A + A_minus)
            A_minus_new = A_new.T
        else:
            A_new = 0.5*(A + A_minus.T)
            A_minus_new = A_new.T

        M = np.zeros(m.shape)
        M[first, first] = S_new
        M[second, second] = S_minus_new
        M[first, second] = A_new
        M[second, first] = A_minus_new

        return M

    def _predict_labels(self, x_rows):
        """Return an array with 1 where predict(row) > 0.5 and 0 otherwise, one entry per row."""
        return np.array([
            1 if np.asarray(self.predict(row)).item() > 0.5 else 0
            for row in x_rows
        ])

    def training_predict(self, test_x, test_y):
        """Return the fraction of rows of test_x whose predicted label equals test_y."""
        predicted_y = self._predict_labels(test_x)
        return np.mean(predicted_y == np.array(test_y.T))

    def general_predict(self, teams, dates, results):
        """Build features with process_data and return the prediction accuracy on them."""
        test_x, test_y = self.process_data(teams, dates, results)
        predicted_y = self._predict_labels(test_x)
        return np.mean(predicted_y == np.array(test_y.T))

    @staticmethod
    def _tally_series(team_pairs, results):
        """Collapse individual games into one net score per pair of teams.

        The key of a pair is the (first, second) order in which it is first
        seen. Every game adds +1 when the key's first team won it and -1
        otherwise; a positive result means the first team listed for that game
        won.
        """
        series = {}
        for (team_1, team_2), result in zip(team_pairs, results):
            first_listed_won = result > 0
            if (team_1, team_2) in series:
                series[team_1, team_2] += 1 if first_listed_won else -1
            elif (team_2, team_1) in series:
                # Teams listed in the opposite order to the stored key.
                series[team_2, team_1] += -1 if first_listed_won else 1
            else:
                series[team_1, team_2] = 1 if first_listed_won else -1
        return series

    def playoff_prediction(self, playoff_filename, playoff_date):
        """Return the accuracy of the model on the series of a playoff file.

        playoff_filename: CSV path relative to curr_directory with the columns
                          'PTS', 'PTS.1', 'Visitor/Neutral', 'Home/Neutral'
                          and 'Date'.
        playoff_date:     date string used for every series when looking up
                          rosters (see x_for_game).

        Games between the same two teams are merged into one series whose
        label is 1 when the first team of the pair has the positive net score.
        """
        # Load playoff data
        playoff_data = pd.read_csv(os.path.join(curr_directory, playoff_filename))

        # Extract features of interest
        raw_playoff_results = np.array(list(playoff_data['PTS'] - playoff_data['PTS.1']))
        raw_playoff_team_pairs = np.array(list(zip(playoff_data['Visitor/Neutral'], playoff_data['Home/Neutral'])))

        playoff_pairs = self._tally_series(raw_playoff_team_pairs, raw_playoff_results)

        playoff_teams = np.array([[key[0], key[1]] for key in playoff_pairs])
        playoff_results = np.array(list(playoff_pairs.values()))
        playoff_dates = np.array([playoff_date for _ in playoff_pairs])

        playoff_x, playoff_y = self.process_data(playoff_teams, playoff_dates, playoff_results)

        predicted_y = self._predict_labels(playoff_x)
        prediction_accuracy = np.mean(predicted_y == np.array(playoff_y.T[0][:]))

        return prediction_accuracy

In [None]:
teams_s, dates_s, results_s = shuffle(teams_all, dates_all, results_all, random_state=0)
# 64% training, 16% dev, 20% test of the first n_full shuffled games.
# n_train, n_dev and n_full are exclusive end indices: Python slices already
# exclude their end, so the next split starts at the same number. Starting at
# "end + 1" would silently leave one game out of every split boundary.
n_train = 4159
n_dev = 5199
n_full = 6500

teams_train = teams_s[:n_train]
dates_train = dates_s[:n_train]
results_train = results_s[:n_train]

teams_dev = teams_s[n_train:n_dev]
dates_dev = dates_s[n_train:n_dev]
results_dev = results_s[n_train:n_dev]

teams_test = teams_s[n_dev:n_full]
dates_test = dates_s[n_dev:n_full]
results_test = results_s[n_dev:n_full]

In [None]:
# Full-dataset run with step size 2e-6, limited to 20 updates
print('lr = 2E-6')
model_lr_2e6 = QuadraticRegression(max_iter=20, step_size=2e-6)

# Feature matrices / labels for the training and dev splits
x_lr_2e6, y_lr_2e6 = model_lr_2e6.process_data(teams_train, dates_train, results_train)
dev_x_2e6, dev_y_2e6 = model_lr_2e6.process_data(teams_dev, dates_dev, results_dev)

model_lr_2e6.fit(x_lr_2e6, y_lr_2e6, dev_x_2e6, dev_y_2e6)

# Final accuracies of the fitted model on both splits
train_accuracy_2e6 = model_lr_2e6.general_predict(teams_train, dates_train, results_train)
dev_accuracy_2e6 = model_lr_2e6.general_predict(teams_dev, dates_dev, results_dev)

for report_line in (
    'Train Accuracy: {}'.format(train_accuracy_2e6),
    'Dev Accuracy: {}'.format(dev_accuracy_2e6),
):
    print(report_line)

# Keep the per-update norms for later comparison of step sizes
np.savetxt('full_dataset_lr_2e6.txt', np.asarray(model_lr_2e6.error_list), delimiter=',')

In [None]:
# Full-dataset run with step size 1e-6
print('lr = 1E-6')
model_lr_1e6 = QuadraticRegression(step_size = 1e-6, max_iter = 200)
x_lr_1e6, y_lr_1e6= model_lr_1e6.process_data(teams_train, dates_train, results_train)
# fit() is given the dev split as well, like the lr = 2E-6 run, so the dev
# accuracy is tracked during training.
dev_x_1e6, dev_y_1e6 = model_lr_1e6.process_data(teams_dev, dates_dev, results_dev)
model_lr_1e6.fit(x_lr_1e6, y_lr_1e6, dev_x_1e6, dev_y_1e6)

train_accuracy_1e6 = model_lr_1e6.general_predict(teams_train, dates_train, results_train)
dev_accuracy_1e6 = model_lr_1e6.general_predict(teams_dev, dates_dev, results_dev)

print('Train Accuracy: {}'.format(train_accuracy_1e6))
print('Dev Accuracy: {}'.format(dev_accuracy_1e6))

# Keep the per-update norms for later comparison of step sizes
np.savetxt('full_dataset_lr_1e6.txt', np.array(model_lr_1e6.error_list), delimiter =',')

In [None]:
# Full-dataset run with step size 5e-7
print('lr = 5E-7')
model_lr_5e7 = QuadraticRegression(step_size = 5e-7, max_iter = 200)
x_lr_5e7, y_lr_5e7= model_lr_5e7.process_data(teams_train, dates_train, results_train)
# fit() is given the dev split as well, like the lr = 2E-6 run, so the dev
# accuracy is tracked during training.
dev_x_5e7, dev_y_5e7 = model_lr_5e7.process_data(teams_dev, dates_dev, results_dev)
model_lr_5e7.fit(x_lr_5e7, y_lr_5e7, dev_x_5e7, dev_y_5e7)

train_accuracy_5e7 = model_lr_5e7.general_predict(teams_train, dates_train, results_train)
dev_accuracy_5e7 = model_lr_5e7.general_predict(teams_dev, dates_dev, results_dev)

print('Train Accuracy: {}'.format(train_accuracy_5e7))
print('Dev Accuracy: {}'.format(dev_accuracy_5e7))

# Keep the per-update norms for later comparison of step sizes
np.savetxt('full_dataset_lr_5e7.txt', np.array(model_lr_5e7.error_list), delimiter =',')

In [None]:
# Individual season analysis -- best learning rate is 2E-6 (determined by previous experiments in quadratic_classifier_torch.ipynb)
# Both values are exclusive slice ends within one shuffled season:
# games [0, n_train_s) are used for training and [n_train_s, n_dev_s) for dev.
# NOTE(review): 1230 assumes every season holds 1230 games, which is what the
# season offsets (1230, 2460, ...) used by the per-season cells imply; with
# 1229 the last game of a full season could never be selected.
n_train_s = 983
n_dev_s = 1230

In [None]:
# 2014-2015 Season
# Slices are half-open, so [0:1230] selects games 0..1229.
# NOTE(review): assumes 1230 games per season, as the season offsets imply.
teams_2014, dates_2014, results_2014 = shuffle(teams_all[0:1230], dates_all[0:1230], results_all[0:1230], random_state=0)

teams_train_2014 = teams_2014[0:n_train_s]
dates_train_2014 = dates_2014[0:n_train_s]
results_train_2014 = results_2014[0:n_train_s]

# The dev split starts right where the training split ends (no game skipped).
teams_dev_2014 = teams_2014[n_train_s:n_dev_s]
dates_dev_2014 = dates_2014[n_train_s:n_dev_s]
results_dev_2014 = results_2014[n_train_s:n_dev_s]

print('Season: 2014-2015')
model_2014 = QuadraticRegression(step_size = 2e-6, max_iter = 250)
x_2014, y_2014 = model_2014.process_data(teams_train_2014, dates_train_2014, results_train_2014)
# fit() also receives the dev split so the dev accuracy is tracked during training.
dev_x_2014, dev_y_2014 = model_2014.process_data(teams_dev_2014, dates_dev_2014, results_dev_2014)
model_2014.fit(x_2014, y_2014, dev_x_2014, dev_y_2014)

train_accuracy_2014 = model_2014.general_predict(teams_train_2014, dates_train_2014, results_train_2014)
dev_accuracy_2014 = model_2014.general_predict(teams_dev_2014, dates_dev_2014, results_dev_2014)
test_accuracy_2014 = model_2014.playoff_prediction('data_sets/2015_playoffs.csv', '2015-04-10')

print('Train Accuracy: {}'.format(train_accuracy_2014))
print('Dev Accuracy: {}'.format(dev_accuracy_2014))
print('Playoff Accuracy: {}'.format(test_accuracy_2014))

np.savetxt('2014_2015_lr_2e6.txt', np.array(model_2014.error_list), delimiter =',')

In [None]:
# 2015-2016 Season
# Slices are half-open, so [1230:2460] selects games 1230..2459.
# NOTE(review): assumes 1230 games per season, as the season offsets imply.
teams_2015, dates_2015, results_2015 = shuffle(teams_all[1230:2460], dates_all[1230:2460], results_all[1230:2460], random_state=0)

teams_train_2015 = teams_2015[0:n_train_s]
dates_train_2015 = dates_2015[0:n_train_s]
results_train_2015 = results_2015[0:n_train_s]

# The dev split starts right where the training split ends (no game skipped).
teams_dev_2015 = teams_2015[n_train_s:n_dev_s]
dates_dev_2015 = dates_2015[n_train_s:n_dev_s]
results_dev_2015 = results_2015[n_train_s:n_dev_s]

print('Season: 2015-2016')
model_2015 = QuadraticRegression(step_size = 2e-6, max_iter = 250)
x_2015, y_2015 = model_2015.process_data(teams_train_2015, dates_train_2015, results_train_2015)
# fit() also receives the dev split so the dev accuracy is tracked during training.
dev_x_2015, dev_y_2015 = model_2015.process_data(teams_dev_2015, dates_dev_2015, results_dev_2015)
model_2015.fit(x_2015, y_2015, dev_x_2015, dev_y_2015)

train_accuracy_2015 = model_2015.general_predict(teams_train_2015, dates_train_2015, results_train_2015)
dev_accuracy_2015 = model_2015.general_predict(teams_dev_2015, dates_dev_2015, results_dev_2015)
test_accuracy_2015 = model_2015.playoff_prediction('data_sets/2016_playoffs.csv', '2016-04-10')

print('Train Accuracy: {}'.format(train_accuracy_2015))
print('Dev Accuracy: {}'.format(dev_accuracy_2015))
print('Playoff Accuracy: {}'.format(test_accuracy_2015))

np.savetxt('2015_2016_lr_2e6.txt', np.array(model_2015.error_list), delimiter =',')

In [None]:
# 2016-2017 Season
# Slices are half-open, so [2460:3690] selects games 2460..3689.
# NOTE(review): assumes 1230 games per season, as the season offsets imply.
teams_2016, dates_2016, results_2016 = shuffle(teams_all[2460:3690], dates_all[2460:3690], results_all[2460:3690], random_state=0)

teams_train_2016 = teams_2016[0:n_train_s]
dates_train_2016 = dates_2016[0:n_train_s]
results_train_2016 = results_2016[0:n_train_s]

# The dev split starts right where the training split ends (no game skipped).
teams_dev_2016 = teams_2016[n_train_s:n_dev_s]
dates_dev_2016 = dates_2016[n_train_s:n_dev_s]
results_dev_2016 = results_2016[n_train_s:n_dev_s]

print('Season: 2016-2017')
model_2016 = QuadraticRegression(step_size = 2e-6, max_iter = 250)
x_2016, y_2016 = model_2016.process_data(teams_train_2016, dates_train_2016, results_train_2016)
# fit() also receives the dev split so the dev accuracy is tracked during training.
dev_x_2016, dev_y_2016 = model_2016.process_data(teams_dev_2016, dates_dev_2016, results_dev_2016)
model_2016.fit(x_2016, y_2016, dev_x_2016, dev_y_2016)

train_accuracy_2016 = model_2016.general_predict(teams_train_2016, dates_train_2016, results_train_2016)
dev_accuracy_2016 = model_2016.general_predict(teams_dev_2016, dates_dev_2016, results_dev_2016)
test_accuracy_2016 = model_2016.playoff_prediction('data_sets/2017_playoffs.csv', '2017-04-10')

print('Train Accuracy: {}'.format(train_accuracy_2016))
print('Dev Accuracy: {}'.format(dev_accuracy_2016))
print('Playoff Accuracy: {}'.format(test_accuracy_2016))

np.savetxt('2016_2017_lr_2e6.txt', np.array(model_2016.error_list), delimiter =',')

In [None]:
# 2017-2018 Season
# Slices are half-open, so [3690:4920] selects games 3690..4919.
# NOTE(review): assumes 1230 games per season, as the season offsets imply.
teams_2017, dates_2017, results_2017 = shuffle(teams_all[3690:4920], dates_all[3690:4920], results_all[3690:4920], random_state=0)

teams_train_2017 = teams_2017[0:n_train_s]
dates_train_2017 = dates_2017[0:n_train_s]
results_train_2017 = results_2017[0:n_train_s]

# The dev split starts right where the training split ends (no game skipped).
teams_dev_2017 = teams_2017[n_train_s:n_dev_s]
dates_dev_2017 = dates_2017[n_train_s:n_dev_s]
results_dev_2017 = results_2017[n_train_s:n_dev_s]

print('Season: 2017-2018')
model_2017 = QuadraticRegression(step_size = 2e-6, max_iter = 250)
x_2017, y_2017 = model_2017.process_data(teams_train_2017, dates_train_2017, results_train_2017)
# fit() also receives the dev split so the dev accuracy is tracked during training.
dev_x_2017, dev_y_2017 = model_2017.process_data(teams_dev_2017, dates_dev_2017, results_dev_2017)
model_2017.fit(x_2017, y_2017, dev_x_2017, dev_y_2017)

train_accuracy_2017 = model_2017.general_predict(teams_train_2017, dates_train_2017, results_train_2017)
dev_accuracy_2017 = model_2017.general_predict(teams_dev_2017, dates_dev_2017, results_dev_2017)
test_accuracy_2017 = model_2017.playoff_prediction('data_sets/2018_playoffs.csv', '2018-04-10')

print('Train Accuracy: {}'.format(train_accuracy_2017))
print('Dev Accuracy: {}'.format(dev_accuracy_2017))
print('Playoff Accuracy: {}'.format(test_accuracy_2017))

np.savetxt('2017_2018_lr_2e6.txt', np.array(model_2017.error_list), delimiter =',')

In [None]:
# 2018-2019 Season
# Slices are half-open, so [4920:6150] selects games 4920..6149.
# NOTE(review): assumes 1230 games per season, as the season offsets imply.
teams_2018, dates_2018, results_2018 = shuffle(teams_all[4920:6150], dates_all[4920:6150], results_all[4920:6150], random_state=0)

teams_train_2018 = teams_2018[0:n_train_s]
dates_train_2018 = dates_2018[0:n_train_s]
results_train_2018 = results_2018[0:n_train_s]

# The dev split starts right where the training split ends (no game skipped).
teams_dev_2018 = teams_2018[n_train_s:n_dev_s]
dates_dev_2018 = dates_2018[n_train_s:n_dev_s]
results_dev_2018 = results_2018[n_train_s:n_dev_s]

# The label now names the season this cell actually trains on.
print('Season: 2018-2019')
model_2018 = QuadraticRegression(step_size = 2e-6, max_iter = 250)
x_2018, y_2018 = model_2018.process_data(teams_train_2018, dates_train_2018, results_train_2018)
# fit() also receives the dev split so the dev accuracy is tracked during training.
dev_x_2018, dev_y_2018 = model_2018.process_data(teams_dev_2018, dates_dev_2018, results_dev_2018)
model_2018.fit(x_2018, y_2018, dev_x_2018, dev_y_2018)

train_accuracy_2018 = model_2018.general_predict(teams_train_2018, dates_train_2018, results_train_2018)
dev_accuracy_2018 = model_2018.general_predict(teams_dev_2018, dates_dev_2018, results_dev_2018)
test_accuracy_2018 = model_2018.playoff_prediction('data_sets/2019_playoffs.csv', '2019-04-10')

print('Train Accuracy: {}'.format(train_accuracy_2018))
print('Dev Accuracy: {}'.format(dev_accuracy_2018))
print('Playoff Accuracy: {}'.format(test_accuracy_2018))

np.savetxt('2018_2019_lr_2e6.txt', np.array(model_2018.error_list), delimiter =',')