# NBA Game Outcome Predictor 
### CMPE 257 Project
Authors: Kaushika Uppu, Miranda Billawala, Yun Ei Hlaing, Iris Cheung

## Imports

In [None]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns

import random
from datetime import datetime, timedelta
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier, XGBRegressor
from sklearn.metrics import mean_squared_error
import itertools

## Training Data
We load the saved statistic predictions for each game produced by our two methods: a rolling window and an XGBoost model. Due to the computational cost of predicting statistics with the model, the model data only spans 2014-2025.

In [None]:
# Load the cleaned per-game statistics and parse the ISO 8601 GAME_DATE strings
# into datetime values (get_val_set relies on date arithmetic on this column).
all_stats_cleaned = pd.read_csv('all_stats_cleaned.csv').assign(
    GAME_DATE=lambda frame: pd.to_datetime(frame['GAME_DATE'], format='ISO8601')
)

all_stats_cleaned.head()

In [None]:
# Rolling-window feature table; GAME_DATE is parsed so rows can be compared against dates.
df_rolling = pd.read_csv('df_rolling.csv')
df_rolling = df_rolling.assign(GAME_DATE=pd.to_datetime(df_rolling['GAME_DATE']))
df_rolling.head()

In [None]:
# Feature table built from the tuned statistics model; GAME_DATE is parsed to datetime.
df_model = pd.read_csv('df_model_tuned.csv')
df_model = df_model.assign(GAME_DATE=pd.to_datetime(df_model['GAME_DATE']))
df_model.head()

## Validation Set

In [None]:
def get_val_set (first_season, last_season, n = 1, random_state = None) :
    """
    Sample validation dates from three stretches of every season.

    For each season in range(first_season, last_season) (last_season itself is excluded),
    n game rows are drawn from each of three four-week windows: the first four weeks,
    the four weeks starting about 2/3 of the way through the season (approximate trade
    deadline), and the last four weeks.

    Input: First season (inclusive), last season (exclusive), number of rows to sample per
           window, and an optional seed (int) or random state that makes the sampling reproducible
    Output: List of GAME_DATE values, 3 * n for every season that has data

    Reads the notebook-level all_stats_cleaned DataFrame (columns SEASON_YEAR and GAME_DATE).
    Rows are sampled rather than distinct dates, so the same date can be returned more than once.
    Seasons without any rows are skipped; a window with fewer than n rows still raises ValueError.
    """
    # An int seed becomes one shared RandomState so the three windows of every season draw
    # independently while the whole call stays reproducible. Anything else (including None,
    # which keeps the unseeded behaviour) is handed to pandas unchanged.
    if isinstance(random_state, (int, np.integer)) :
        rng = np.random.RandomState(random_state)
    else :
        rng = random_state

    four_weeks = timedelta(weeks = 4)
    dates = []
    for season in range(first_season, last_season) :
        season_data = all_stats_cleaned[all_stats_cleaned['SEASON_YEAR'] == season]
        if season_data.empty :
            continue # no games recorded for this season, nothing to sample

        game_dates = season_data['GAME_DATE']
        start_date = game_dates.min()
        end_date = game_dates.max()

        # day around the beginning of the season
        beg = season_data[game_dates.between(start_date, start_date + four_weeks)]

        # day around trade deadline (after about 2/3 of the season)
        delta = round((2/3)*(end_date-start_date).days)
        approx_deadline = start_date + timedelta(days = delta)
        mid = season_data[game_dates.between(approx_deadline, approx_deadline + four_weeks)]

        # day around the end of the season
        end = season_data[game_dates.between(end_date - four_weeks, end_date)]

        for window in (beg, mid, end) :
            dates.extend(window.sample(n, random_state = rng)['GAME_DATE'])

    return dates

In [None]:
# Validation seasons start one season after the earliest in the data and stop five
# seasons before the latest (get_val_set treats last_season as exclusive).
first_season, last_season = (
    all_stats_cleaned['SEASON_YEAR'].min() + 1,
    all_stats_cleaned['SEASON_YEAR'].max() - 5,
)
val_set = get_val_set(first_season, last_season)

## Model Building
Since it is computationally expensive to run the second model to predict all the values in the dataset, we will perform feature selection and hyperparameter tuning on the model trained on the basic rolling statistics. Then, we will predict on the test set with both types of models to see which performs better.

In [None]:
# Number of past seasons of games used to fit each model; read by pred_by_date.
time_horizon = 5

In [None]:
def get_training_set (df, date, num_seasons) :
    """
    Build the training data that is available before a given game date.

    Input: DataFrame of games (needs SEASON_YEAR, GAME_DATE and WIN_ONE columns), date of the
           games to predict (anything pd.Timestamp accepts), and number of previous seasons
           to include in the dataset
    Output: Tuple (X, y). X holds all rows from the last num_seasons seasons and all games in the
            current season played strictly before the given date, ordered from most to least
            recent, with WIN_ONE and GAME_DATE dropped and a DAYS_SINCE_GAME column added.
            y is the matching WIN_ONE column.
    """
    # normalize so np.datetime64 / datetime / string inputs all expose .year and .month
    date = pd.Timestamp(date)

    # determine season of the game: October onward maps to that calendar year,
    # earlier months map to the previous year
    season = date.year if date.month >= 10 else date.year - 1

    # get games for training
    data = df[df['SEASON_YEAR'].between(season - num_seasons, season)].copy()
    # whole days between each game and the prediction date, computed in one vectorized step
    data['DAYS_SINCE_GAME'] = (date - data['GAME_DATE']).dt.days
    # keep only games strictly before the prediction date so no same-day or future rows leak in
    data = data[data['DAYS_SINCE_GAME'] > 0]

    data = data.sort_values(by = 'DAYS_SINCE_GAME')

    # split into X and y and only look at relevant columns
    X = data.drop(columns = ['WIN_ONE', 'GAME_DATE'])
    y = data['WIN_ONE']

    return (X,y)

def pred_by_date (df, model, date) :
    """
    Predict the outcome of all games on the given date.

    Input: DataFrame of games (needs GAME_DATE and WIN_ONE columns), an unfitted or refittable
           model exposing fit/predict, and the date to predict (anything pd.Timestamp accepts)
    Output: Tuple (pred, actual) with the predicted labels and the WIN_ONE column of the games
            played on that date. If no games were played on the date, an empty prediction
            array is returned and the model is not fitted.

    The model is refitted on every call using the games from the last time_horizon seasons
    that were played before the date.
    """
    n = time_horizon # how many years in the past for training

    # normalize so np.datetime64 / datetime / string inputs behave like the Timestamps in df
    date = pd.Timestamp(date)

    games_on_day = df[df['GAME_DATE'] == date].copy()
    if games_on_day.empty :
        # nothing to predict; skip the (expensive) fit instead of predicting on zero rows
        return np.array([]), games_on_day['WIN_ONE']
    # the games being predicted are zero days away from the prediction date
    games_on_day['DAYS_SINCE_GAME'] = np.zeros(len(games_on_day))

    # get data in relevant time frame
    X, y = get_training_set(df, date, n)

    # same columns, in the same order, as the training features
    test = games_on_day.drop(columns = ['WIN_ONE', 'GAME_DATE'])

    model.fit(X,y)
    pred = model.predict(test)
    return pred, games_on_day['WIN_ONE']

def test_model(df, model, dates) :
    """
    Score a model over several game dates.

    Input: DataFrame of games, model to refit for each date, and the dates to predict
    Output: Tuple (number of correctly predicted games, number of games predicted)
    """
    total_correct = 0
    total_games = 0

    for game_date in dates:
        predicted, actual = pred_by_date(df, model, game_date)
        total_correct = total_correct + np.sum(predicted == actual)
        total_games = total_games + len(predicted)

    return total_correct, total_games

### Random Forests (baseline)

In [None]:
# Baseline: random forest with default hyperparameters; the cell displays accuracy on the validation dates.
model = RandomForestClassifier(random_state=33)
correct, games = test_model(df=df_rolling, model=model, dates=val_set)
correct / games

### XGBoost

In [None]:
# Untuned XGBoost classifier; the cell displays accuracy on the validation dates.
model = XGBClassifier(
    objective='binary:logistic',
    base_score=0.5,
    random_state=33,
)
correct, games = test_model(df=df_rolling, model=model, dates=val_set)
correct / games

## Feature Selection
The average feature importance score is calculated across the three sampled validation dates of each season using XGBoost's built-in feature importance (gain).

In [None]:
def pred_by_date_with_importance(df, model, date):
    """
    Predict the games on the given date and report the fitted model's feature importances.

    Input: DataFrame of games, an XGBoost model (get_booster() must be available), and the date
    Output: Tuple (number of correct predictions, number of games on the date,
            dict mapping feature name to its gain importance in the model fitted for this date)
    """
    n = 5 # how many years in the past for training

    # training rows come from the same DataFrame the games are taken from
    X, y = get_training_set(df, date, n)

    games_on_day = df[df['GAME_DATE'] == date].copy()
    # the games being predicted are zero days away from the prediction date
    games_on_day['DAYS_SINCE_GAME'] = np.zeros(len(games_on_day))

    test = games_on_day.drop(columns = ['WIN_ONE', 'GAME_DATE'])

    model.fit(X,y)
    pred = model.predict(test)
    correct = np.sum(pred == games_on_day['WIN_ONE'])
    games = len(pred)
    importance_scores = model.get_booster().get_score(importance_type='gain')
    return correct, games, importance_scores

In [None]:
def test_model_with_importance(df, model, dates = None) :
    """
    Outputs the average feature importance scores of game predictions

    Input: DataFrame of games, an XGBoost model, and optionally the dates to evaluate on
           (defaults to the notebook-level val_set)
    Output: List of (feature, average importance) tuples sorted from most to least important.
            Each average is taken over the dates for which the fitted model reported a score
            for that feature.
    """
    if dates is None :
        dates = val_set

    feature_scores = {}
    for t in dates:
        # only the importances are needed here; the accuracy counts are ignored
        _, _, importance_scores = pred_by_date_with_importance(df, model, t)

        for feature, score in importance_scores.items():
            feature_scores.setdefault(feature, []).append(score)

    average_importance = {feature: sum(scores)/len(scores) for feature, scores in feature_scores.items()}
    sorted_features = sorted(average_importance.items(), key=lambda x: x[1], reverse=True)

    return sorted_features

In [None]:
# Rank the rolling-window features by their average importance.
model = XGBClassifier(objective='binary:logistic')
importance_scores = test_model_with_importance(df=df_rolling, model=model)
print(importance_scores)

Testing the model with the feature importance scores by iteratively removing the least important features and comparing the accuracy:

In [None]:
def get_training_set_with_features (df, date, num_seasons, features) :
    """
    Training data restricted to a chosen feature subset.

    Input: DataFrame of games, date of the games to predict, number of previous seasons to
           include, and the list of feature columns to keep
    Output: Tuple (X, y) built from all rows of the last num_seasons seasons and the current
            season that were played before the given date, most recent first. X contains only
            the requested feature columns, y the WIN_ONE column.
    """
    # October onward counts towards that calendar year's season, earlier months towards the previous one
    current_season = date.year - (0 if date.month >= 10 else 1)

    in_window = df['SEASON_YEAR'].between(current_season - num_seasons, current_season)
    history = df[in_window].copy()
    history['DAYS_SINCE_GAME'] = [(date - played_on).days for played_on in history['GAME_DATE']]

    # drop same-day and later games, then order by recency
    played_before = history['DAYS_SINCE_GAME'] > 0
    history = history[played_before].sort_values(by = 'DAYS_SINCE_GAME')

    return (history[features], history['WIN_ONE'])

def pred_by_date_with_features (df, model, date, features) :
    """
    Fit the model on a feature subset and predict the games on the given date.

    Input: DataFrame of games, model exposing fit/predict, date to predict, and the list of
           feature columns to use
    Output: Tuple (number of correct predictions, number of games on the date)
    """
    seasons_back = 5
    train_X, train_y = get_training_set_with_features(df, date, seasons_back, features)

    todays_games = df[df['GAME_DATE'] == date].copy()
    # the games being predicted are zero days away from the prediction date
    todays_games['DAYS_SINCE_GAME'] = np.zeros(len(todays_games))
    todays_X = todays_games[features]

    model.fit(train_X, train_y)
    predictions = model.predict(todays_X)

    hits = np.sum(predictions == todays_games['WIN_ONE'])
    return hits, len(predictions)

In [None]:
def feature_selection_with_importance(df, model, current_features, min_subset_size, top_n, dates = None) :
    """
    Iterates through the feature importance scores and iteratively removes the least important features

    Input: DataFrame of games, model exposing fit/predict, list of features ordered from most to
           least important, smallest subset size to evaluate, number of best subsets to return,
           and optionally the dates to evaluate on (defaults to the notebook-level val_set)
    Output: List of up to top_n (feature subset, accuracy) tuples sorted by accuracy, best first

    Each round evaluates the current subset on every date and then drops its last (least
    important) feature. The list passed in by the caller is not modified.
    """
    if dates is None :
        dates = val_set

    results = []
    # work on a copy so popping features does not shrink the caller's list
    remaining = list(current_features)

    # the emptiness check stops pop() from failing when min_subset_size <= 0
    while remaining and len(remaining) >= min_subset_size:
        total_correct = total_games = 0
        print(f"Evaluating with {len(remaining)} features...")
        for t in dates:
            correct, games = pred_by_date_with_features(df, model, t, features = remaining)

            total_correct += correct
            total_games += games
        accuracy = total_correct/total_games
        print(remaining, ':', accuracy)
        results.append((remaining.copy(), accuracy))
        remaining.pop()
    results.sort(key=lambda x: x[1], reverse=True)
    return results[:top_n]

In [None]:
# Evaluate progressively smaller feature subsets, starting from all ranked features.
model = XGBClassifier(objective='binary:logistic')
sorted_features = [name for name, _ in importance_scores]
print(sorted_features)
top_subsets = feature_selection_with_importance(
    df_rolling,
    model,
    sorted_features,
    min_subset_size=20,
    top_n=10,
)

for i, (subset, acc) in enumerate(top_subsets, start=1):
    print(f"#{i}: Features = {subset}, Accuracy = {acc:.4f}")

In [None]:
# best performing feature subset
best_feature_subset = top_subsets[0][0]
print('Best feature subset: ', best_feature_subset)
total_correct = total_games = 0
# re-evaluate on the validation dates with the same rolling-window data used for selection
for t in val_set:
    correct, games = pred_by_date_with_features(df_rolling, model, t, best_feature_subset)

    total_correct += correct
    total_games += games
print('Accuracy:', total_correct / total_games)

## Hyperparameter Tuning

In [None]:
def pred_by_date_multiple_models (models_dict, date, df = None) :
    """
    Predict the outcome of all games on the given date for all models given. Used specifically to make
    cross validation more efficient

    Input: Dict mapping the integer positions 0..len(models_dict)-1 to models exposing fit/predict,
           the date to predict, and optionally the DataFrame of games
           (defaults to the notebook-level df_rolling)
    Output: Tuple (scores, number of games on the date), where scores[k] is the number of games
            models_dict[k] predicted correctly

    The training data is built once per date and shared by every model.
    """
    if df is None :
        df = df_rolling

    n = 5 # how many years in the past for training

    # get data in relevant time frame
    X, y = get_training_set(df, date, n)

    games_on_day = df[df['GAME_DATE'] == date].copy()
    # the games being predicted are zero days away from the prediction date
    games_on_day['DAYS_SINCE_GAME'] = np.zeros(len(games_on_day))

    test = games_on_day.drop(columns = ['WIN_ONE', 'GAME_DATE'])

    scores = np.zeros(len(models_dict))
    for k, v in models_dict.items() :
        v.fit(X,y)
        # predict the games of this date (not the list of validation dates)
        pred = v.predict(test)
        scores[k] = np.sum(pred == games_on_day['WIN_ONE'])
    return scores, len(games_on_day)

In [None]:
# XGBoost parameters
param_grid = {
    "n_estimators": [50, 100, 200, 400],
    "eta": [0.01, 0.05, 0.1, 0.2], # learning_rate
    "max_depth": [4, 6, 8, 10], # maximum depth of a tree
    "subsample": [0.5, 0.7, 1], # fraction of observations to be randomly sampled for each tree
    "colsample_bytree": [0.5, 0.7, 1], # fraction of columns to be randomly sampled for each tree
    "alpha": [0.5, 1, 2, 5] # lasso regression
}

param_dict = {} # store params with key corresponding to index of score in np.array
index = 0

# Iterate over all combinations of hyperparameters
for values in itertools.product(*param_grid.values()):
    param_dict[index] = XGBClassifier(objective='binary:logistic', random_state = 33, **dict(zip(param_grid.keys(), values)))
    index += 1

scores = np.zeros(len(param_dict))
total_games = 0

# season bounds of the rolling-window data (not used by the loop below)
first_season = df_rolling['SEASON_YEAR'].min()
last_season = df_rolling['SEASON_YEAR'].max()-4

# accumulate, per model, the number of correct predictions over the validation dates
for t in val_set:
    s, g = pred_by_date_multiple_models(param_dict, t)

    scores += s
    total_games += g
    print(scores / total_games)

print('final scores: ', scores / total_games)

In [None]:
# Accuracy of every parameter combination, then the model with the highest accuracy.
all_scores = scores / total_games
best_index = all_scores.argmax()
best_model = param_dict[best_index]
# recorded result: 'n_estimators': 400, eta: 0.01, max_depth: 4, subsample: 0.7, colsample_bytree: 0.7, alpha: 2
best_model.get_params()

In [None]:
# Indices of the five highest-scoring models, reordered from best to worst.
top_five_models = np.argpartition(all_scores, -5)[-5:]
top_five_models = top_five_models[np.argsort(-all_scores[top_five_models])]
top_five_scores = all_scores[top_five_models]
print(top_five_scores)
for i in top_five_models :
    p = param_dict[i].get_params()
    print(", ".join(
        f"{name} = {p[name]}"
        for name in ('n_estimators', 'eta', 'max_depth', 'subsample', 'colsample_bytree', 'alpha')
    ))

## Test Models
We want to test the model trained on rolling averages and the predicted statistics from the second model. We will predict every game in the last 4 seasons. This means we need to predict all the statistics for the games in the last 9 seasons using the second model. We have these predictions stored in the csv files imported at the beginning.

In [None]:
# get all dates in the test set
time_horizon = 5 # can change here
first_test_season = df_rolling['SEASON_YEAR'].max() - 5
# every distinct game date from first_test_season onward, in chronological order
test_set = (
    df_rolling.loc[df_rolling['SEASON_YEAR'] >= first_test_season, 'GAME_DATE']
    .sort_values()
    .unique()
)

In [None]:
# Models compared on the test set: the random forest baseline and the XGBoost classifier
# with the hyperparameters chosen for the final evaluation.
rf = RandomForestClassifier(random_state=33)
final_model = XGBClassifier(
    n_estimators=200,
    eta=0.05,
    max_depth=4,
    subsample=0.5,
    colsample_bytree=0.5,
    alpha=1,
    random_state=42,
)

### Rolling Window 

In [None]:
# Random forest on the rolling-window features.
correct, games = test_model(df=df_rolling, model=rf, dates=test_set)
print("Score:", correct / games)

In [None]:
# XGBoost on the rolling-window features.
correct, games = test_model(df=df_rolling, model=final_model, dates=test_set)
print("Score:", correct / games)

### ML Model Predictions

In [None]:
# Random forest on the features produced by the statistics model.
correct, games = test_model(df=df_model, model=rf, dates=test_set)
print("Score:", correct / games)

In [None]:
# XGBoost on the features produced by the statistics model.
correct, games = test_model(df=df_model, model=final_model, dates=test_set)
print("Score:", correct / games)

## Playoff Prediction
First Round:

Eastern
1. Cleveland (C) vs. Miami (H): 4/20 C, 4/23 C, 4/26 H, 4/28 H, 4/30 C, 5/2 H, 5/4 C
2. Boston (C) vs. Orlando (M): 4/20 C, 4/23 C, 4/25 M, 4/27 M, 4/29 C, 5/1 M, 5/3 C
3. New York (K) vs. Detroit (P): 4/19 K, 4/21 K, 4/24 P, 4/27 P, 4/29 K, 5/1 P, 5/3 K
4. Indiana (P) vs. Milwaukee (B): 4/19 P, 4/22 P, 4/25 B, 4/27 B, 4/29 P, 5/2 B, 5/4 P

Western
1. Oklahoma City (T) vs. Memphis (G): 4/20 T, 4/22 T, 4/24 G, 4/26 G, 4/28 T, 5/1 G, 5/3 T
2. Houston (R) vs Golden State (W): 4/20 R, 4/23 R, 4/26 W, 4/28 W, 4/30 R, 5/2 W, 5/4 R
3. LA Lakers (L) vs. Minnesota (T): 4/19 L, 4/22 L, 4/25 T, 4/27 T, 4/30 L, 5/2 T, 5/4 L
4. Denver (N) vs. LA Clippers (C): 4/19 N, 4/21 N, 4/24 C, 4/26 C, 4/29 N, 5/1 C, 5/3 N

Semifinals begin May 5-6

Conference Finals begin May 20-21

Western: 5/20, 5/22, 5/24, 5/26, 5/28, 5/30, 6/1

Eastern: 5/21, 5/23, 5/25, 5/27, 5/29, 5/31, 6/2

Finals begin June 5: 6/5, 6/8, 6/11, 6/13, 6/16, 6/19, 6/22


In [None]:
# Reload the model-based feature table from disk before appending playoff games to it.
df_model = pd.read_csv('df_model_tuned.csv')
df_model = df_model.assign(GAME_DATE=pd.to_datetime(df_model['GAME_DATE']))

### Round 1
#### Game 1

In [None]:
# Round 1, Game 1: append the game rows to the model data and predict every game date.
game_one = pd.read_csv('playoffs_round_one_one.csv')
game_one['GAME_DATE'] = pd.to_datetime(game_one['GAME_DATE'])

df_model= pd.concat([df_model, game_one])
test_set = game_one['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 2

In [None]:
# Round 1, Game 2: append the game rows to the model data and predict every game date.
game_two = pd.read_csv('playoffs_round_one_two.csv')
game_two['GAME_DATE'] = pd.to_datetime(game_two['GAME_DATE'])

df_model= pd.concat([df_model, game_two])
test_set = game_two['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 3

In [None]:
# Round 1, Game 3: append the game rows to the model data and predict every game date.
game_three= pd.read_csv('playoffs_round_one_three.csv')
game_three['GAME_DATE'] = pd.to_datetime(game_three['GAME_DATE'])

df_model= pd.concat([df_model, game_three])
test_set = game_three['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 4

In [None]:
# Round 1, Game 4: append the game rows to the model data and predict every game date.
game_four= pd.read_csv('playoffs_round_one_four.csv')
game_four['GAME_DATE'] = pd.to_datetime(game_four['GAME_DATE'])

df_model= pd.concat([df_model, game_four])
test_set = game_four['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 5

In [None]:
# Round 1, Game 5: append the game rows to the model data and predict every game date.
game_five = pd.read_csv('playoffs_round_one_five.csv')
game_five['GAME_DATE'] = pd.to_datetime(game_five['GAME_DATE'])

df_model= pd.concat([df_model, game_five])
test_set = game_five['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 6

In [None]:
# Round 1, Game 6: append the game rows to the model data and predict every game date.
game_six = pd.read_csv('playoffs_round_one_six.csv')
game_six['GAME_DATE'] = pd.to_datetime(game_six['GAME_DATE'])

df_model= pd.concat([df_model, game_six])
test_set = game_six['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 7

In [None]:
# Round 1, Game 7: append the game rows to the model data and predict every game date.
game_seven = pd.read_csv('playoffs_round_one_seven.csv')
game_seven['GAME_DATE'] = pd.to_datetime(game_seven['GAME_DATE'])

df_model= pd.concat([df_model, game_seven])
test_set = game_seven['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

### Round 2
#### Game 1

In [None]:
# Round 2, Game 1: append the game rows to the model data and predict every game date.
game_one = pd.read_csv('playoffs_round_two_one.csv')
game_one['GAME_DATE'] = pd.to_datetime(game_one['GAME_DATE'])

df_model= pd.concat([df_model, game_one])
test_set = game_one['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 2

In [None]:
# Round 2, Game 2: append the game rows to the model data and predict every game date.
game_two = pd.read_csv('playoffs_round_two_two.csv')
game_two['GAME_DATE'] = pd.to_datetime(game_two['GAME_DATE'])

df_model= pd.concat([df_model, game_two])
test_set = game_two['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 3

In [None]:
# Round 2, Game 3: append the game rows to the model data and predict every game date.
game_three= pd.read_csv('playoffs_round_two_three.csv')
game_three['GAME_DATE'] = pd.to_datetime(game_three['GAME_DATE'])

df_model= pd.concat([df_model, game_three])
test_set = game_three['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 4

In [None]:
# Round 2, Game 4: append the game rows to the model data and predict every game date.
game_four= pd.read_csv('playoffs_round_two_four.csv')
game_four['GAME_DATE'] = pd.to_datetime(game_four['GAME_DATE'])

df_model= pd.concat([df_model, game_four])
test_set = game_four['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 5

In [None]:
# Round 2, Game 5: append the game rows to the model data and predict every game date.
game_five = pd.read_csv('playoffs_round_two_five.csv')
game_five['GAME_DATE'] = pd.to_datetime(game_five['GAME_DATE'])

df_model= pd.concat([df_model, game_five])
test_set = game_five['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 6

In [None]:
# Round 2, Game 6: append the game rows to the model data and predict every game date.
game_six = pd.read_csv('playoffs_round_two_six.csv')
game_six['GAME_DATE'] = pd.to_datetime(game_six['GAME_DATE'])

df_model= pd.concat([df_model, game_six])
test_set = game_six['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 7

In [None]:
# Round 2, Game 7: append the game rows to the model data and predict every game date.
game_seven = pd.read_csv('playoffs_round_two_seven.csv')
game_seven['GAME_DATE'] = pd.to_datetime(game_seven['GAME_DATE'])

df_model= pd.concat([df_model, game_seven])
test_set = game_seven['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

### Round 3: Conference Finals
#### Game 1

In [None]:
# Conference Finals, Game 1: append the game rows to the model data and predict every game date.
game_one = pd.read_csv('playoffs_round_three_one.csv')
game_one['GAME_DATE'] = pd.to_datetime(game_one['GAME_DATE'])

df_model= pd.concat([df_model, game_one])
test_set = game_one['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 2

In [None]:
# Conference Finals, Game 2: append the game rows to the model data and predict every game date.
game_two = pd.read_csv('playoffs_round_three_two.csv')
game_two['GAME_DATE'] = pd.to_datetime(game_two['GAME_DATE'])

df_model= pd.concat([df_model, game_two])
test_set = game_two['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 3

In [None]:
# Conference Finals, Game 3: append the game rows to the model data and predict every game date.
game_three= pd.read_csv('playoffs_round_three_three.csv')
game_three['GAME_DATE'] = pd.to_datetime(game_three['GAME_DATE'])

df_model= pd.concat([df_model, game_three])
test_set = game_three['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 4

In [None]:
# Conference Finals, Game 4: append the game rows to the model data and predict every game date.
game_four= pd.read_csv('playoffs_round_three_four.csv')
game_four['GAME_DATE'] = pd.to_datetime(game_four['GAME_DATE'])

df_model= pd.concat([df_model, game_four])
test_set = game_four['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 5

In [None]:
# Conference Finals, Game 5: append the game rows to the model data and predict every game date.
game_five = pd.read_csv('playoffs_round_three_five.csv')
game_five['GAME_DATE'] = pd.to_datetime(game_five['GAME_DATE'])

df_model= pd.concat([df_model, game_five])
test_set = game_five['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 6

In [None]:
# Conference Finals, Game 6: append the game rows to the model data and predict every game date.
game_six = pd.read_csv('playoffs_round_three_six.csv')
game_six['GAME_DATE'] = pd.to_datetime(game_six['GAME_DATE'])

df_model= pd.concat([df_model, game_six])
test_set = game_six['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 7

In [None]:
# Conference Finals, Game 7: append the game rows to the model data and predict every game date.
game_seven = pd.read_csv('playoffs_round_three_seven.csv')
game_seven['GAME_DATE'] = pd.to_datetime(game_seven['GAME_DATE'])

df_model= pd.concat([df_model, game_seven])
test_set = game_seven['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

### Round 4 : Finals

#### Game 1

In [None]:
# Finals, Game 1: append the game rows to the model data and predict every game date.
game_one = pd.read_csv('playoffs_round_four_one.csv')
game_one['GAME_DATE'] = pd.to_datetime(game_one['GAME_DATE'])

df_model= pd.concat([df_model, game_one])
test_set = game_one['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 2

In [None]:
# Finals, Game 2: append the game rows to the model data and predict every game date.
game_two = pd.read_csv('playoffs_round_four_two.csv')
game_two['GAME_DATE'] = pd.to_datetime(game_two['GAME_DATE'])

df_model= pd.concat([df_model, game_two])
test_set = game_two['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 3

In [None]:
# Finals, Game 3: append the game rows to the model data and predict every game date.
game_three= pd.read_csv('playoffs_round_four_three.csv')
game_three['GAME_DATE'] = pd.to_datetime(game_three['GAME_DATE'])

df_model= pd.concat([df_model, game_three])
test_set = game_three['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 4

In [None]:
# Finals, Game 4: append the game rows to the model data and predict every game date.
game_four= pd.read_csv('playoffs_round_four_four.csv')
game_four['GAME_DATE'] = pd.to_datetime(game_four['GAME_DATE'])

df_model= pd.concat([df_model, game_four])
test_set = game_four['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 5

In [None]:
# Finals, Game 5: append the game rows to the model data and predict every game date.
game_five = pd.read_csv('playoffs_round_four_five.csv')
game_five['GAME_DATE'] = pd.to_datetime(game_five['GAME_DATE'])

df_model= pd.concat([df_model, game_five])
test_set = game_five['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 6

In [None]:
# Finals, Game 6: append the game rows to the model data and predict every game date.
game_six = pd.read_csv('playoffs_round_four_six.csv')
game_six['GAME_DATE'] = pd.to_datetime(game_six['GAME_DATE'])

df_model= pd.concat([df_model, game_six])
test_set = game_six['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)

#### Game 7

In [None]:
# Finals, Game 7: append the game rows to the model data and predict every game date.
game_seven = pd.read_csv('playoffs_round_four_seven.csv')
game_seven['GAME_DATE'] = pd.to_datetime(game_seven['GAME_DATE'])

df_model= pd.concat([df_model, game_seven])
test_set = game_seven['GAME_DATE'].unique()

for t in test_set :
    pred, act = pred_by_date(df_model, final_model, t)

    # explicit copy so adding OUTCOME does not write to a slice of df_model (SettingWithCopyWarning)
    game_outcomes = df_model[df_model['GAME_DATE'] == t][['TEAM_ID_ONE', 'TEAM_ID_TWO']].copy()
    game_outcomes['OUTCOME'] = pred
    print(game_outcomes)