In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C, Matern, WhiteKernel

from sklearn.decomposition import PCA

from scipy.stats import zscore

# Seed NumPy's global RNG once, up front, so anything that draws from it
# behaves the same on a fresh Restart & Run All.
np.random.seed(1)

# Game table read from the 2016-2017 CSV (presumably one row per game played;
# verify against the file).
combo = pd.read_csv('2016-2017_combo.csv')

# Per-team statistics from the 2016-2017 sheet of the workbook.
df = pd.read_excel('cts.xlsx', sheet_name='2016-2017')

# Columns retained for modelling. 'Team Name' is the lookup key and is listed
# first; the remaining entries are the candidate features. The trailing spaces
# in two of the names are part of the literal column names used here.
to_keep = ['Team Name', 'Adj Off Efficiency', 'FG%', 'Avg. Scoring margin',
           'Rebounds', 'Wins Last 10 Games ', 'Turnovers per game ']

# .copy() makes the subset an independent frame, so the column assignment
# below neither raises SettingWithCopyWarning nor depends on whether pandas
# handed back a view or a copy.
df = df[to_keep].copy()

# Standardise every numeric column with scipy's zscore (applied column-wise);
# the non-numeric 'Team Name' column is left as-is.
numeric_cols = df.select_dtypes(include=[np.number]).columns
df[numeric_cols] = df[numeric_cols].apply(zscore)

In [2]:
def build_matchup_rows(games, team_stats, skip_missing=False):
    """Convert a table of games into per-game feature rows and score margins.

    Parameters
    ----------
    games : pandas.DataFrame
        Must provide the columns 'Winner', 'Loser', 'Winner Points' and
        'Loser points'.
    team_stats : pandas.DataFrame
        Must provide a 'Team Name' column in position 0; every column after it
        is used as a feature.
    skip_missing : bool, default False
        If True, a game whose winner or loser has no row in ``team_stats`` is
        reported (both names are printed) and dropped. If False, the
        ``IndexError`` from the failed lookup propagates.

    Returns
    -------
    (list, list)
        Feature rows (winner features followed by loser features, flattened)
        and the matching one-element arrays of winner-minus-loser points.

    Raises
    ------
    IndexError
        When a team is missing from ``team_stats`` and ``skip_missing`` is
        False.
    """
    features = []
    margins = []

    for _, game in games.iterrows():
        winner = game['Winner']
        loser = game['Loser']

        try:
            # First matching row for each team; [1:] drops the leading
            # 'Team Name' cell so only the feature values remain.
            w_v = np.array(team_stats[team_stats['Team Name'] == winner])[0][1:]
            l_v = np.array(team_stats[team_stats['Team Name'] == loser])[0][1:]
        except IndexError:
            # Only "team not found" is tolerated here. Anything else (e.g. a
            # misspelled column name) is a real error and must surface rather
            # than silently emptying the dataset.
            if not skip_missing:
                raise
            print(winner)
            print(loser)
            continue

        features.append(np.array([w_v, l_v]).flatten())
        margins.append(np.array([game['Winner Points'] - game['Loser points']]))

    return features, margins


# Training set: 2016-2017 games joined with that season's standardised stats.
Xs, ys = build_matchup_rows(combo, df)

X = np.array(Xs)
y = np.array(ys)

# Validation set: the 2015-2016 season, prepared the same way as the training
# data and standardised with its own column statistics.
coval = pd.read_csv('2015-2016_combo.csv')
val = pd.read_excel('cts.xlsx', sheet_name='2015-2016')
# .copy() so the assignment below targets an independent frame
# (avoids SettingWithCopyWarning).
val = val[to_keep].copy()
numeric_cols = val.select_dtypes(include=[np.number]).columns
val[numeric_cols] = val[numeric_cols].apply(zscore)

# Games involving a team absent from the validation stats are printed and
# skipped instead of aborting the cell.
Xvs, yvs = build_matchup_rows(coval, val, skip_missing=True)

Xv = np.array(Xvs)
yv = np.array(yvs)

Syracuse
MTSU
MTSU
Michigan State


In [3]:
# Running best: highest validation R^2 seen so far and the grid point
# (RBF length scale, constant scale, white-noise level) that produced it.
rep = -1000
rbest_ = 0
obest_ = 0
wbest_ = 0

# Centre of the search grid for each hyperparameter.
# NOTE(review): these look like values pasted in from an earlier search run;
# confirm their provenance.
rbest = 57.89515789473683
obest = 26.31652631578947
wbest = 78.7878788090909

GRID_HALF_WIDTH = 10   # each axis spans centre +/- this amount
GRID_STEPS = 10        # points per axis
GRID_TOTAL = GRID_STEPS ** 3
REPORT_EVERY = max(1, GRID_TOTAL // 100)   # print progress roughly every 1%

# NOTE(review): fit() runs scikit-learn's default hyperparameter optimiser, so
# the grid values are only the *starting point* of each fit, while the values
# reported at the end are those starting points, not the fitted kernel.

print('starting')
count = 0
for rbf_val in np.linspace(rbest - GRID_HALF_WIDTH, rbest + GRID_HALF_WIDTH, GRID_STEPS):
    for outside in np.linspace(obest - GRID_HALF_WIDTH, obest + GRID_HALF_WIDTH, GRID_STEPS):
        for wkb in np.linspace(wbest - GRID_HALF_WIDTH, wbest + GRID_HALF_WIDTH, GRID_STEPS):
            kernel = outside * RBF(rbf_val) + WhiteKernel(wkb)
            # random_state pins the optimiser's random restarts. Without it
            # they are drawn from the global RNG, so a fit's score depends on
            # how many fits ran before it and cannot be reproduced by
            # refitting with the reported parameters.
            gp = GaussianProcessRegressor(kernel,
                                          n_restarts_optimizer=10,
                                          normalize_y=True,
                                          random_state=1)
            gp.fit(X, y)
            score = gp.score(Xv, yv)
            if score > rep:
                rbest_ = rbf_val
                obest_ = outside
                wbest_ = wkb
                rep = score

            count += 1
            if count % REPORT_EVERY == 0:
                print(f'{count / GRID_TOTAL}')

print('GP Validation R^2: ' + str(rep))
print(f'Best RBF Length Scale: {rbest_}')
print(f'Best RBF Kernel Scale: {obest_}')
print(f'Best White Noise: {wbest_}')

starting
0.01
0.02
0.03
0.04
0.05
0.06
0.07
0.08
0.09
0.1
0.11
0.12
0.13
0.14
0.15
0.16
0.17
0.18
0.19
0.2
0.21
0.22
0.23
0.24
0.25
0.26
0.27
0.28
0.29
0.3
0.31
0.32
0.33
0.34
0.35
0.36
0.37
0.38
0.39
0.4
0.41
0.42
0.43
0.44
0.45
0.46
0.47
0.48
0.49
0.5
0.51
0.52
0.53
0.54
0.55
0.56
0.57
0.58
0.59
0.6
0.61
0.62
0.63
0.64
0.65
0.66
0.67
0.68
0.69
0.7
0.71
0.72
0.73
0.74
0.75
0.76
0.77
0.78
0.79
0.8
0.81
0.82
0.83
0.84
0.85
0.86
0.87
0.88
0.89
0.9
0.91
0.92
0.93
0.94
0.95
0.96
0.97
0.98
0.99
1.0
GP Validation R^2: 0.13480468143867996
Best RBF Length Scale: 61.22849122807016
Best RBF Kernel Scale: 18.538748538011692
Best White Noise: 88.7878788090909


In [7]:
# Re-display the search summary. `rep` is reassigned by other cells in this
# notebook, so the score shown here is whatever the most recently executed
# cell stored, which may not belong to the grid point printed beside it.
summary_lines = (
    'GP Validation R^2: ' + str(rep),
    f'Best RBF Length Scale: {rbest_}',
    f'Best RBF Kernel Scale: {obest_}',
    f'Best White Noise: {wbest_}',
)
for summary_line in summary_lines:
    print(summary_line)

GP Validation R^2: 0.12420151957757786
Best RBF Length Scale: 61.22849122807016
Best RBF Kernel Scale: 18.538748538011692
Best White Noise: 88.7878788090909


In [8]:
# One-dimensional sweep over the white-noise level, holding the RBF length
# scale (`rbest`) and constant scale (`obest`) at their current values.
# This resets `rep` and overwrites `wbest` with the best level found.
rep = -1000
wbest = 0
for wkv in np.linspace(0.0000001, 100, 100):
    kernel = obest * RBF(rbest) + WhiteKernel(wkv)

    # random_state pins the optimiser's random restarts so every candidate is
    # fitted with the same restart draws and the winning score can be
    # reproduced by refitting; otherwise the result depends on how many fits
    # have already consumed the global RNG.
    gp = GaussianProcessRegressor(kernel,
                                  n_restarts_optimizer=10,
                                  normalize_y=True,
                                  random_state=1)
    gp.fit(X, y)
    score = gp.score(Xv, yv)
    if score > rep:
        wbest = wkv
        rep = score

print('GP Validation R^2: ' + str(rep))
print(f'Best White Noise Parameter: {wbest}')

GP Validation R^2: 0.12420673550100036
Best White Noise Parameter: 34.34343440909091


In [6]:
# Fit a single GP starting from the current `obest` / `rbest` / `wbest` values
# and score it on the validation season. The result is stored in `rep`,
# replacing whatever an earlier cell left there.
kernel = obest * RBF(rbest) + WhiteKernel(wbest)
# random_state pins the optimiser's random restarts, so re-running this cell
# yields the same score regardless of what was executed before it.
gp = GaussianProcessRegressor(kernel,
                              n_restarts_optimizer=10,
                              normalize_y=True,
                              random_state=1)
gp.fit(X, y)
rep = gp.score(Xv, yv)

print('GP Validation R^2: ' + str(rep))

GP Validation R^2: 0.12420151957757786
