### Gaussian Naive Bayes Model Likelihood

In [16]:
import re
import numpy as np
import pandas as pd
#import cpl_main as cpl

In [17]:
year = '2019'

In [18]:
results = pd.read_csv(f'datasets/{year}/cpl-{year}-results.csv')
stats = pd.read_csv(f'datasets/{year}/cpl-{year}-stats.csv')
team_ref = pd.read_csv('datasets/teams.csv')

In [19]:
if year == '2019':
    team_ref = team_ref[1:]

In [20]:
results_brief = pd.read_csv(f'datasets/{year}/cpl-{year}-results_brief.csv')
schedule = pd.read_csv(f'datasets/{year}/cpl-{year}-schedule.csv')

In [21]:
#Import Gaussian Naive Bayes model
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
import statistics

In [22]:
# home side
q1 = schedule.iloc[2]['home']
# away side
q2 = schedule.iloc[2]['away']
print(q1,q2)

York9 FC HFX Wanderers FC


In [23]:
def get_team_comparison(data,q1,q2):
    # getting games with q1 in both home or away
    db = data[data['team'] == q1]
    db = db.reset_index()
    db.pop('index')
    # filering down more to get only the games against q2
    db = db.sort_values(by=['m','d'])
    db = db[(db['home'] == q2) | (db['away'] == q2)]
    db = db.reset_index()
    db.pop('index')
    if db.empty == True:
        db = pd.DataFrame([(0,0,0,0,q1,'D',q2,'D','empty',q1)],columns=['d','m','hs','as','home','hr','away','ar','summary','team'])
    return db

In [24]:
compare = get_team_comparison(results_brief,q1,q2)

In [25]:
compare.head()

Unnamed: 0,d,m,hs,as,home,hr,away,ar,summary,team
0,29,5,1,1,HFX Wanderers FC,D,York9 FC,D,D A 1 - 1 HFX,York9 FC
1,22,6,0,0,York9 FC,D,HFX Wanderers FC,D,D A 0 - 0 HFX,York9 FC
2,6,7,1,0,HFX Wanderers FC,W,York9 FC,L,L A 1 - 0 HFX,York9 FC
3,27,7,6,2,York9 FC,W,HFX Wanderers FC,L,W A 6 - 2 HFX,York9 FC
4,19,10,0,2,York9 FC,L,HFX Wanderers FC,W,L A 0 - 2 HFX,York9 FC


In [26]:
def get_match_tables(data,query):
    db = data[data['home'] == query]
    db = pd.concat([db,data[data['away'] == query]])
    db = db.sort_values(by=['m','d'])
    return db

def likelihood_input(array,a_list):
    b = a_list[0]
    c = a_list[1]
    d = a_list[2]
    array.append(b)
    array.append(c)
    array.append(d)
    return array

def likelihood_table(data,query):
    df = get_match_tables(data,query)
    array = []
    cols = data.columns
    for row in range(0,df.shape[0]):
        if df.iloc[row]['home'] == query:
            if df.iloc[row]['hr'] == 'W':
                array = likelihood_input(array,[[1,2,1],[1,0,0],[1,1,0]])
            if df.iloc[row]['hr'] == 'L':
                array = likelihood_input(array,[[1,2,0],[1,0,1],[1,1,0]])
            if df.iloc[row]['hr'] == 'D':
                array = likelihood_input(array,[[1,2,0],[1,0,0],[1,1,1]])
        if df.iloc[row]['away'] == query:
            if df.iloc[row]['ar'] == 'W':
                array = likelihood_input(array,[[2,2,1],[2,0,0],[2,1,0]])
            if df.iloc[row]['ar'] == 'L':
                array = likelihood_input(array,[[2,2,0],[2,0,1],[2,1,0]])
            if df.iloc[row]['ar'] == 'D':
                array = likelihood_input(array,[[2,2,1],[2,0,0],[2,1,1]])
    db= pd.DataFrame(array,columns=['h/a','w/l/d','y/n'])
    return db

In [27]:
def get_NB_data(data,query):
    db = likelihood_table(data,query)
    dy = db.pop('y/n').to_list()
    dx = [tuple(x) for x in db.values]
    return dx, dy

In [28]:
t1_x, t1_y = get_NB_data(compare,q1)
t2_x, t2_y = get_NB_data(compare,q2)

In [29]:
print(t1_x, t1_y)

[(2, 2), (2, 0), (2, 1), (1, 2), (1, 0), (1, 1), (2, 2), (2, 0), (2, 1), (1, 2), (1, 0), (1, 1), (1, 2), (1, 0), (1, 1)] [1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0]


In [30]:
def get_match_prediction_result(query,x,y,array):
    prediction = get_gnb_prediction(query,x,y,array)
    return prediction

def get_match_prediction(q1,q2,x1,y1,x2,y2):
    if (len(x1) == 0) or (len(x2) == 0):
        x = round(1/3,2)
        home_win, away_win,draw = x,x,x
        return home_win,away_win,draw
    home_win = get_match_prediction_result(q1,x1,y1,[1,2])
    draw = get_match_prediction_result(q1,x1,y1,[1,1])
    away_win = get_match_prediction_result(q2,x2,y2,[2,2])
    return home_win, draw, away_win

In [31]:
def get_gnb_prediction(query,x,y,result):

    gnb = GaussianNB()
    #bnb = BernoulliNB()
    # Train the model using the training sets

    gnb.fit(x,y)
    #bnb.fit(x,y)
    
    # use below instead of predicted = model.predict([result]) because we want the probability
    gnb_pred = np.round(gnb.predict_proba([result])[:, 1],decimals=2)
    #bnb_pred = np.round(bnb.predict_proba([result])[:, 1],decimals=2)

    #pred = round((gnb_pred[0] + bnb_pred[0]) / 2,2)
    pred = round(gnb_pred[0],2)
    #print(gnb_pred[0], bnb_pred[0], pred)

    return pred

In [50]:
def get_mnb_prediction(query,x,y,result):
    
    mnb = MultinomialNB()
    # Train the model using the training sets
    
    mnb.fit(x,y)
    
    # use below instead of predicted = model.predict([result]) because we want the probability
    mnb_pred = np.round(mnb.predict_proba([result])[:, 1],decimals=2)
    
    pred = round(mnb_pred[0],2)
    #print(gnb_pred[0], bnb_pred[0], pred)
    
    return pred

In [43]:
GaussianNB()

GaussianNB(priors=None, var_smoothing=1e-09)

In [44]:
MultinomialNB()

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [45]:
def get_bnb_prediction(query,x,y,result):
    bnb = BernoulliNB(alpha=0.25,
                      binarize=1.0,
                     fit_prior=False)
    # Train the model using the training sets
    bnb.fit(x,y)
    
    # use below instead of predicted = model.predict([result]) because we want the probability
    bnb_pred = np.round(bnb.predict_proba([result])[:, 1],decimals=2)
    
    pred = round(bnb_pred[0],2)
    
    return pred

In [46]:
def get_match_prediction_result(query,x,y,array,check):
    if check == 1:
        prediction = get_gnb_prediction(query,x,y,array)
    elif check == 2:
        prediction = get_bnb_prediction(query,x,y,array)
    else:
        prediction = get_mnb_prediction(query,x,y,array)
    return prediction

def get_match_prediction(q1,q2,x1,y1,x2,y2,check):
    if len(x1) == 0:
        x = round(1/3,2)
        home_win, away_win,draw = x,x,x
        return home_win,away_win,draw
    home_win = get_match_prediction_result(q1,x1,y1,[1,2],check)
    draw = get_match_prediction_result(q1,x1,y1,[1,1],check)
    away_win = get_match_prediction_result(q2,x2,y2,[2,2],check)
    return home_win, draw, away_win

In [47]:
home_win, draw, away_win = get_match_prediction(q1,q2,t1_x,t1_y,t2_x,t2_y,check=1)
print(f'{q1} home win probability: {round(home_win,3)} \n{q2} away win probability: {round(away_win,3)} \nDraw probability: {round(draw,3)}')

York9 FC home win probability: 0.33 
HFX Wanderers FC away win probability: 0.66 
Draw probability: 0.33


In [48]:
home_win, draw, away_win = get_match_prediction(q1,q2,t1_x,t1_y,t2_x,t2_y,check=2)
print(f'{q1} home win probability: {round(home_win,3)} \n{q2} away win probability: {round(away_win,3)} \nDraw probability: {round(draw,3)}')

York9 FC home win probability: 0.43 
HFX Wanderers FC away win probability: 0.71 
Draw probability: 0.43


In [49]:
home_win, draw, away_win = get_match_prediction(q1,q2,t1_x,t1_y,t2_x,t2_y,check=3)
print(f'{q1} home win probability: {round(home_win,3)} \n{q2} away win probability: {round(away_win,3)} \nDraw probability: {round(draw,3)}')

York9 FC home win probability: 0.48 
HFX Wanderers FC away win probability: 0.55 
Draw probability: 0.5


In [51]:
home_win, draw, away_win = get_match_prediction(q1,q2,t1_x,t1_y,t2_x,t2_y,check=3)
print(f'{q1} home win probability: {round(home_win,3)} \n{q2} away win probability: {round(away_win,3)} \nDraw probability: {round(draw,3)}')

York9 FC home win probability: 0.38 
HFX Wanderers FC away win probability: 0.44 
Draw probability: 0.4
