# Purpose
This notebook creates a baseline model as a point of reference for future model iterations. The model predicts the total number of significant strikes landed, given the two fighters' average significant strikes landed in all of their previous bouts.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import PoissonRegressor
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

### Import and split

In [2]:
# Read the modelling dataset; the first CSV column is used as the row index.
data = pd.read_csv(
    '../../data/modelling_data/model_7_data.csv',
    index_col=0,
)
# Bare last expression so the notebook renders the frame.
data

Unnamed: 0,ca_sig_str_a_p1m_0,3fa_sig_str_a_p1m_0,ca_td_s_p15m_di_0,3fa_td_s_p15m_di_0,ca_sig_str_s_p1m_di_0,3fa_sig_str_s_p1m_di_0,ca_sig_str_a_p1m_di_0,3fa_sig_str_a_p1m_di_0,ca_ground_s_p15m_di_0,3fa_ground_s_p15m_di_0,...,3fa_ground_a_p15m_di_1,ca_clinch_s_p15m_di_1,3fa_clinch_s_p15m_di_1,ca_clinch_a_p15m_di_1,3fa_clinch_a_p15m_di_1,ca_distance_a_p1m_di_1,3fa_distance_a_p1m_di_1,ca_distance_s_p1m_di_1,3fa_distance_s_p1m_di_1,c_sig_str_a_p1m
0,5.455385,5.455385,-5.681781,-5.681781,0.104453,0.104453,0.482591,0.482591,12.923077,12.923077,...,18.529221,4.595190,4.595190,0.080038,0.080038,0.366525,0.366525,1.010532,1.010532,8.466667
1,5.235224,6.021748,1.500000,0.857143,1.599104,1.598721,2.388358,2.383369,13.204478,6.863539,...,0.000000,-5.000000,-5.000000,-3.000000,-3.000000,-5.733333,-5.733333,-2.533333,-2.533333,5.962500
2,8.788065,10.881108,-0.433461,-0.600000,-0.916931,0.864547,-1.198873,3.379509,-1.755793,-0.015679,...,24.517986,3.825798,-15.292909,-7.405384,-35.435766,-3.095198,-2.870709,-1.265419,-1.430010,7.114625
3,3.200000,3.200000,3.000000,3.000000,0.466667,0.466667,-1.000000,-1.000000,3.000000,3.000000,...,1.607143,2.628510,-1.007143,3.107143,-0.407143,1.837749,1.710000,0.249837,-0.012857,27.073171
4,11.630700,8.208434,-0.307581,1.034432,4.098703,3.312711,7.565028,4.651228,17.180382,18.907950,...,19.146089,-10.989634,-12.296703,-9.656301,-10.582418,-0.024032,0.111959,-0.184414,-0.179961,12.640000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3311,6.486257,6.444444,1.429825,-0.333333,0.573392,0.955556,1.119006,0.755556,5.789474,10.000000,...,0.080882,5.199375,6.257353,6.996808,6.257353,-0.600820,0.109339,-0.009273,0.565661,13.923810
3312,6.067757,5.893269,0.779086,-1.899888,0.975783,0.653265,0.286456,0.479861,6.327674,-0.880148,...,-1.800000,5.219740,4.571323,7.296663,5.771323,4.438256,2.263102,2.433883,0.651732,9.400000
3313,9.590387,5.066667,3.018444,2.666667,0.320330,1.488889,4.057003,2.555556,6.073953,18.666667,...,23.895653,-2.625000,-3.000000,-3.750000,-4.285714,0.427614,0.759949,-0.747097,-0.419829,15.733333
3314,5.866690,4.377778,3.846692,5.333333,-0.431887,-0.711111,-0.736226,-1.400000,1.829027,4.666667,...,2.678571,1.852669,0.540941,1.276528,1.540941,1.106705,5.038618,0.508914,2.598084,22.165605


In [3]:
# Split the frame into the regression target and the predictor columns.
y = data['c_sig_str_a_p1m']
X = data.drop(columns='c_sig_str_a_p1m')

In [4]:
# Hold out a test set using scikit-learn's default split proportions;
# random_state is fixed so the same rows are held out on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

### Preprocessing

In [5]:
# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to both splits so the test set never influences it.
ss = StandardScaler()
ss.fit(X_train)
X_train_ss = ss.transform(X_train)
X_test_ss = ss.transform(X_test)

### Modelling

In [6]:
# Baseline Poisson GLM with scikit-learn's default hyper-parameters.
pr = PoissonRegressor()

# Cross-validated R^2 on the scaled training data (one score per fold).
# NOTE(review): X_train_ss was scaled with statistics from the whole training
# set, so each validation fold has already influenced the scaler; these scores
# may be slightly optimistic. A scaler+model Pipeline would avoid that.
cross_val_score(estimator=pr, X=X_train_ss, y=y_train, scoring='r2')

array([0.1466439 , 0.12999798, 0.17210733, 0.15958932, 0.16540214])

In [7]:
# Fit the baseline on the full scaled training set; fit() returns the
# estimator, so the cell displays its repr.
pr.fit(X=X_train_ss, y=y_train)

PoissonRegressor()

### Evaluation

In [8]:
# Predictions for the held-out rows, using the scaler-transformed test features.
y_hat = pr.predict(X=X_test_ss)

In [9]:
# Held-out R^2 of the baseline, for comparison with the cross-validated scores.
r2_score(y_true=y_test, y_pred=y_hat)

0.09150809685912342

In [10]:
# Side-by-side preview of the first 20 test-set predictions and true targets.
# Reuses y_hat (the predictions already computed from X_test_ss) instead of
# calling pr.predict a second time. .iloc makes the positional slice explicit,
# and .to_numpy() drops y_test's shuffled index so both columns line up on a
# fresh 0..n-1 index.
pd.DataFrame({
    'model_predictions': y_hat[:20],
    'actual_results': y_test.iloc[:20].to_numpy(),
})

Unnamed: 0,model_predictions,actual_results
0,15.725412,2.8
1,17.013061,20.8
2,15.178572,18.333333
3,16.529657,23.376623
4,17.928771,15.066667
5,12.5739,11.2
6,19.412804,16.133333
7,17.096499,18.066667
8,15.767686,21.117318
9,21.127688,18.0
