## Linear Regression

In [48]:
# import required package for data handling
import pandas as pd
import numpy as np

# import required packages for splitting data
from sklearn import model_selection
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

# import required packages for evaluating models
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support

# Importing Linear Rgression Model 
from sklearn.linear_model import LinearRegression

In [49]:
# Loading in the data set
df =pd.read_csv('../csv_files/Model_Ready_Regression.csv', header=0, index_col=0)

In [50]:
df.head()

Unnamed: 0,H_FTPct,H_EFGPct,H_ThreePARt,H_FTR,H_REBPct,H_BLKPct,H_AST_TOV_Ratio,A_FTPct,A_EFGPct,A_ThreePARt,A_FTR,A_REBPct,A_BLKPct,A_AST_TOV_Ratio,Target
0,0.833,0.461538,0.395604,0.32967,0.506173,0.036585,1.785714,0.952,0.628049,0.463415,0.256098,0.493827,0.10989,2.142857,-14
1,0.885,0.430851,0.404255,0.276596,0.538462,0.02439,1.133333,0.87,0.542683,0.414634,0.280488,0.461538,0.053191,2.266667,-5
2,0.844,0.590909,0.428571,0.415584,0.486486,0.076923,2.5,0.935,0.512821,0.397436,0.397436,0.513514,0.025974,1.8125,9
3,0.727,0.567568,0.364865,0.297297,0.475,0.053333,2.0,0.71,0.48,0.453333,0.413333,0.525,0.094595,1.727273,6
4,0.722,0.511905,0.369048,0.214286,0.540816,0.02381,2.363636,0.806,0.47619,0.511905,0.369048,0.459184,0.047619,3.166667,-6


In [51]:
df.columns

Index(['H_FTPct', 'H_EFGPct', 'H_ThreePARt', 'H_FTR', 'H_REBPct', 'H_BLKPct',
       'H_AST_TOV_Ratio', 'A_FTPct', 'A_EFGPct', 'A_ThreePARt', 'A_FTR',
       'A_REBPct', 'A_BLKPct', 'A_AST_TOV_Ratio', 'Target'],
      dtype='object')

In [52]:
# Assign feature columns as list:
feature_cols = ['H_FTPct', 'H_EFGPct', 'H_ThreePARt', 'H_FTR', 'H_REBPct', 
               'H_BLKPct', 'H_AST_TOVRatio', 'A_FTPct', 'A_EFGPct', 'A_ThreePARt', 
                'A_FTR', 'A_REBPct', 'A_BLKPct', 'A_AST_TOVRatio' ]

In [53]:
# Splitting up our data into variable and target data
X = df.iloc[:, :-1] # Variable
Y = df.Target # Target

In [54]:
# Split dataset into training set and test set
# 70% training and 30% test
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=2019)

In [55]:
# Instantiate
linreg = LinearRegression()

# Fitting the model 
linreg.fit(X_train, Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [56]:
# Printing the y-intercept
print(linreg.intercept_)

# Prints the beta coefficients in same order as passed
print(linreg.coef_)

# Zip can pair feature names and coefficients together
zip(feature_cols, linreg.coef_)

1.4177326068043565
[ 13.21669161  96.38006854   3.8243976   14.00154046  42.12513201
  -5.74311116   5.01798378 -13.98932507 -97.06000754  -3.68054951
 -13.56227531 -42.12513201   8.84709344  -5.37619548]


<zip at 0x7f39a87c4b08>

In [57]:
# Making a predictive model
Y_pred = linreg.predict(X_test)

In [58]:
# Print result of MAE
print(metrics.mean_absolute_error(Y_test, Y_pred))

# Print result of MSE
print(metrics.mean_squared_error(Y_test, Y_pred))

# Print result of RMSE 
print(np.sqrt(metrics.mean_squared_error(Y_test, Y_pred)))

3.8461474398193016
25.305343418017326
5.0304416722607295
