In [195]:
import pandas as pd
import numpy as np
from pathlib import Path
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced
from sklearn.metrics import explained_variance_score
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestClassifier

In [165]:
# Load the data
file_path = Path('team_cautious_waffle-JTbranch/nfl_2020_player_stats.csv')
nfl_ml_df = pd.read_csv(file_path)
nfl_ml_df.set_index("name", inplace=True)

In [196]:
from config import db_password

In [230]:
# Create the connection to the PostgreSQL database
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/DFS"
engine = create_engine(db_string)
# movies_df.to_sql(name='movies', con=engine, if_exists='replace')
complete_nfl_df = pd.read_sql_table('nfl2020final', con=engine, index_col='name')

In [234]:
nfl_ml_df = complete_nfl_df.drop(['team'], axis=1)

In [235]:
nfl_ml_df['quarterback_rating'] = nfl_ml_df['quarterback_rating'].astype('int64')
nfl_ml_df['fantasy_points'] = nfl_ml_df['fantasy_points'].astype('int64')

In [236]:
nfl_ml_df = pd.get_dummies(nfl_ml_df)

In [237]:
nfl_ml_df['position_QB'] = nfl_ml_df['position_QB'].astype('int64')
nfl_ml_df['position_RB'] = nfl_ml_df['position_RB'].astype('int64')
nfl_ml_df['position_TE'] = nfl_ml_df['position_TE'].astype('int64')
nfl_ml_df['position_WR'] = nfl_ml_df['position_WR'].astype('int64')
nfl_ml_df.dtypes

completed_passes                   int64
attempted_passes                   int64
passing_yards                      int64
passing_touchdowns                 int64
interceptions_thrown               int64
times_sacked                       int64
yards_lost_from_sacks              int64
longest_pass                       int64
quarterback_rating                 int64
rush_attempts                      int64
rush_yards                         int64
rush_touchdowns                    int64
longest_rush                       int64
times_pass_target                  int64
receptions                         int64
receiving_yards                    int64
receiving_touchdowns               int64
longest_reception                  int64
fumbles                            int64
fumbles_lost                       int64
fumbles_recovered_for_touchdown    int64
kickoff_return_touchdown           int64
punt_return_touchdown              int64
fantasy_points                     int64
position_QB     

## Split data into features and target

In [238]:
# Create our features
X = nfl_ml_df.copy() 
X = X.drop(['fantasy_points'], axis=1)

# Create our target
y = nfl_ml_df['fantasy_points']

X.head()

Unnamed: 0_level_0,completed_passes,attempted_passes,passing_yards,passing_touchdowns,interceptions_thrown,times_sacked,yards_lost_from_sacks,longest_pass,quarterback_rating,rush_attempts,...,longest_reception,fumbles,fumbles_lost,fumbles_recovered_for_touchdown,kickoff_return_touchdown,punt_return_touchdown,position_QB,position_RB,position_TE,position_WR
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Alvin Kamara,0,0,0,0,0,0,0,0,0,11,...,11,0,0,0,0,0,0,1,0,0
Alvin Kamara,0,0,0,0,0,0,0,0,0,15,...,8,0,0,0,0,0,0,1,0,0
Alvin Kamara,0,0,0,0,0,0,0,0,0,11,...,-2,0,0,0,0,0,0,1,0,0
Alvin Kamara,0,0,0,0,0,0,0,0,0,12,...,47,1,0,0,0,0,0,1,0,0
Alvin Kamara,0,0,0,0,0,0,0,0,0,19,...,29,0,0,0,0,0,0,1,0,0


In [239]:
# Check the balance of our target values
y.value_counts()

 0      365
 4      254
 3      249
 2      246
 6      228
       ... 
-23       1
-19       1
-11       1
 56       1
-113      1
Name: fantasy_points, Length: 70, dtype: int64

In [240]:
# Split into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78, train_size=0.84)

# Determine the shape of our training and testing sets.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(3600, 27)
(686, 27)
(3600,)
(686,)


## Random Forest Classifier to determine feature importance

In [241]:
# Creating a StandardScaler instance
scaler = StandardScaler()

# Fitting the Standard Scaler with the training data
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [242]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78) 

In [243]:
# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

In [244]:
# Making predictions using the testing data.
predictions = rf_model.predict(X_test_scaled)
predictions

array([ 3, 14,  4, 14, 35,  8,  8, 29,  5, 11, 13,  8,  3,  6,  9, 19,  0,
       10, 17,  4,  6, 14,  3,  1, 14, 12,  6,  5, 12,  0,  0,  7,  8,  0,
       24, 10,  4,  1, 12,  0, 23,  2, 31,  4, 16,  9, 10,  3,  2, 10,  0,
        5, 21, 38,  0,  0,  3,  2, 14, 11,  8,  6,  8, 15, 18, 16,  5, 17,
       16,  5,  4,  0, 29, 10,  2,  7,  3,  6,  3, 22,  5,  0,  1, 26,  1,
        1, 11,  0,  6,  7,  3,  9,  0,  6,  4,  7,  0, 12,  2,  5, 10, 13,
        7,  2, 17,  6,  0,  2, 30,  8,  2, 17, 12, 12, 19, 17,  7, 10,  2,
       20,  8, 22, 25, 14, 10, 23,  0,  8,  3,  2,  7,  0,  9,  5, 17,  2,
        2, 29, 12,  8, 12,  5, 13, 15,  5, 23, 23,  2,  0,  0,  0,  3, 24,
        9,  0,  1,  8,  0, 16,  7, 12,  8, 16,  3,  3, 11,  5,  0, 26,  4,
        3, 14,  9,  1,  0,  0, 25, 10,  1, 14,  2,  9, 15,  1, 15,  5,  8,
       13, 14,  3, 30, 10,  2, 11, 31, 17,  3,  1,  1,  7, 19, 16,  6, 18,
       22, 14, 24,  2,  1, 14, 12, 12,  8,  1,  0, 22, 12,  7,  3,  6, 22,
        4, 10, 12,  3,  1

In [245]:
# Calculate feature importance in the Random Forest model.
importances = rf_model.feature_importances_
importances

array([0.01622004, 0.01584347, 0.02056983, 0.00999011, 0.00509934,
       0.00738047, 0.01052101, 0.0152823 , 0.01590357, 0.05913319,
       0.10257377, 0.02259535, 0.06817016, 0.06941751, 0.11995226,
       0.20469824, 0.04636061, 0.13040646, 0.01494258, 0.01121587,
       0.00293395, 0.00132071, 0.0020571 , 0.00137226, 0.00783954,
       0.0069557 , 0.0112446 ])

In [246]:
# Features sorted by their importance
sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)

[(0.2046982362142305, 'receiving_yards'),
 (0.13040645747989937, 'longest_reception'),
 (0.11995226254175373, 'receptions'),
 (0.10257377107414953, 'rush_yards'),
 (0.06941750796884143, 'times_pass_target'),
 (0.06817015821796776, 'longest_rush'),
 (0.0591331908262976, 'rush_attempts'),
 (0.04636061368128017, 'receiving_touchdowns'),
 (0.02259535330176367, 'rush_touchdowns'),
 (0.020569828006667745, 'passing_yards'),
 (0.01622003828680867, 'completed_passes'),
 (0.015903567514164482, 'quarterback_rating'),
 (0.015843474120409293, 'attempted_passes'),
 (0.015282302683960436, 'longest_pass'),
 (0.01494257548546067, 'fumbles'),
 (0.011244600453054244, 'position_WR'),
 (0.011215868800036182, 'fumbles_lost'),
 (0.01052101055779779, 'yards_lost_from_sacks'),
 (0.009990114599919142, 'passing_touchdowns'),
 (0.007839536059469415, 'position_RB'),
 (0.007380472123013414, 'times_sacked'),
 (0.006955697742615223, 'position_TE'),
 (0.0050993415110841865, 'interceptions_thrown'),
 (0.002933951378278

## Linear Regression Model

In [216]:
model = LinearRegression()
model.fit(X, y)

LinearRegression()

In [217]:
y_pred = model.predict(X_test)

In [218]:
explained_variance_score(y_test, y_pred)

0.9959988819924703

In [219]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.99599877282801

## Positional DFs

In [229]:
# Create our features
XX = nfl_ml_df.copy() 
if XX['position_QB'] == 1, xx.drop(['receiving_yards', 'receptions', 'longest_reception', 'times_pass_target', 'longest_rush', 'receiving_touchdowns', 'fumbles_recovered_for_touchdown', 'punt_return_touchdown', 'kickoff_return_touchdown', 'position_RB', 'position_WR', 'position_TE'], axis=1)

# Create our target
y = nfl_ml_df['fantasy_points']

QBs.head()

SyntaxError: invalid syntax (<ipython-input-229-d6481ec1e1d2>, line 3)

In [None]:
if XX['position_RB'] == 1, xx.drop(['passing_yards', 'completed_passes', 'quarterback_rating', 'attempted_passes', 'longest_pass', 'passing_touchdowns', 'yards_lost_from_sacks', 'times_sacked', 'interceptions_thrown', 'fumbles_recovered_for_touchdown', 'position_QB', 'position_WR', 'position_TE'], axis=1)

In [None]:
if XX['position_WR'] == 1, xx.drop(['passing_yards', 'completed_passes', 'quarterback_rating', 'attempted_passes', 'longest_pass', 'passing_touchdowns', 'yards_lost_from_sacks', 'times_sacked', 'interceptions_thrown', 'fumbles_recovered_for_touchdown', 'position_QB', 'position_RB', 'position_TE'], axis=1)

In [None]:
if XX['position_TE'] == 1, xx.drop(['passing_yards', 'completed_passes', 'quarterback_rating', 'attempted_passes', 'longest_pass', 'passing_touchdowns', 'yards_lost_from_sacks', 'times_sacked', 'interceptions_thrown', 'fumbles_recovered_for_touchdown', 'position_QB', 'position_WR', 'position_RB'], axis=1)