# FanDuel Analysis

This notebook predicts whether a player will score points on FanDuel in a given game. This is a simpler problem than predicting how many total points the player will score on a given day. The predictions help when building FanDuel lineups by showing which players are more likely to score points in the upcoming game.

### Step 1 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

In [None]:
# Load the batter data set; the path is relative to the notebook's working directory.
df = pd.read_csv('../data/all_batters_with_extra_feats.csv')

In [None]:
# Preview the first few rows to sanity-check the load.
df.head()

### Select Features & Standardize Data 

In [None]:
def get_X_and_y(df):
    """Split a batter frame into model features and a binary target.

    Side effect: adds (or overwrites) a boolean ``positive_points`` column on
    ``df`` itself, True where the ``DFS(FD)`` column is greater than zero.

    Args:
        df: DataFrame containing the five feature columns and ``DFS(FD)``.

    Returns:
        A ``(X, y)`` tuple: ``X`` is the frame restricted to the feature
        columns and ``y`` is the ``positive_points`` column.
    """
    feature_columns = [
        'hit_streak',
        'prev_points',
        'points_ma',
        'above_avg_points',
        'above_avg_streak',
    ]
    # Select the features before touching df so a missing feature column
    # fails before the frame is modified.
    features = df[feature_columns]
    df['positive_points'] = df['DFS(FD)'].gt(0)
    target = df['positive_points']
    return features, target

In [None]:
# Features/target for all batters. This call also adds the 'positive_points'
# column to df, which the baseline cells below read.
X, y = get_X_and_y(df)

In [None]:
# Build one features/target pair per home-run flag column.
# Each subset is copied because get_X_and_y assigns a new column to the frame
# it receives; without .copy() that assignment would target a slice of df.

# Rows where high_home_runs == 1.
hhr = df[df['high_home_runs'] == 1].copy()
hhr_X, hhr_y = get_X_and_y(hhr)

# Rows where med_home_runs == 1.
mhr = df[df['med_home_runs'] == 1].copy()
mhr_X, mhr_y = get_X_and_y(mhr)

# Rows where low_home_runs == 1.
lhr = df[df['low_home_runs'] == 1].copy()
lhr_X, lhr_y = get_X_and_y(lhr)

### Step 2 

In [None]:
# Baseline accuracy: the fraction of rows with positive points, i.e. the
# accuracy obtained by always predicting "positive".
df['positive_points'].sum()/df.shape[0]

In [None]:
def run_classify_model(df, X, y):
    """Train several classifiers on one train/test split and report accuracy.

    The data is split once with ``train_test_split(random_state=1)``. A
    ``StandardScaler`` is fitted on the training split only and applied to
    both splits, and every model is both trained and evaluated on the scaled
    features.

    Args:
        df: Frame holding a boolean ``positive_points`` column; used only to
            compute the baseline.
        X: Feature matrix.
        y: Binary target aligned with ``X``.

    Returns:
        Dict mapping ``'baseline'`` and each model key (``'LOG'``, ``'TREE'``,
        ``'KNN'``, ``'ADA'``, ``'NN'``) to a percentage. The baseline is the
        share of rows in ``df`` with ``positive_points`` set; the others are
        test-set accuracies.
    """
    results = {'baseline': round(df['positive_points'].sum()/df.shape[0],3)*100}

    # The split and the scaler do not depend on the model, so build them once
    # instead of once per classifier.
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Factories keep instantiation lazy: one model exists at a time.
    model_factories = {
        'LOG': lambda: LogisticRegression(solver='saga', max_iter=10000),
        'TREE': lambda: DecisionTreeClassifier(max_depth=10),
        'KNN': lambda: KNeighborsClassifier(),
        'ADA': lambda: AdaBoostClassifier(),
        'NN': lambda: MLPClassifier(random_state=1, max_iter=300),
    }

    for name, make_model in model_factories.items():
        # Fit on the *scaled* training data so training and prediction see
        # features on the same scale (previously the fit used raw X_train).
        clf = make_model().fit(X_train_scaled, y_train)
        y_pred = clf.predict(X_test_scaled)
        acc = accuracy_score(y_test, y_pred)
        results[name] = round(acc, 3) * 100
    return results

In [None]:
# Accuracy per model across all batters.
run_classify_model(df, X, y)

In [None]:
# Accuracy per model for the rows with high_home_runs == 1.
run_classify_model(hhr,hhr_X, hhr_y)

In [None]:
# Accuracy per model for the rows with med_home_runs == 1.
run_classify_model(mhr,mhr_X, mhr_y)

In [None]:
# Accuracy per model for the rows with low_home_runs == 1.
run_classify_model(lhr,lhr_X, lhr_y)