In [None]:
#==========
# Position-specific metric configuration
#==========

# `pos` maps a position code to its settings:
#   touch_filter - name of the column that holds the touch count
#   med_features - feature categories summarised by median
#   sum_features - feature categories summarised by sum
#   max_features - feature categories summarised by max
#   age_features - age feature categories
# Only the running-back ('RB') entry is defined here.
pos = {
    'RB': {
        'touch_filter': 'total_touches',
        'med_features': [
            'fp', 'tgt', 'receptions', 'total_touches', 'rush_yds', 'rec_yds',
            'rush_yd_per_game', 'rec_yd_per_game', 'rush_td', 'games_started',
            'qb_rating', 'qb_yds', 'pass_off', 'tm_rush_td', 'tm_rush_yds',
            'tm_rush_att', 'adjust_line_yds', 'ms_rush_yd', 'ms_rec_yd', 'ms_rush_td',
            'avg_pick', 'fp_per_touch', 'team_rush_avg_att',
        ],
        'sum_features': ['total_touches', 'att', 'scrimmage_yds'],
        'max_features': ['fp', 'rush_td', 'tgt', 'rush_yds', 'rec_yds', 'scrimmage_yds'],
        'age_features': [
            'fp', 'rush_yd_per_game', 'rec_yd_per_game', 'total_touches', 'receptions', 'tgt',
            'ms_rush_yd', 'ms_rec_yd', 'available_rush_att', 'available_tgt', 'total_touches_sum',
            'scrimmage_yds_sum', 'avg_pick', 'fp_per_touch', 'ms_rush_yd_per_att', 'ms_tgts',
        ],
    },
}




# User Inputs

In [None]:
# root folder of the project; keep the trailing slash, because other cells
# build file and folder names from it by plain string concatenation
path = '/Users/Mark/Documents/Github/Fantasy_Football/'

# position to analyze: 'RB', 'WR', 'QB', or 'TE'
set_position = 'RB'

# year to analyze
set_year = 2018

# required games and required touches (or passes thrown) for a
# player-season to be considered
req_games, req_touch = 8, 50

# fantasy point settings: per yard, per touchdown, per reception
yard_pts, td_pts, rec_pt = 0.1, 7, 0.5

# fantasy point settings: per fumble, per interception, per sack
fmb_pts, int_pts, sack_pts = -2.0, -2, -1

# Load Libraries

In [None]:
# core packages
import pandas as pd
import numpy as np
import os

# jupyter specifications
# turn off pandas' chained-assignment (SettingWithCopy) warning
pd.options.mode.chained_assignment = None
from IPython.core.interactiveshell import InteractiveShell
# display the value of every top-level expression in a cell, not only the last one
InteractiveShell.ast_node_interactivity = "all"
# render matplotlib figures inline in the notebook output
%matplotlib inline
# ask the inline backend for high-resolution ('retina') figures
%config InlineBackend.figure_format = 'retina'

# plotting functions
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# change directory temporarily to helper scripts
# NOTE(review): no cell in view changes the working directory back, so this
# switch stays in effect for the rest of the session.
os.chdir(path + 'Scripts/Analysis/Helper_Scripts')

# load custom plot functions
from my_plot import PrettyPlot
PrettyPlot(plt)

# load custom helper functions
# NOTE(review): wildcard import -- helpers that later cells call without a
# visible import (features_target, validation, generate_predictions,
# plot_results, clustering, view_projections, pearsonr) presumably come from
# here; confirm and consider importing them by name.
from helper_functions import *;

# Merge and Clean Data Files

In [None]:
# Read <path>Data/<set_year>/<set_position>_Input.csv and discard its first column.
input_csv = '{}Data/{}/{}_Input.csv'.format(path, set_year, set_position)
df = pd.read_csv(input_csv).iloc[:, 1:]

In [None]:
# Touch-count column for the position selected in the user inputs
# (previously hard-coded to the 'RB' entry regardless of set_position).
touch_col = pos[set_position]['touch_filter']

# Earlier seasons (year < set_year - 1): keep only player-seasons that exceed
# both the touch threshold and the games threshold.
meets_minimums = (df[touch_col] > req_touch) & (df.games > req_games)
old = df[meets_minimums & (df.year < set_year - 1)].reset_index(drop=True)

# Season set_year - 1: kept in full, without the touch/games filter.
this_year = df[df.year == set_year - 1]

# Stack both pieces. ignore_index renumbers the rows 0..n-1; without it the
# labels of `this_year` (left over from the raw frame) would collide with the
# freshly reset labels of `old`.
df = pd.concat([old, this_year], axis=0, ignore_index=True)

# Create FP and Target Columns

In [None]:
# Work on a copy of the cleaned frame under the name `rb`, which this cell and
# the feature-building cell use. The original cell referenced `rb` and
# `params[0..3]` without any cell defining them, so it could not run on a
# fresh kernel; `params` is only assigned later, as a dict of model grids.
rb = df.copy()

# Fantasy points from the scoring settings in the User Inputs cell:
# yards, touchdowns, receptions and fumbles.
rb['fp'] = yard_pts*rb['rush_yds'] + yard_pts*rb['rec_yds'] + td_pts*rb['rush_td'] + \
           td_pts*rb['rec_td'] + rec_pt*rb['receptions'] + fmb_pts*rb['fmb']
rb['fp_per_game'] = rb['fp'] / rb['games']

# Quarterback fantasy points use fixed weights (0.04 per yard, 4 per TD,
# -2 per interception, -1 per sack) rather than the user settings.
rb['qb_fp'] = .04*rb['qb_yds'] + 4*rb['qb_tds'] - 2*rb['int'] - 1*rb['qb_sacks']

# Efficiency ratios.
# NOTE(review): a row with 0 touches or 0 targets produces inf/NaN here;
# confirm downstream cells handle that.
rb['fp_per_touch'] = rb['fp'] / rb['total_touches']
rb['yd_per_tgt'] = rb['rec_yds'] / rb['tgt']

In [None]:
# Feature lists for the selected position. The original call passed
# median_features / sum_features / max_features / age_features, names that no
# cell defines; the lists live in the `pos` dictionary.
position_features = pos[set_position]

# Build the training and prediction frames with 'fp_per_game' as the target.
# NOTE(review): 2004 and 2017 are presumably the first and last seasons to
# use (2017 == set_year - 1 for the current setting); confirm against
# features_target before tying them to set_year.
df_train, df_predict = features_target(rb, 2004, 2017,
                                       position_features['med_features'],
                                       position_features['sum_features'],
                                       position_features['max_features'],
                                       position_features['age_features'],
                                       'fp_per_game')

In [None]:
# Keep only training rows whose target 'y_act' is present, then renumber the index.
has_target = df_train['y_act'].notna()
df_train = df_train[has_target].reset_index(drop=True)

In [None]:
# Fill missing training values with the mean of each numeric column.
# numeric_only=True keeps text columns such as 'player' out of the mean:
# recent pandas versions raise on them instead of silently skipping them.
df_train = df_train.fillna(df_train.mean(numeric_only=True))

# Rows to predict must be complete: drop any row with a missing value.
df_predict = df_predict.dropna().reset_index(drop=True)

df_train.shape

# Visualization of Feature Correlations

# Running Models

In [None]:
# Minimum absolute correlation with the target for a column to be kept.
corr_cutoff = 0.05

# Correlate numeric/boolean columns only. Restricting the frame explicitly
# keeps text columns such as 'player' out of corr(), which recent pandas
# versions would otherwise raise on.
corr = df_train.select_dtypes(include=['number', 'bool']).corr()['y_act']
good_cols = list(corr[abs(corr) > corr_cutoff].index)

# Always carry the identifier columns. Add each one only if the correlation
# filter has not already selected it; an unconditional extend would duplicate
# 'year' whenever its correlation clears the cutoff.
for id_col in ['player', 'year']:
    if id_col not in good_cols:
        good_cols.append(id_col)
df_train = df_train[good_cols]

# The prediction frame gets the same columns minus the target.
good_cols.remove('y_act')
df_predict = df_predict[good_cols]

In [None]:
#=============
# Create parameter dictionaries for each algorithm
#=============

# LightGBM grid. The feature-subsampling key was misspelt 'freature_fraction',
# which is not a LightGBM parameter, so that dimension was never searched.
# NOTE(review): LightGBM applies `subsample` only when `subsample_freq` > 0;
# confirm the estimator built inside validation() sets it.
lgbm_params = {
    'n_estimators':[30, 40, 50, 60, 75],
    'max_depth':[2, 3, 4, 5, 6, 7],
    'feature_fraction':[0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'subsample': [0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'min_child_weight': [5, 10, 15, 20, 25],
}

# XGBoost grid. XGBoost has no 'feature_fraction' parameter; its per-tree
# column-subsampling parameter is 'colsample_bytree'.
xgb_params = {
    'n_estimators': [30, 40, 50, 60, 75],
    'max_depth': [2, 3, 4, 5, 6, 7],
    'subsample': [0.5, 0.6, 0.7, 0.8, 0.9, 1],
    'min_child_weight': [10, 15, 20, 25, 30],
    'colsample_bytree':[0.5, 0.6, 0.7, 0.8, 0.9, 1]
}

# k-nearest-neighbours grid.
# NOTE(review): defined here but not included in the param_list built in the
# following cell; confirm whether that is intentional.
knn_params = {
    'n_neighbors': [3, 4, 5, 6, 7],
    'weights': ['uniform', 'distance'],
    'algorithm': ['kd_tree', 'ball_tree']
}

# Lasso grid: regularisation strength.
lasso_params = {
    'alpha': [0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1, 1.5]
}

In [None]:
# Master dictionary of parameter grids keyed by estimator name.
# param_list and est_names are paired by position, so their orders must match.
param_list = [lgbm_params, xgb_params, lasso_params]
est_names = ['lgbm', 'xgb', 'lasso']

params = dict(zip(est_names, param_list))

# Run the validation helper over every estimator and its grid.
param_results, summary, df_train_results, errors = validation(
    est_names, params, df_train, iterations=200, scale=True, random_state=1234
)

In [None]:
# Display the first 10 entries of the `summary` object returned by validation().
summary.head(10)

In [None]:
# Selector handed to generate_predictions for the validation result to use.
# NOTE(review): 8 is a hand-picked value, presumably read off the summary
# table; confirm what it indexes before re-running with new data.
best_result = 8
# Produce the test-set results and the list of models for the chosen result.
df_test_results, models = generate_predictions(best_result, param_results, summary, df_train, df_predict)

In [None]:
from sklearn.preprocessing import StandardScaler
# Predict with the third model on the standardized float64/uint8/int64 columns of df_predict.
# NOTE(review): the scaler is fit on df_predict itself here, not on the
# training data; confirm this matches how the model was trained.
models[2].predict(StandardScaler().fit_transform(df_predict.select_dtypes(include=['float64', 'uint8', 'int64'])))

In [None]:
# Pass the first model's feature importances, together with the names of the
# float64/int64/uint8 columns of df_predict, to the plot_results helper.
plot_results(models[0].feature_importances_, df_predict.select_dtypes(include=['float64', 'int64', 'uint8']).columns)

# Clustering Players into Tiers

In [None]:
# Collect the first three models and hand them, with the train/test result
# frames, to the clustering helper; then explore cluster counts with k=15.
model_features = [models[idx] for idx in range(3)]

cluster = clustering(df_train_results, df_test_results, model_features, pred_weight=4)
cluster.explore_k(k=15)

In [None]:
# Fit the clustering with k=8, then retrieve the train and test frames
# returned by add_clusters().
cluster.fit_and_predict(k=8)
c_train, c_test = cluster.add_clusters()

## Tier 1

In [None]:
# Show the results for j=3 (presumably the cluster label treated as Tier 1 -- confirm after refitting).
cluster.show_results(j=3)

## Tier 2

In [None]:
# Show the results for j=1 (presumably the cluster label treated as Tier 2 -- confirm after refitting).
cluster.show_results(j=1)

In [None]:
# Build the sampling distributions from the training dataset with wt=2.5.
# NOTE(review): the meaning of `wt` is defined inside the clustering helper; confirm there.
rb_sampling = cluster.create_distributions(dataset='train', wt=2.5)

In [None]:
# Pair each sampled prediction with the actual target for the same player and
# year. Inner join: only player-years present in both frames are kept.
join_keys = ['player', 'year']
cluster_predictions = rb_sampling[join_keys + ['pred']].merge(
    df_train[join_keys + ['y_act']], how='inner', on=join_keys
)

In [None]:
from sklearn.linear_model import LinearRegression

# Predicted and actual values for the merged player-years.
XX = cluster_predictions['pred']
yy = cluster_predictions['y_act']

# Correlation between prediction and outcome.
pearsonr(XX, yy)

# Single-feature linear fit of actual on predicted; the estimator expects a
# 2-D feature array, hence the reshape to one column.
XX_2d = XX.values.reshape(-1, 1)
lr = LinearRegression()

lr.fit(XX_2d, yy)
lr.score(XX_2d, yy)

# Example Distribution (Assuming 16 Games)

In [None]:
# Inspect the sampled projection for Dalvin Cook via the view_projections helper.
view_projections(rb_sampling, 'Dalvin Cook')

In [None]:
# Inspect the sampled projection for Alvin Kamara via the view_projections helper.
view_projections(rb_sampling, 'Alvin Kamara')

In [None]:
# Inspect the sampled projection for Joe Mixon via the view_projections helper.
view_projections(rb_sampling, 'Joe Mixon')

In [None]:
# Inspect the sampled projection for Le'Veon Bell via the view_projections helper.
view_projections(rb_sampling, "Le'Veon Bell")

In [None]:
# Inspect the sampled projection for Melvin Gordon via the view_projections helper.
view_projections(rb_sampling, "Melvin Gordon")

In [None]:
# Write the sampled projections to CSV.
# NOTE(review): absolute, machine-specific destination that does not use the
# `path` setting from the user inputs; confirm the intended output location.
rb_sampling.to_csv('/Users/Mark/Desktop/Jupyter Projects/Fantasy Football/Projections/rb_sampling.csv')