# User Inputs

In [None]:
# project root; keep the trailing slash because later cells build
# file locations with plain string concatenation (path + 'Data/' ...)
path = '/Users/Mark/Documents/Github/Fantasy_Football/'

# position under analysis, one of 'RB', 'WR', 'QB', 'TE'
set_position = 'RB'

# season being analyzed and the first season of history to use
set_year = 2018
earliest_year = 2004

# minimum games and touches (or passes thrown) for a past season
# to be kept when the data is filtered
req_games = 8
req_touch = 50

# fantasy scoring weights, keyed by the scoring category they apply to
pts = {
    'yd_pts': 0.1,
    'pass_yd_pts': 0.04,
    'td_pts': 7,
    'pass_td_pts': 5,
    'rec_pts': .5,
    'fmb_pts': -2.0,
    'int_pts': -2,
    'sack_pts': -1,
}

# Load Libraries

In [None]:
# core packages
import pandas as pd
import numpy as np
import os

# jupyter specifications
# turn off pandas' chained-assignment (SettingWithCopy) warning
pd.options.mode.chained_assignment = None
from IPython.core.interactiveshell import InteractiveShell
# display the value of every expression statement in a cell, not only the last one
InteractiveShell.ast_node_interactivity = "all"
# render matplotlib figures inline in the notebook output
%matplotlib inline
# request high-resolution ('retina') inline figures
%config InlineBackend.figure_format = 'retina'

# plotting functions
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# switch the working directory to the helper scripts folder so the local
# modules below can be imported (this cell does not switch it back)
helper_dir = path + 'Scripts/Analysis/Helper_Scripts'
os.chdir(helper_dir)

# custom plot styling, applied to the pyplot module
from my_plot import PrettyPlot
PrettyPlot(plt)

# custom helper functions, pulled in with a wildcard import
# (trailing ';' kept as in the original cell)
from helper_functions import *;

# Merge and Clean Data Files

In [None]:
# input file for the chosen year and position
input_csv = '{}Data/{}/{}_Input.csv'.format(path, set_year, set_position)

# read the file and discard its first column
df = pd.read_csv(input_csv).iloc[:, 1:]

# rushing plus receiving touchdowns, divided by games
df['td_per_game'] = (df['rush_td'] + df['rec_td']) / df['games']

In [None]:
# Past seasons are filtered on touches and games; the most recent season
# (set_year - 1) is kept unfiltered so every current player gets a prediction.
touch_col = pos['RB']['touch_filter']
has_enough_touches = df[touch_col] > req_touch
has_enough_games = df.games > req_games
is_past_season = df.year < set_year - 1

old = df[has_enough_touches & has_enough_games & is_past_season].reset_index(drop=True)
this_year = df[df.year == set_year - 1]

# stack the filtered history on top of the unfiltered latest season
df = pd.concat([old, this_year], axis=0)

# result holders: training rows carry player plus a year counted from
# earliest_year (first season == 1); test rows carry the player only
df_train_results = pd.DataFrame([old.player, old.year - earliest_year + 1]).T
df_test_results = pd.DataFrame([this_year.player]).T

In [None]:
# pass the data and the scoring weights in `pts` to calculate_fp for the RB position;
# presumably this adds fantasy-point values -- calculate_fp is not defined in this
# notebook, confirm against the helper scripts
df = calculate_fp(df, pts, pos='RB')

In [None]:
#==========
# Loop to create statistical predictions
#==========

# one ensemble is fit per target statistic listed here
metrics = ['td_per_game', 'rush_yd_per_game', 'rec_per_game', 'rec_yd_per_game']
for metric in metrics:

    #--------
    # Create train and predict dataframes
    #--------
    df_train, df_predict = features_target(df,
                                           earliest_year, set_year-1,
                                           pos['RB']['med_features'],
                                           pos['RB']['sum_features'],
                                           pos['RB']['max_features'],
                                           pos['RB']['age_features'],
                                           target_feature=metric)

    # rows without a known target cannot be trained on
    df_train = df_train.dropna(subset=['y_act']).reset_index(drop=True)

    # fill remaining gaps with column means; numeric_only=True keeps the
    # string 'player' column from making mean() fail on current pandas
    df_train = df_train.fillna(df_train.mean(numeric_only=True))
    df_predict = df_predict.dropna().reset_index(drop=True)

    #--------
    # Remove low correlation features and high VIF features
    #--------

    # remove low correlation features
    df_train, df_predict = corr_removal(df_train, df_predict, corr_cutoff=0.05)

    # select only features with low vif for modeling
    transformer = ReduceVIF(thresh=100, scale=True, print_progress=False)
    df_train_ = transformer.fit_transform(df_train.drop(['y_act', 'player'], axis=1), df_train.y_act)

    # extract best columns and filter down df_predict
    best_cols = list(df_train_.columns)
    best_cols.append('player')
    df_predict = df_predict[best_cols]

    # add target and filter down df_train
    best_cols.extend(['y_act', 'year'])
    df_train = df_train[best_cols]

    # 'year' may already be among the retained features, in which case the
    # selection above yields it twice; keep the first occurrence of each column.
    # (The original computed this frame but never assigned it.)
    df_train = df_train.loc[:, ~df_train.columns.duplicated()]

    #--------
    # Run ensemble model with parameter optimization
    #--------

    # master dictionary of parameters; param_list must be in the same
    # order as est_names because the two are paired positionally
    param_list = [lgbm_params, xgb_params, lasso_params]
    est_names = ['lgbm', 'xgb', 'lasso']
    params = dict(zip(est_names, param_list))

    param_results, summary, df_train_results_, errors = validation(est_names, params, df_train, iterations=100, random_state=1234)

    #--------
    # Print best results
    #--------

    print(summary.head(10))
    best_result = summary.index[0]
    df_test_results_, models = generate_predictions(best_result, param_results, summary, df_train, df_predict)

    #--------
    # Aggregate all results through merging
    #--------

    # NOTE(review): every iteration merges in another column named 'pred', so
    # pandas disambiguates with its default _x/_y suffixes and, from the fourth
    # metric on, those suffixed names collide with ones already present.
    # Confirm which column names the clustering step expects before renaming.
    df_train_results = pd.merge(df_train_results, df_train_results_[['player', 'year', 'pred']],
                                how='inner', on=['player', 'year'])

    df_test_results = pd.merge(df_test_results, df_test_results_[['player', 'pred']],
                               how='inner', on='player')

# Clustering Players into Tiers

In [None]:
# first three entries of `models`; after the prediction loop this name holds
# the value returned for the last metric processed
model_features = [models[idx] for idx in range(3)]

# build the clustering object from the aggregated train/test predictions
cluster = clustering(df_train_results, df_test_results, model_features, pred_weight=4)

# run the helper's exploration over k with k=15
cluster.explore_k(k=15)

In [None]:
# fit the clustering with k=8, then collect the train and test outputs
# returned by add_clusters()
cluster.fit_and_predict(k=8)
c_train, c_test = cluster.add_clusters()

## Tier 1

In [None]:
# show the results for cluster j=7, presented under the Tier 1 heading
cluster.show_results(j=7)

## Tier 2

In [None]:
# show the results for cluster j=1, presented under the Tier 2 heading
cluster.show_results(j=1)

In [None]:
# build distributions from the training dataset via the cluster object (wt=2.5);
# the result is reused below for the per-player projections and the CSV export
rb_sampling = cluster.create_distributions(dataset='train', wt=2.5)

In [None]:
# pair each sampled prediction with the actual target for the same
# player and year; df_train is the frame left over from the prediction loop
merge_keys = ['player', 'year']
sampled_preds = rb_sampling[['player', 'year', 'pred']]
actuals = df_train[['player', 'year', 'y_act']]
cluster_predictions = sampled_preds.merge(actuals, how='inner', on=merge_keys)

In [None]:
from sklearn.linear_model import LinearRegression

# predicted value and actual target from the merged frame
XX = cluster_predictions['pred']
yy = cluster_predictions['y_act']

# correlation between prediction and actual
pearsonr(XX, yy)

# single-feature linear fit; the predictor is reshaped into one column
XX_2d = XX.values.reshape(-1, 1)
lr = LinearRegression()
lr.fit(XX_2d, yy)
lr.score(XX_2d, yy)

# Example Distribution (Assuming 16 Games)

In [None]:
# view the projections for Dalvin Cook from the sampled distributions
view_projections(rb_sampling, 'Dalvin Cook')

In [None]:
# view the projections for Alvin Kamara from the sampled distributions
view_projections(rb_sampling, 'Alvin Kamara')

In [None]:
# view the projections for Joe Mixon from the sampled distributions
view_projections(rb_sampling, 'Joe Mixon')

In [None]:
# view the projections for Le'Veon Bell from the sampled distributions
view_projections(rb_sampling, "Le'Veon Bell")

In [None]:
# view the projections for Melvin Gordon from the sampled distributions
view_projections(rb_sampling, "Melvin Gordon")

In [None]:
# write the sampled projections to CSV
# NOTE(review): this absolute location is separate from the `path` setting in the
# User Inputs cell, so changing `path` does not move this output -- confirm intended
rb_sampling.to_csv('/Users/Mark/Desktop/Jupyter Projects/Fantasy Football/Projections/rb_sampling.csv')