In [1]:
##################################################################
#
# QBTopPerformerModel.ipynb
#
# Description: Train a classification model that assigns each QB
#   a performance rank based on their fantasy value last
#   season.
# Author: Ken Reeser
# Since: 2018-08-28
#
##################################################################

%matplotlib inline
import io
import json
import math
import pickle
import sys, os

import boto3
import botocore
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Make the project's utility package importable. The path is relative to the
# current working directory; os.path.join keeps it valid on both Windows and
# POSIX (a literal '.\\app\\util' only resolves on Windows).
sys.path.append(os.path.join('.', 'app', 'util'))
from FantasyFootballOffense import calc_past_offensive_fantasy_value

# Season whose fantasy value is used to rank the QBs.
lastyear = 2017
# Accumulates (player_id, value_last_season) tuples in step 2.
statList = list()

#1. Get the game stats for each QB for every year they have played
s3 = boto3.resource('s3')
try:
    # Profile data is only needed for the optional (commented-out) name lookup in step 2.
    #profileObj = s3.Object('fantasyfootballdata', 'profile_QB_train.json')
    #profileDf = pd.read_json(profileObj.get()['Body'].read().decode('utf-8'))

    gamesObj = s3.Object('fantasyfootballdata', 'games_QB_train.json')
    gamesJson = gamesObj.get()['Body'].read().decode('utf-8')
    # Wrap the payload in a file-like object: passing a literal JSON string
    # to read_json is deprecated in recent pandas releases.
    gamesDf = pd.read_json(io.StringIO(gamesJson))
    # sort_values returns a new frame, so assign it back for the sort to take effect.
    gamesDf = gamesDf.sort_values(by=['player_id'])

except botocore.exceptions.ClientError as e:
    # The error code is a string and is often symbolic (e.g. 'NoSuchKey',
    # 'NoSuchBucket'), so it must not be forced through int().
    error_code = e.response['Error']['Code']
    print(error_code)
    # Without gamesDf nothing below can run; re-raise so the real cause is
    # reported instead of a later NameError.
    raise

#2. Loop through QB player_id's and calculate fantasy values last year
# groupby hands over each player's rows in a single pass (sort=False keeps the
# order of first appearance), replacing a full boolean-mask scan per player.
for player_id, career in gamesDf.groupby('player_id', sort=False):
    valueLastSeason = calc_past_offensive_fantasy_value(career, lastyear)
    # Only QBs with a positive value last season are kept for ranking.
    if valueLastSeason > 0:
        statList.append((player_id, valueLastSeason))
        #3. Perform fantasy value study for the QB position last year; then comment this out.
        #playerName = profileDf.loc[profileDf.player_id == player_id].name.get_values()
        #print("Name: ", playerName, "Value: ", valueLastSeason)

# Build the frame once from the collected tuples. Unlike concatenating
# one-row frames, this also works when no QB qualified (empty statList),
# yielding an empty frame with the expected columns.
statDf = pd.DataFrame(statList, columns=['player_id','value_last_season'])


NameError: name 'obj' is not defined

In [None]:
#3. Perform fantasy value study for the QB position last year
#Sort Data (highest fantasy value first)
statDf = statDf.sort_values(by=['value_last_season'], ascending=False)
# x positions are derived from the data so the plots work for any number of
# QBs, not only when exactly 34 of them passed the value filter.
positions = np.arange(len(statDf))
#Scatter plot the fantasy values last season for the QB training data
plt.scatter(positions, statDf.value_last_season)
# plt.show() is not called here, so the bar chart below is drawn on the
# same figure as the scatter plot.
#plt.show()

#4. Classify fantasy value last year
# Calculating 20th,40th,60th,80th,and 100th quantiles
rank, qbins = pd.qcut(statDf['value_last_season'],q=[0.0,0.2,0.4,0.6,0.8,1.0],retbins=True)
# Each QB's 'rank' is the left edge of the quantile bin its value falls in,
# truncated to an integer by the cast below.
statDf['rank'] = [x.left for x in pd.cut(statDf['value_last_season'], bins=qbins, include_lowest=True)]
statDf['rank'] = statDf['rank'].astype(np.int64)
#print(statDf)

plt.bar(positions, statDf['rank'])
plt.show()

#5. Save the QB top performer model to disk
# The "model" is the array of quantile bin edges computed in step 4.
# os.path.join produces the same path as before on Windows and a valid one on POSIX.
filename = os.path.join('.', 'app', 'data', 'qb_top_performer_model.pkl')
# The context manager guarantees the file is flushed and closed, even if
# pickling fails part-way through.
with open(filename, 'wb') as modelFile:
    pickle.dump(qbins, modelFile)
print("Model is SAVED!")
