In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import datetime
#import math
import difflib
import sys, os
import boto3

sys.path.append('.\\app\\util')
from FantasyFootballOffense import calc_past_offensive_fantasy_value

#load up the serialized models
paModelFile = ".\\app\\data\\qb_prime_age_model.pkl"
pa_model = pickle.load(open(paModelFile, 'rb'))

#1. Available QB training data has already been divided into 2 sets with a ratio 60/40 train/test
s3 = boto3.resource('s3')
try:
    gamesObj = s3.Object('fantasyfootballdata', 'games_QB_train.json')
    gamesDf = pd.read_json(gamesObj.get()['Body'].read().decode('utf-8'))
    
    testGamesObj = s3.Object('fantasyfootballdata', 'games_QB_test.json')
    testGamesDf = pd.read_json(gamesObj.get()['Body'].read().decode('utf-8'))
    gamesDf = gamesDf.append(testGamesDf)

except botocore.exceptions.ClientError as e:
    # If a client error is thrown, then check that it was a 404 error.
    # If it was a 404 error, then the bucket does not exist.
    error_code = int(e.response['Error']['Code'])
    print(error_code)


#2. Generate dictionary of all players and the year of their first season in the NFL
firstYearDict = dict()
for player_id in gamesDf.player_id.unique():
    gameList = gamesDf.loc[gamesDf.player_id == player_id]
    if gameList is not None and len(gameList) > 0:
        for index, row in gameList.iterrows():
            seasonYear = 0
            if (row.date.month >=8 and row.date.month <=12):
                seasonYear = row.date.year
            else:
                seasonYear = row.date.year - 1
                
            if firstYearDict.get(player_id) is None:
                firstYearDict[player_id] = int(seasonYear)
            else:
                if int(seasonYear) < firstYearDict.get(player_id):
                    firstYearDict[player_id] = int(seasonYear)


In [2]:
# param: pa_model - prime age regression model that determines a player's prime age
# param: fantasyvalue - fantasy value last year
# param: years_played - years played by this player in the next year
# return: some value that represents the fantasy value next year
def predictOffensiveFantasyValue(paModel, fantasyvalue, years_played):
    paArray = np.array([years_played])
    paValue = paModel.predict(paArray[:,np.newaxis])
    print("Fantasy Value Last Year: ", fantasyvalue)
    print("Prime Age Score: ", (1.5*paValue[0]))
    predictedFantasyValue = fantasyvalue + (1.5*paValue[0])
    #print("Predicted Fantasy Value: ", predictedFantasyValue)
    return predictedFantasyValue

#an alternative to difflib.ratio
def calcDiffScore(predictValues,actualValues):
    
    if len(predictValues) == 0:
        return 1.0
    if len(predictValues) != len(actualValues):
        return 1.0
    
    predictValuesDict = dict()
    actualValuesDict = dict()
    
    pos = 1
    for value in predictValues:
        predictValuesDict[value] = pos
        pos = pos + 1

    pos = 1
    for value in actualValues:
        actualValuesDict[value] = pos
        pos = pos + 1
        
    diffscore = 0
    totalsize = len(predictValues)
    for value in predictValues:
        predictIndex = predictValuesDict[value]
        actualIndex = actualValuesDict[value]
        distance = abs(predictIndex - actualIndex)
        diffscore = diffscore + distance
    
    finalscore = diffscore / totalsize
    return finalscore

In [3]:
#3.  Start with 1978 season for a 40 year study
firstSeason = 1978
lastSeason = None

now = datetime.datetime.now()

if(now.month < 3):
    lastSeason = now.year - 2
else:
    lastSeason = now.year - 1
    
def filterBySeason(date):
    if (date.month >=8 and date.month <=12):
        return date.year
    else:
        return date.year - 1

gamesDf['season'] = gamesDf['date'].apply(filterBySeason)

predStatDict = None
nextStatDict = None
totalDiffScore = 0
totalDiffCount = 0

#4.  Find all players who played in currSeason and calculate their fantasy values for that year
#5.  Take those same players and calculate their actual fantasy values for the next year
for currSeason in range(firstSeason,lastSeason):

    predStatDict = dict()
    nextStatDict = dict()
    print("Testing QB prime age and top performer models for year: ", currSeason)

    #compute the nextSeason
    nextSeason = currSeason + 1

    #get the games for the current season N
    gamesForCurrSeason = gamesDf.loc[gamesDf['season'] == currSeason]
    #get the games for the next season N+1
    gamesForNextSeason = gamesDf.loc[gamesDf['season'] == nextSeason]
    
    #PREDICT THE VALUES
    #loop through the games for current season N
    for player_id in gamesForCurrSeason.player_id.unique():
        gameListNextSeason = gamesForNextSeason.loc[gamesForNextSeason.player_id == player_id]
        gameListCurrSeason = gamesForCurrSeason.loc[gamesForCurrSeason.player_id == player_id]
        if (gameListCurrSeason is not None and len(gameListCurrSeason) > 0) and \
        (gameListNextSeason is not None and len(gameListNextSeason) > 0):
            currSeasonValue = calc_past_offensive_fantasy_value(gameListCurrSeason,currSeason)
            firstYear = firstYearDict.get(player_id)
            years_played = 0
            if firstYear is not None:
                years_played = nextSeason - firstYear + 1
            nextSeasonValuePredict = predictOffensiveFantasyValue(pa_model,currSeasonValue,years_played)
            predStatDict[player_id] = [nextSeasonValuePredict]
    
    #THESE ARE THE ANSWERS!
    #loop through the games for next season N+1
    for player_id in gamesForNextSeason.player_id.unique():
        gameListNextSeason = gamesForNextSeason.loc[gamesForNextSeason.player_id == player_id]
        gameListCurrSeason = gamesForCurrSeason.loc[gamesForCurrSeason.player_id == player_id]
        if (gameListCurrSeason is not None and len(gameListCurrSeason) > 0) and \
        (gameListNextSeason is not None and len(gameListNextSeason) > 0):
            nextSeasonValue = calc_past_offensive_fantasy_value(gameListNextSeason,nextSeason)
            nextStatDict[player_id] = [nextSeasonValue]
    
    #SCORE THE DIFFERENCES
    predictDf = pd.DataFrame.from_dict(predStatDict).transpose()
    predictDf.columns = ['fantasyvalue']
    predictDf.sort_values(by=['fantasyvalue'],ascending=False,inplace=True)
    #print(predictDf)
    nextDf = pd.DataFrame.from_dict(nextStatDict).transpose()
    nextDf.columns = ['fantasyvalue']
    nextDf.sort_values(by=['fantasyvalue'],ascending=False,inplace=True)
    #print(nextDf)

    sm1 = difflib.SequenceMatcher(None,a=predictDf.index.values,b=nextDf.index.values)
    diffScore = sm1.ratio()*100
    #diffScore = calcDiffScore(predictDf.index.values,nextDf.index.values)
    #print(predictDf.index.values)
    #print(nextDf.index.values)
    print("Fantasy Value Diff Score: ", diffScore)
    totalDiffScore = totalDiffScore + diffScore
    totalDiffCount = totalDiffCount + 1
    
print("Average Fantasy Value Diff Score: ", (totalDiffScore / totalDiffCount))

Testing QB prime age and top performer models for year:  1978
Fantasy Value Last Year:  0
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  0
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  472
Prime Age Score:  79.37168186429992
Fantasy Value Last Year:  156
Prime Age Score:  76.87429749566284
Fantasy Value Last Year:  14
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  534
Prime Age Score:  79.52435052801863
Fantasy Value Last Year:  0
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  120
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  232
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  182
Prime Age Score:  78.68460623888708
Fantasy Value Last Year:  18
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  234
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  114
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  184
Prime Age Score:  74.56416740582264
Fantasy Value Last Yea

Fantasy Value Last Year:  316
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  76
Prime Age Score:  77.77712396789909
Fantasy Value Last Year:  0
Prime Age Score:  76.87429749566284
Fantasy Value Diff Score:  33.33333333333333
Testing QB prime age and top performer models for year:  1982
Fantasy Value Last Year:  24
Prime Age Score:  76.87429749566284
Fantasy Value Last Year:  0
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  24
Prime Age Score:  79.52435052801863
Fantasy Value Last Year:  0
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  288
Prime Age Score:  69.80171494267435
Fantasy Value Last Year:  206
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  164
Prime Age Score:  78.68460623888708
Fantasy Value Last Year:  188
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  0
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  66
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  350
Prime Age Score: 

Fantasy Value Diff Score:  24.390243902439025
Testing QB prime age and top performer models for year:  1986
Fantasy Value Last Year:  90
Prime Age Score:  79.52435052801863
Fantasy Value Last Year:  78
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  466
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  0
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  0
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  26
Prime Age Score:  69.80171494267435
Fantasy Value Last Year:  82
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  164
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  0
Prime Age Score:  79.52435052801863
Fantasy Value Last Year:  178
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  138
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  236
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  0
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  28
Prime Age Score: 

Fantasy Value Diff Score:  42.42424242424242
Testing QB prime age and top performer models for year:  1990
Fantasy Value Last Year:  302
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  334
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  252
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  84
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  102
Prime Age Score:  76.87429749566284
Fantasy Value Last Year:  2
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  12
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  146
Prime Age Score:  79.37168186429992
Fantasy Value Last Year:  56
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  88
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  0
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  0
Prime Age Score:  79.37168186429992
Fantasy Value Last Year:  48
Prime Age Score:  77.5167001923055
Fantasy Value Last Year:  328
Prime Age Score: 

Fantasy Value Last Year:  74
Prime Age Score:  79.37168186429992
Fantasy Value Last Year:  18
Prime Age Score:  79.52435052801863
Fantasy Value Last Year:  2
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  2
Prime Age Score:  63.72108625597795
Fantasy Value Last Year:  840
Prime Age Score:  79.37168186429992
Fantasy Value Last Year:  0
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  232
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  16
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  12
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  380
Prime Age Score:  74.56416740582264
Fantasy Value Last Year:  288
Prime Age Score:  77.77712396789909
Fantasy Value Last Year:  580
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  70
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  156
Prime Age Score:  76.87429749566284
Fantasy Value Last Year:  384
Prime Age Score:  76.87429749566284
Fantasy Value Last Ye

Fantasy Value Last Year:  4
Prime Age Score:  40.3714907575679
Fantasy Value Last Year:  400
Prime Age Score:  69.80171494267435
Fantasy Value Last Year:  120
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  24
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  100
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  374
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  176
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  302
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  140
Prime Age Score:  76.87429749566284
Fantasy Value Last Year:  350
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  10
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  526
Prime Age Score:  56.615202085381696
Fantasy Value Last Year:  634
Prime Age Score:  77.5167001923055
Fantasy Value Last Year:  32
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  302
Prime Age Score:  77.92490767220175
Fantasy Value La

Fantasy Value Last Year:  334
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  0
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  110
Prime Age Score:  78.68460623888708
Fantasy Value Last Year:  582
Prime Age Score:  77.77712396789909
Fantasy Value Last Year:  388
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  256
Prime Age Score:  76.87429749566284
Fantasy Value Last Year:  58
Prime Age Score:  78.68460623888708
Fantasy Value Last Year:  536
Prime Age Score:  56.615202085381696
Fantasy Value Last Year:  354
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  144
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  12
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  26
Prime Age Score:  79.37168186429992
Fantasy Value Last Year:  0
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  76
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  438
Prime Age Score:  77.5167001923055
Fantasy Value Last

Fantasy Value Last Year:  12
Prime Age Score:  76.87429749566284
Fantasy Value Last Year:  356
Prime Age Score:  76.87429749566284
Fantasy Value Last Year:  404
Prime Age Score:  76.87429749566284
Fantasy Value Last Year:  12
Prime Age Score:  76.87429749566284
Fantasy Value Last Year:  114
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  388
Prime Age Score:  74.56416740582264
Fantasy Value Last Year:  256
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  28
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  370
Prime Age Score:  77.77712396789909
Fantasy Value Last Year:  0
Prime Age Score:  77.77712396789909
Fantasy Value Last Year:  124
Prime Age Score:  77.5167001923055
Fantasy Value Last Year:  468
Prime Age Score:  77.5167001923055
Fantasy Value Last Year:  34
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  30
Prime Age Score:  78.68460623888708
Fantasy Value Last Year:  332
Prime Age Score:  56.615202085381696
Fantasy Value Last

Fantasy Value Last Year:  66
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  434
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  134
Prime Age Score:  77.77712396789909
Fantasy Value Last Year:  106
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  0
Prime Age Score:  74.56416740582264
Fantasy Value Last Year:  632
Prime Age Score:  22.925891159885595
Fantasy Value Last Year:  370
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  92
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  -2
Prime Age Score:  77.5167001923055
Fantasy Value Last Year:  282
Prime Age Score:  77.77712396789909
Fantasy Value Last Year:  234
Prime Age Score:  78.53880230709623
Fantasy Value Last Year:  16
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  0
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  388
Prime Age Score:  76.87429749566284
Fantasy Value Last Year:  16
Prime Age Score:  77.92490767220175
Fantasy Value Last 

Fantasy Value Diff Score:  29.72972972972973
Testing QB prime age and top performer models for year:  2014
Fantasy Value Last Year:  536
Prime Age Score:  77.5167001923055
Fantasy Value Last Year:  268
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  232
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  456
Prime Age Score:  76.74002950685886
Fantasy Value Last Year:  0
Prime Age Score:  77.92490767220175
Fantasy Value Last Year:  720
Prime Age Score:  56.615202085381696
Fantasy Value Last Year:  58
Prime Age Score:  63.72108625597795
Fantasy Value Last Year:  608
Prime Age Score:  63.72108625597795
Fantasy Value Last Year:  40
Prime Age Score:  79.08055544804637
Fantasy Value Last Year:  280
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  130
Prime Age Score:  79.52435052801863
Fantasy Value Last Year:  350
Prime Age Score:  73.62653940890473
Fantasy Value Last Year:  44
Prime Age Score:  79.37168186429992
Fantasy Value Last Year:  82
Prime Age S

In [5]:
#6. Make QB predictions for 2018
#FANTASY VALUES FROM 2017
pred2018Dict = dict()
  
#get all 2017 games
gamesForLastSeason = gamesDf.loc[gamesDf['season'] == 2017]
    
#loop through the games for last season
for player_id in gamesForLastSeason.player_id.unique():
    gameList = gamesForLastSeason.loc[gamesForLastSeason.player_id == player_id]
    if gameList is not None and len(gameList) > 0:
        lastSeasonValue = calc_past_offensive_fantasy_value(gameList,2017)
        firstYear = firstYearDict.get(player_id)
        years_played = 0
        if firstYear is not None:
            years_played = nextSeason - firstYear + 1
        print("Player ID: ", player_id)
        nextSeasonValuePredict = predictOffensiveFantasyValue(pa_model,lastSeasonValue,years_played)
        pred2018Dict[player_id] = [nextSeasonValuePredict]
        
pred2018Df = pd.DataFrame.from_dict(pred2018Dict).transpose()
pred2018Df.columns = ['fantasyvalue']
pred2018Df.sort_values(by=['fantasyvalue'],ascending=False,inplace=True)
print(pred2018Df)

#try:
#    profileObj = s3.Object('fantasyfootballdata', 'profile_QB_train.json')
#    profileDf = pd.read_json(gamesObj.get()['Body'].read().decode('utf-8'))
#    
#    testProfileObj = s3.Object('fantasyfootballdata', 'profile_QB_test.json')
#    testProfileDf = pd.read_json(gamesObj.get()['Body'].read().decode('utf-8'))
#    profileDf = profileDf.append(testProfileDf)
#
#except botocore.exceptions.ClientError as e:
#    # If a client error is thrown, then check that it was a 404 error.
#    # If it was a 404 error, then the bucket does not exist.
#    error_code = int(e.response['Error']['Code'])
#    print(error_code)

#for player_id in pred2018Df.index.unique():
#    playerName = profileDf.loc[profileDf.player_id == player_id].name.get_values()
#    print("Name: ", playerName)

Player ID:  19494
Fantasy Value Last Year:  0
Prime Age Score:  61.440671082190576
Player ID:  19561
Fantasy Value Last Year:  416
Prime Age Score:  79.52435052801863
Player ID:  20475
Fantasy Value Last Year:  222
Prime Age Score:  78.53880230709623
Player ID:  2064
Fantasy Value Last Year:  482
Prime Age Score:  79.08055544804637
Player ID:  21331
Fantasy Value Last Year:  540
Prime Age Score:  78.68460623888708
Player ID:  22183
Fantasy Value Last Year:  338
Prime Age Score:  76.74002950685886
Player ID:  2269
Fantasy Value Last Year:  740
Prime Age Score:  40.3714907575679
Player ID:  2335
Fantasy Value Last Year:  0
Prime Age Score:  61.440671082190576
Player ID:  2358
Fantasy Value Last Year:  548
Prime Age Score:  48.75758066835007
Player ID:  23691
Fantasy Value Last Year:  348
Prime Age Score:  61.440671082190576
Player ID:  2410
Fantasy Value Last Year:  -2
Prime Age Score:  79.08055544804637
Player ID:  2447
Fantasy Value Last Year:  316
Prime Age Score:  73.62653940890473
P