# CS544 Foundations of Analytics
# Professor Suresh Kalathur
# Final Project - Mike Zhong

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly
import plotly.figure_factory as ff
import plotly.graph_objs as go
import plotly.plotly as py

## Part 5) 
## This notebook will walk through some of the additional analysis done using Plotly (in Python)

In [2]:
# Read the Plotly credentials from the environment instead of embedding them in
# the notebook. A KeyError here means PLOTLY_USERNAME / PLOTLY_API_KEY are unset.
# NOTE(review): the API key that used to be hard-coded in this cell has been
# exposed and should be regenerated.
plotly.tools.set_credentials_file(
    username=os.environ['PLOTLY_USERNAME'],
    api_key=os.environ['PLOTLY_API_KEY'],
)

In [3]:
# Authorship metadata for this notebook.
__author__ = "Mike Zhong"

In [4]:
# Load the player/week stat table that the rest of the notebook analyses.
df = pd.read_csv(
    filepath_or_buffer="./data/complete_data.csv",
)

In [5]:
# List every column name, then preview the first five rows.
print(df.columns)
df.head(n=5)

Index(['Player', 'Team', 'Opponent', 'Location', 'Pts', 'Att', 'Cmp', 'Yds',
       'TD', 'Int', '2Pt', 'Rush Att', 'Rush Yds', 'Rush TD', 'Rush 2Pt',
       'Rec', 'Rec Yds', 'Rec TD', 'Rec 2Pt', 'FL', 'Misc TD', 'Week',
       'Position'],
      dtype='object')


Unnamed: 0,Player,Team,Opponent,Location,Pts,Att,Cmp,Yds,TD,Int,...,Rush TD,Rush 2Pt,Rec,Rec Yds,Rec TD,Rec 2Pt,FL,Misc TD,Week,Position
0,Andrew Luck,IND,@DET,away,43,47,31,385,4,0,...,0,0,0,0,0,0,0,0,1,QB
1,Drew Brees,NO,@OAK,away,39,42,28,423,4,0,...,0,0,0,0,0,0,1,0,1,QB
2,AJ Green,CIN,NYJ,home,36,0,0,0,0,0,...,0,0,12,180,1,0,0,0,1,WR
3,DeAngelo Williams,PIT,WAS,home,34,0,0,0,0,0,...,2,0,6,28,0,0,0,0,1,RB
4,Jameis Winston,TB,ATL,home,33,32,23,281,4,1,...,0,0,0,0,0,0,0,0,1,QB


## Box plots for each week

In [6]:
# Builds a single bar trace of the Yds column keyed by team.
# NOTE(review): `data` is not referenced by any later cell shown here and is
# never plotted - likely a leftover experiment; confirm before removing.
data = [go.Bar(x=df.Team, y=df.Yds)]

In [7]:
# Show the distribution of points for each week (weeks 1-16), one box per week.
# The unused `colorscale` list was dropped: nothing read it, and its stop
# positions (0/18/36/54) were not valid Plotly colorscale stops (must be 0..1).
traces = [
    go.Box(
        x=df.loc[df['Week'] == week, 'Pts'],
        name="Week: " + str(week),
    )
    for week in range(1, 17)
]

# Give the figure a title and axis label so it can be read on its own,
# matching the other figures in this notebook.
layout = go.Layout(
    title='Point distribution by week',
    xaxis=dict(title='Pts'),
)
fig = go.Figure(data=traces, layout=layout)
py.iplot(fig, filename='point-distribution-all-weeks')

## Oftentimes we want to use rate statistics to evaluate a player. Three such statistics come to mind:
### Passing yards / attempt: (YPA) for quarterbacks
### Receiving yards / reception: (YPR) for wide receivers
### Rushing yards / attempt: (YPC) for running backs

In [8]:
# Passing yards per attempt (YPA); 0 / 0 rows show up as NaN.
ypa = (df['Yds'] / df['Att']).rename('YPA')
ypa.head(n=5)

0     8.191489
1    10.071429
2          NaN
3          NaN
4     8.781250
Name: YPA, dtype: float64

In [9]:
# Receiving yards per reception (YPR); 0 / 0 rows show up as NaN.
ypr = df['Rec Yds'].div(df['Rec']).rename('YPR')
ypr.head(n=5)

0          NaN
1          NaN
2    15.000000
3     4.666667
4          NaN
Name: YPR, dtype: float64

In [10]:
# Rushing yards per attempt, i.e. yards per carry (YPC); 0 / 0 rows show up as NaN.
ypc = df['Rush Yds'].div(df['Rush Att']).rename('YPC')
ypc.head(n=5)

0    7.00
1    2.50
2     NaN
3    5.50
4    0.75
Name: YPC, dtype: float64

In [11]:
# Completion rate for QBs: completions divided by attempts.
# The series is stored as a ratio (Cmp / Att), not multiplied by 100.
comp_pct = (df['Cmp'] / df['Att']).rename('Completion Percentage')

In [12]:
# Attach the four rate statistics to the raw table as extra columns.
augmented_df = pd.concat(
    objs=[df, ypa, comp_pct, ypr, ypc],
    axis='columns',
)

In [13]:
# Preview the augmented table; the new rate columns are appended on the right.
augmented_df.head(n=5)

Unnamed: 0,Player,Team,Opponent,Location,Pts,Att,Cmp,Yds,TD,Int,...,Rec TD,Rec 2Pt,FL,Misc TD,Week,Position,YPA,Completion Percentage,YPR,YPC
0,Andrew Luck,IND,@DET,away,43,47,31,385,4,0,...,0,0,0,0,1,QB,8.191489,0.659574,,7.0
1,Drew Brees,NO,@OAK,away,39,42,28,423,4,0,...,0,0,1,0,1,QB,10.071429,0.666667,,2.5
2,AJ Green,CIN,NYJ,home,36,0,0,0,0,0,...,1,0,0,0,1,WR,,,15.0,
3,DeAngelo Williams,PIT,WAS,home,34,0,0,0,0,0,...,0,0,0,0,1,RB,,,4.666667,5.5
4,Jameis Winston,TB,ATL,home,33,32,23,281,4,1,...,0,0,0,0,1,QB,8.78125,0.71875,,0.75


In [14]:
# One dataframe per position, each keeping every column of augmented_df.
qbs, wrs, rbs = (
    augmented_df[augmented_df['Position'] == code]
    for code in ('QB', 'WR', 'RB')
)

In [15]:
# Scatter completion rate against yards per attempt for every QB row.
# Marker colour encodes Pts and hovering shows the player name.
trace1 = go.Scatter(
    x = qbs.loc[:, 'Completion Percentage'],
    y = qbs.loc[:, 'YPA'],
    mode = 'markers',
    marker = {
        'color': qbs.loc[:, 'Pts'],
        # Draw the colour bar; without it the colour encoding cannot be read.
        'showscale': True,
        'colorbar': {'title': 'Pts'},
    },
    text = qbs.loc[:, 'Player']
)
layout = go.Layout(
    title = 'Quarterback efficiency metrics',
    hovermode = 'closest',
    xaxis = dict(title='Completion Percentage'),
    yaxis = dict(title='Yards per Attempt (YPA)'),
)
fig = go.Figure(data=[trace1], layout=layout)
py.iplot(fig, filename='qb-efficiency')

In [16]:
# Scatter rushing attempts against yards per carry for every RB row.
# Marker colour encodes Pts and hovering shows the player name.
trace2 = go.Scatter(
    y = rbs.loc[:, 'YPC'],
    x = rbs.loc[:, 'Rush Att'],
    mode = 'markers',
    marker = {
        'color': rbs.loc[:, 'Pts'],
        # Draw the colour bar; without it the colour encoding cannot be read.
        'showscale': True,
        'colorbar': {'title': 'Pts'},
    },
    text = rbs.loc[:, 'Player']
)
layout = go.Layout(
    title = 'Runningback efficiency metrics',
    hovermode = 'closest',
    xaxis = dict(title='Rushing Attempts (Carries)'),
    yaxis = dict(title='Yards per Carry (YPC)'),
)
fig = go.Figure(data=[trace2], layout=layout)
py.iplot(fig, filename='rb-efficiency')

In [17]:
# Scatter receptions against yards per reception for every WR row.
# Marker colour encodes Pts and hovering shows the player name.
trace3 = go.Scatter(
    y = wrs.loc[:, 'YPR'],
    x = wrs.loc[:, 'Rec'],
    mode = 'markers',
    marker = {
        'color': wrs.loc[:, 'Pts'],
        # Draw the colour bar; without it the colour encoding cannot be read.
        'showscale': True,
        'colorbar': {'title': 'Pts'},
    },
    text = wrs.loc[:, 'Player']
)
layout = go.Layout(
    title = 'Wide receiver efficiency metrics',
    hovermode = 'closest',
    xaxis = dict(title='Receptions'),
    yaxis = dict(title='Yards per Reception (YPR)'),
)
fig = go.Figure(data=[trace3], layout=layout)
py.iplot(fig, filename='wr-efficiency')

## Let's analyze a few players and see how their performances stack up over a season
### Player 1: Antonio Brown
### Player 2: David Johnson
### Player 3: Rob Gronkowski
### Player 4: Aaron Rodgers

In [18]:
# Pull every row for each of the four featured players, matched by name.
ab, dj, rg, ar = (
    augmented_df[augmented_df['Player'] == full_name]
    for full_name in (
        'Antonio Brown',
        'David Johnson',
        'Rob Gronkowski',
        'Aaron Rodgers',
    )
)

In [19]:
# Weekly points for the four featured players on one chart, one colour each.
# The four traces differ only in data frame, colour and legend label, so they
# are built from a single template instead of four hand-copied blocks.
trace1, trace2, trace3, trace4 = [
    go.Scatter(
        x = games.loc[:, 'Week'],
        y = games.loc[:, 'Pts'],
        mode = 'markers',
        marker = dict(color=colour),
        name = label,
    )
    for games, colour, label in (
        (ab, 'red', 'Antonio Brown'),
        (dj, 'blue', 'David Johnson'),
        (rg, 'green', 'Gronk'),
        (ar, 'black', 'Aaron Rodgers'),
    )
]

# Title and axis labels so the figure can be read on its own.
layout = go.Layout(
    title = 'Weekly points for four selected players',
    hovermode = 'closest',
    xaxis = dict(title='Week'),
    yaxis = dict(title='Pts'),
)
fig = go.Figure(data=[trace1, trace2, trace3, trace4], layout=layout)
py.iplot(fig, filename='4-players')

### Consistency is a valuable trait in fantasy: two players can achieve the same cumulative score over a season, but the distribution of their weekly performances could vary drastically. Let's try to visualize this.

In [20]:
# Unique player names, kept in order of first appearance.
players = augmented_df['Player'].drop_duplicates(keep='first')
players.head(n=5)

0          Andrew Luck
1           Drew Brees
2             AJ Green
3    DeAngelo Williams
4       Jameis Winston
Name: Player, dtype: object

In [21]:
# To take into account that some players do not show up for all 16 weeks, let's use points/game as our metric.
# One groupby pass replaces re-filtering the whole frame once per player.
# sort=False keeps players in order of first appearance, i.e. the same order
# as iterating over the de-duplicated Player column.
# NOTE(review): rows are grouped by name only, so two different players who
# share a name would be merged - confirm names are unique in the data.
temp = []
for player, scores in augmented_df.groupby('Player', sort=False)['Pts']:
    total_points = scores.sum()
    games_played = len(scores)  # number of rows recorded for this player
    pts_per_game = total_points / games_played
    sd = np.std(scores.values)  # numpy default ddof=0 (population std)
    temp.append((player, pts_per_game, sd, total_points, games_played))

In [22]:
# Collect the per-player summary tuples into a dataframe of their own.
df1 = pd.DataFrame(
    data=temp,
    columns=[
        'Player',
        'Pts Per Game',
        'Standard Deviation',
        'Total Points',
        'Games Played',
    ],
)
df1.head(n=5)

Unnamed: 0,Player,Pts Per Game,Standard Deviation,Total Points,Games Played
0,Andrew Luck,24.428571,8.286946,342,14
1,Drew Brees,28.153846,9.045578,366,13
2,AJ Green,24.0,8.246211,168,7
3,DeAngelo Williams,28.0,6.0,56,2
4,Jameis Winston,22.416667,5.86598,269,12


In [23]:
# Scatter points per game against the standard deviation of weekly points, one
# marker per player. Marker colour encodes the number of games played and
# hovering shows the player name.
trace = go.Scatter(
    x = df1.loc[:, 'Pts Per Game'],
    y = df1.loc[:, 'Standard Deviation'],
    mode = 'markers',
    marker = {
        'color': df1.loc[:, 'Games Played'],
        # Draw the colour bar; without it the colour encoding cannot be read.
        'showscale': True,
        'colorbar': {'title': 'Games Played'},
    },
    text = df1.loc[:, 'Player']
)
layout = go.Layout(
    title = 'Consistency Kings',
    hovermode = 'closest',
    xaxis = dict(title='Pts Per Game'),
    yaxis = dict(title='Standard Deviation of Points'),
)
fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='consistency kings')

## Points can come from yardage or TDs. TDs are largely considered random on a week-to-week basis, but yardage tends to be more predictable. Let's analyze yardage vs. TDs.

In [24]:
# Unique player names per position, taken from the raw table.
# NOTE: this rebinds wrs / rbs / qbs, which earlier cells bound to per-position
# dataframes; from here on they are Series of player names.
wrs, rbs, qbs = (
    df.loc[df['Position'] == code, 'Player'].drop_duplicates()
    for code in ('WR', 'RB', 'QB')
)

In [25]:
# Split each quarterback's season total into points from passing TDs and points
# from passing yards, then plot the three quantities against each other in 3D.
PASS_TD_PTS = 6
# 1 point per 25 passing yards. The previous factor 0.025 is 1 point per 40
# yards; the week-1 QB rows in the df.head() preview (e.g. 423 Yds, 4 TD,
# 1 FL -> 39 Pts) are consistent with 1/25 rather than 1/40.
# NOTE(review): confirm against the scoring rules used to build the Pts column.
PASS_YD_PTS = 1 / 25

qb_entry = []

for player in qbs:
    # Filter the frame once per player and reuse it for all three sums.
    games = df[df.loc[:, 'Player'] == player]
    tot_points = games.loc[:, 'Pts'].sum()
    pts_from_tds = PASS_TD_PTS * games.loc[:, 'TD'].sum()
    pts_from_yds = PASS_YD_PTS * games.loc[:, 'Yds'].sum()
    qb_entry.append((player, tot_points, pts_from_tds, pts_from_yds))

df2 = pd.DataFrame(qb_entry,
                  columns = ['Player', 'Total Points', 'Points from TDs', 'Points from Yds'])

trace = go.Scatter3d(
    x = df2.loc[:, 'Points from TDs'],
    y = df2.loc[:, 'Points from Yds'],
    z = df2.loc[:, 'Total Points'],
    mode = 'markers',
    marker = dict(color='red', size=4),
    text = df2.loc[:, 'Player'],
)

layout = go.Layout(
    title = 'QB point source',
    hovermode = 'closest',
    # Label the 3D axes; the defaults are just "x", "y" and "z".
    scene = dict(
        xaxis = dict(title='Points from TDs'),
        yaxis = dict(title='Points from Yds'),
        zaxis = dict(title='Total Points'),
    ),
)

fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='qb-point-source')

In [27]:
# Split each wide receiver's season total into points from receiving TDs and
# points from receiving yards (rushing stats are not counted here), then plot
# the three quantities against each other in 3D.
REC_TD_PTS = 6
REC_YD_PTS = 0.1  # 1 point per 10 receiving yards

wr_entry = []

for player in wrs:
    # Filter the frame once per player and reuse it for all three sums.
    games = df[df.loc[:, 'Player'] == player]
    tot_points = games.loc[:, 'Pts'].sum()
    pts_from_tds = REC_TD_PTS * games.loc[:, 'Rec TD'].sum()
    pts_from_yds = REC_YD_PTS * games.loc[:, 'Rec Yds'].sum()
    wr_entry.append((player, tot_points, pts_from_tds, pts_from_yds))

df2 = pd.DataFrame(wr_entry,
                  columns = ['Player', 'Total Points', 'Points from TDs', 'Points from Yds'])

trace = go.Scatter3d(
    x = df2.loc[:, 'Points from TDs'],
    y = df2.loc[:, 'Points from Yds'],
    z = df2.loc[:, 'Total Points'],
    mode = 'markers',
    marker = dict(color='red', size=4),
    text = df2.loc[:, 'Player'],
)

layout = go.Layout(
    title = 'WR point source',
    hovermode = 'closest',
    # Label the 3D axes; the defaults are just "x", "y" and "z".
    scene = dict(
        xaxis = dict(title='Points from TDs'),
        yaxis = dict(title='Points from Yds'),
        zaxis = dict(title='Total Points'),
    ),
)

fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='wr-point-source')

In [29]:
# Split each running back's season total into points from rushing TDs and
# points from rushing yards (receiving stats are not counted here), then plot
# the three quantities against each other in 3D.
RUSH_TD_PTS = 6
RUSH_YD_PTS = 0.1  # 1 point per 10 rushing yards

rb_entry = []

for player in rbs:
    # Filter the frame once per player and reuse it for all three sums.
    games = df[df.loc[:, 'Player'] == player]
    tot_points = games.loc[:, 'Pts'].sum()
    pts_from_tds = RUSH_TD_PTS * games.loc[:, 'Rush TD'].sum()
    pts_from_yds = RUSH_YD_PTS * games.loc[:, 'Rush Yds'].sum()
    rb_entry.append((player, tot_points, pts_from_tds, pts_from_yds))

df2 = pd.DataFrame(rb_entry,
                  columns = ['Player', 'Total Points', 'Points from TDs', 'Points from Yds'])

trace = go.Scatter3d(
    x = df2.loc[:, 'Points from TDs'],
    y = df2.loc[:, 'Points from Yds'],
    z = df2.loc[:, 'Total Points'],
    mode = 'markers',
    marker = dict(color='red', size=4),
    text = df2.loc[:, 'Player'],
)

layout = go.Layout(
    title = 'RB point source',
    hovermode = 'closest',
    # Label the 3D axes; the defaults are just "x", "y" and "z".
    scene = dict(
        xaxis = dict(title='Points from TDs'),
        yaxis = dict(title='Points from Yds'),
        zaxis = dict(title='Total Points'),
    ),
)

fig = go.Figure(data=[trace], layout=layout)
py.iplot(fig, filename='rb-point-source')