In [1]:
import pandas
import seaborn
%matplotlib inline
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show, output_notebook, output_file, ColumnDataSource 
from bokeh.charts import Dot, show, output_file
from bokeh.models import HoverTool
from bokeh.models.widgets import Panel, Tabs
import numpy as np

In [2]:
# Page that hosts the consensus ranking tables.
url = 'http://www.fantasypros.com/nfl/rankings/consensus-cheatsheets.php'
# Index of the player-ranking table within the list that read_html returns;
# the value was found by inspecting the scraped tables by hand.
player_ranking_table_number = 1

# read_html parses every table on the page into its own DataFrame. Keep only
# the ranking table and release the rest of the list straight away.
all_tables = pandas.read_html(url)
rankings_2016 = all_tables[player_ranking_table_number]
del all_tables
rankings_2016.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 10 columns):
Rank

						    344 non-null object
Player          343 non-null object
Pos             343 non-null object
Bye             335 non-null float64
Best            343 non-null float64
Worst           343 non-null float64
Avg             343 non-null float64
Std Dev         343 non-null float64
ADP             292 non-null float64
vs. ADP         292 non-null float64
dtypes: float64(7), object(3)
memory usage: 26.9+ KB


In [3]:
# Show the first five scraped rows as a quick sanity check of the chosen table.
rankings_2016.head(n=5)

Unnamed: 0,Rank,Player,Pos,Bye,Best,Worst,Avg,Std Dev,ADP,vs. ADP
0,1,Antonio Brown PIT,WR1,8.0,1.0,4.0,1.4,0.8,1.0,0.0
1,2,Odell Beckham Jr. NYG,WR2,8.0,2.0,12.0,3.4,1.9,3.0,1.0
2,3,Julio Jones ATL,WR3,11.0,1.0,9.0,3.8,1.6,5.0,2.0
3,4,Todd Gurley LA,RB1,8.0,1.0,12.0,4.7,2.7,4.0,0.0
4,5,David Johnson ARI,RB2,9.0,1.0,19.0,7.4,4.5,7.0,2.0


In [7]:
# Some scraped headers carry stray whitespace (the "Rank" label in the first
# info() dump is followed by newlines and tabs), so rebuild the header list
# with every label trimmed.
trimmed_labels = []
for label in rankings_2016.columns:
    trimmed_labels.append(label.strip())
rankings_2016.columns = trimmed_labels
rankings_2016.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 10 columns):
Rank       344 non-null object
Player     343 non-null object
Pos        343 non-null object
Bye        335 non-null float64
Best       343 non-null float64
Worst      343 non-null float64
Avg        343 non-null float64
Std Dev    343 non-null float64
ADP        292 non-null float64
vs. ADP    292 non-null float64
dtypes: float64(7), object(3)
memory usage: 26.9+ KB


In [9]:
# Drop, in place, every row that has no value in 'Pos'. The position prefix is
# later taken by slicing that value, which would not work on a missing entry.
rankings_2016.dropna(subset=['Pos'],inplace=True)
# Re-display the first rows of the cleaned frame.
rankings_2016.head()

Unnamed: 0,Rank,Player,Pos,Bye,Best,Worst,Avg,Std Dev,ADP,vs. ADP
0,1,Antonio Brown PIT,WR1,8.0,1.0,4.0,1.4,0.8,1.0,0.0
1,2,Odell Beckham Jr. NYG,WR2,8.0,2.0,12.0,3.4,1.9,3.0,1.0
2,3,Julio Jones ATL,WR3,11.0,1.0,9.0,3.8,1.6,5.0,2.0
3,4,Todd Gurley LA,RB1,8.0,1.0,12.0,4.7,2.7,4.0,0.0
4,5,David Johnson ARI,RB2,9.0,1.0,19.0,7.4,4.5,7.0,2.0


In [10]:
# 'Pos' holds codes such as "WR1" or "RB2"; the first two characters are kept
# as the position label, dropping the trailing rank number.
rankings_2016['position'] = [pos_code[:2] for pos_code in rankings_2016['Pos']]

In [11]:
# Position labels to keep; rows with any other label are filtered out.
positions = ['QB', 'RB', 'WR', 'TE']
rankings_2016 = rankings_2016[rankings_2016['position'].isin(positions)]

In [12]:
def bye_week_parse(name):
    """Extract the bye week from a raw player string.

    The bye week is read as the integer that follows the last comma in
    ``name``, e.g. ``"Some Player PIT, 8"`` gives ``8``.

    Parameters
    ----------
    name : str
        Raw player text to parse.

    Returns
    -------
    int
        The parsed bye week, or ``-1`` when ``name`` contains no comma or the
        text after the last comma is not an integer.
    """
    _, comma, tail = name.rpartition(",")
    if not comma:
        return -1
    try:
        # int() ignores surrounding whitespace, so " 8" parses as 8.
        return int(tail)
    except ValueError:
        # A comma that is not followed by a number (e.g. "Smith, Sr. BAL" or a
        # trailing comma) used to abort the whole column mapping; treat it the
        # same as "no bye week found".
        return -1

In [13]:
def team_name_parse(name):
    """Extract the team code from a raw player string.

    The team is the first space-separated word that ends with a comma, with
    that comma removed, e.g. ``"Some Player PIT, 8"`` gives ``"PIT"``.

    Parameters
    ----------
    name : str
        Raw player text to parse.

    Returns
    -------
    str
        The team code, or ``'FA'`` when no word ends with a comma.
    """
    for word in name.split(" "):
        # split(" ") yields empty strings for an empty name and for repeated,
        # leading or trailing spaces. endswith() handles those safely, whereas
        # indexing word[-1] raised IndexError.
        if word.endswith(','):
            return word[:-1]
    return 'FA'

In [14]:
def player_name_parse(name):
    """Extract the player's name from a raw player string.

    Words are collected from the start of ``name`` until the first word that
    is either the stand-alone marker ``FA`` or ends with a comma (the team
    code). That word and everything after it are dropped.

    Parameters
    ----------
    name : str
        Raw player text to parse.

    Returns
    -------
    str
        The collected words joined by single spaces. When no ``FA`` marker or
        comma-terminated word is present, all words of ``name`` are returned.
    """
    name_words = []
    for word in name.split(" "):
        if not word:
            # Empty tokens come from repeated, leading or trailing spaces;
            # indexing them with word[-1] used to raise IndexError.
            continue
        # "FA" is matched as a whole word only. The earlier substring search
        # also cut names that merely contained the letters "FA".
        if word == "FA" or word.endswith(','):
            break
        name_words.append(word)
    return ' '.join(name_words).strip()

In [16]:
# Split the raw 'Player' text into bye week, team and clean name. All three
# parsers read the raw text captured here; 'Player' is overwritten last.
# NOTE: because 'Player' is replaced by the cleaned name, running this cell a
# second time would parse the already-cleaned names instead of the raw text.
raw_player_text = rankings_2016['Player']
for column_name, parser in (
    ('bye_week', bye_week_parse),
    ('team', team_name_parse),
    ('Player', player_name_parse),
):
    rankings_2016[column_name] = raw_player_text.map(parser)
rankings_2016.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 293 entries, 0 to 333
Data columns (total 13 columns):
Rank        293 non-null object
Player      293 non-null object
Pos         293 non-null object
Bye         285 non-null float64
Best        293 non-null float64
Worst       293 non-null float64
Avg         293 non-null float64
Std Dev     293 non-null float64
ADP         248 non-null float64
vs. ADP     248 non-null float64
position    293 non-null object
bye_week    293 non-null int64
team        293 non-null object
dtypes: float64(7), int64(1), object(5)
memory usage: 32.0+ KB


In [17]:
def make_plot(player_data, title):
    """Build one Bokeh tab comparing expert rank with average draft position.

    Parameters
    ----------
    player_data : pandas.DataFrame
        Must provide the columns ``Player``, ``Avg``, ``ADP``, ``team`` and
        ``bye_week``. The caller's frame is not modified; a filtered copy is
        used internally.
    title : str
        Caption of the returned tab.

    Returns
    -------
    Panel
        A tab whose figure has one categorical x position per player, ordered
        by ``Avg``, with a red circle at the player's ``ADP`` and a blue
        circle at the player's ``Avg``.
    """
    # Only players whose expert average is at most 120 are plotted.
    player_data = player_data[player_data.Avg <= 120].copy()
    player_data['diff'] = player_data['Avg'] - player_data['ADP']

    def label_value(x):
        # A missing ADP makes the difference NaN. Every comparison with NaN is
        # False, so without this check such players fell through to
        # 'over valued'.
        if pandas.isnull(x):
            return 'no ADP'
        if x < 0:
            return 'under valued'
        if x == 0:
            return 'fairly valued'
        return 'over valued'
    player_data['status'] = player_data['diff'].map(label_value)

    # Sort before anything is derived from the frame so that the data source
    # and the axis factors share one order. DataFrame.sort() no longer exists
    # in pandas; sort_values() is its replacement.
    player_data = player_data.sort_values('Avg', ascending=True)

    hover1 = HoverTool(
            tooltips=[
                # "@x" reads the 'x' column of the data source (the player
                # name); "$x" would be the cursor's x coordinate instead.
                ("Name", "@x"),
                ("Team", "@team"),
                ("Bye Week", "@bye_week"),
                ("Expert Avg", "@avg"),
                ("ADP", "@adp"),
                ("Status", "@status"),
            ]
        )

    data_dict = {
        'x':player_data['Player'],
        'status':player_data['status'],
        'adp':player_data['ADP'],
        'avg':player_data['Avg'],
        'team':player_data['team'],
        'bye_week':player_data['bye_week']
    }
    # Both glyphs read from the same columns, so one source is enough.
    source = ColumnDataSource(data=data_dict)

    # NOTE(review): the 'resize' tool is only available in older Bokeh
    # releases; drop it from this list if Bokeh is upgraded.
    Tools = ['box_zoom','crosshair','resize','reset','pan']
    Tools.append(hover1)

    # The categorical x axis lists the players in sorted (by Avg) order.
    factors = list(player_data['Player'])
    all_positions = figure(title = "Draft Ranks",x_range=factors, tools=Tools,plot_width=800, plot_height=800)

    all_positions.xaxis.axis_label = 'Player'
    all_positions.yaxis.axis_label = 'Average Draft Pos'

    # Refer to the source's columns by name. Passing literal sequences
    # together with source= is rejected by newer Bokeh releases.
    all_positions.circle('x', 'adp', fill_alpha=0.2, size=10, color='red', source=source)
    all_positions.circle('x', 'avg', fill_alpha=0.2, size=10, color='blue', source=source)
    # NOTE(review): Bokeh reads a numeric orientation as radians; confirm that
    # 120 is the intended value.
    all_positions.xaxis.major_label_orientation = 120

    tab = Panel(child=all_positions, title=title)

    return tab

In [20]:
# Render Bokeh output inline in the notebook.
output_notebook()

# One tab per position, each built from that position's rows only...
tabs_list = [
    make_plot(rankings_2016[rankings_2016.position == position_code], position_code)
    for position_code in positions
]
# ...followed by a final tab that covers every remaining player.
tabs_list.append(make_plot(rankings_2016, 'All'))

tabs = Tabs(tabs=tabs_list)
show(tabs)

