In [2]:
import pandas as pd



In [5]:
# Import Fantasy data from Pro Football Reference

urls_for_pfr_data = ['2021',
                    '2020',
                    '2019',
                    '2018',
                    '2017',
                    '2016',
                    '2015',
                    '2014',
                    '2013',
                    '2012',
                    ]

list_of_dataframes = []
for year in urls_for_pfr_data: 
    link = "https://www.pro-football-reference.com/years/" + year + "/fantasy.htm" 
    df = pd.read_html(link)[0]
    df['year'] = year
    list_of_dataframes.append(df)
    
final_df = pd.concat(list_of_dataframes).reset_index(drop=True)

In [6]:
# Flatten dataframe
final_df.columns = final_df.columns.get_level_values(0) + '_' +  final_df.columns.get_level_values(1)

## Preprocess data
# Rename Columns
new_col_names = {'Unnamed: 0_level_0_Rk': 'Rank',
        'Unnamed: 1_level_0_Player': 'Player',
        'Unnamed: 2_level_0_Tm': 'Team',
        'Unnamed: 3_level_0_FantPos': 'Position',
        'Unnamed: 4_level_0_Age': 'Age'}
final_df.rename(columns=new_col_names, inplace=True)

# Clean Player Name Column
final_df['Player'] = final_df['Player'].str.strip('*+')

In [7]:
# Number 1 ranked players from each fantasy season
final_df[final_df['Fantasy_OvRank']=="1"]

Unnamed: 0,Rank,Player,Team,Position,Age,Games_G,Games_GS,Passing_Cmp,Passing_Att,Passing_Yds,...,Scoring_2PM,Scoring_2PP,Fantasy_FantPt,Fantasy_PPR,Fantasy_DKPt,Fantasy_FDPt,Fantasy_VBD,Fantasy_PosRank,Fantasy_OvRank,year_
0,1,Jonathan Taylor,IND,RB,22,17,17,0,0,0,...,,,333,373.1,381.1,353.1,187,1,1,2021
693,1,Derrick Henry,TEN,RB,26,16,16,0,0,0,...,1.0,,314,333.1,341.1,323.6,184,1,1,2020
1372,1,Christian McCaffrey,CAR,RB,23,16,16,0,2,0,...,1.0,,355,471.2,477.2,413.2,215,1,1,2019
2011,1,Todd Gurley,LAR,RB,24,14,14,0,0,0,...,3.0,,313,372.1,379.1,342.6,178,1,1,2018
2648,1,Todd Gurley,LAR,RB,23,15,15,0,0,0,...,,,319,383.3,391.3,351.3,192,1,1,2017
3265,1,David Johnson,ARI,RB,25,16,16,0,0,0,...,1.0,,328,407.8,416.8,367.8,191,1,1,2016
3869,1,Devonta Freeman,ATL,RB,23,15,13,0,0,0,...,,,243,316.4,324.4,279.9,123,1,1,2015
4477,1,DeMarco Murray,DAL,RB,26,16,16,0,0,0,...,,,294,351.1,362.1,322.6,173,1,1,2014
5082,1,Jamaal Charles,KAN,RB,27,15,15,0,0,0,...,,,308,378.0,386.0,343.0,182,1,1,2013
5686,1,Adrian Peterson,MIN,RB,27,16,16,0,0,0,...,1.0,,307,347.4,355.4,327.4,191,1,1,2012


In [8]:
# One players Fantasy Prodution during career
final_df[final_df['Player']=="Todd Gurley"]

Unnamed: 0,Rank,Player,Team,Position,Age,Games_G,Games_GS,Passing_Cmp,Passing_Att,Passing_Yds,...,Scoring_2PM,Scoring_2PP,Fantasy_FantPt,Fantasy_PPR,Fantasy_DKPt,Fantasy_FDPt,Fantasy_VBD,Fantasy_PosRank,Fantasy_OvRank,year_
762,68,Todd Gurley,ATL,RB,26,15,15,0,0,0,...,,,138,163.2,169.2,150.7,8,23,68,2020
1395,24,Todd Gurley,LAR,RB,25,15,15,0,0,0,...,1.0,,188,219.4,227.4,203.9,48,12,24,2019
2011,1,Todd Gurley,LAR,RB,24,14,14,0,0,0,...,3.0,,313,372.1,379.1,342.6,178,1,1,2018
2648,1,Todd Gurley,LAR,RB,23,15,15,0,0,0,...,,,319,383.3,391.3,351.3,192,1,1,2017
3315,50,Todd Gurley,LAR,RB,22,16,16,0,0,0,...,,,155,198.2,205.2,176.7,18,20,50,2016
3882,14,Todd Gurley,STL,RB,21,13,12,0,0,0,...,,,187,208.4,215.4,197.9,67,5,14,2015


In [42]:
final_df['Unnamed: 1_level_0']['Player'].str.strip('*+')

0       Jonathan Taylor
1           Cooper Kupp
2          Deebo Samuel
3            Josh Allen
4         Austin Ekeler
             ...       
6294     Brock Osweiler
6295     Graham Harrell
6296       Josh Johnson
6297       Ryan Mallett
6298          Joe Adams
Name: Player, Length: 6299, dtype: object

In [9]:
final_df.to_csv('data/pfr_10yr_data_2022.csv')