In [1]:
import pandas as pd
import numpy as np
import os

# Combine Data

In [4]:
comb_data = pd.DataFrame()

for num in range(2001, 2024):
    tables = pd.read_html(f"https://www.pro-football-reference.com/draft/{num}-combine.htm")
    data = tables[0]                     # first table on the page
    comb_data = pd.concat([comb_data, data], ignore_index=True)

comb_data.head()

Unnamed: 0,Player,Pos,School,College,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Drafted (tm/rnd/yr)
0,Keith Adams,OLB,Clemson,College Stats,5-11,219,4.82,,24.0,104.0,,,Tennessee Titans / 7th / 232nd pick / 2001
1,Hakim Akbar,S,Washington,College Stats,6-0,210,4.55,,,,,,New England Patriots / 5th / 163rd pick / 2001
2,Brian Allen,OLB,Florida State,College Stats,6-0,238,4.65,,25.0,,,,St. Louis Rams / 3rd / 83rd pick / 2001
3,David Allen,RB,Kansas State,College Stats,5-9,194,4.62,31.0,15.0,111.0,,,
4,Matt Allen,P,Troy,,6-3,246,,,,,,,


# Input Data

In [5]:
file_path = 'C:/Users/jasea/OneDrive/Desktop/NFL Big Data Bowl/Data Files/train'

csvs = os.listdir(file_path)

df_list = []

for file in csvs:
    if file.startswith('input'):
        dfs = pd.read_csv(f'{file_path}/{file}')

        df_list.append(dfs)

df_input = pd.concat(df_list, ignore_index=True)

df_input.head()

Unnamed: 0,game_id,play_id,player_to_predict,nfl_id,frame_id,play_direction,absolute_yardline_number,player_name,player_height,player_weight,...,player_role,x,y,s,a,dir,o,num_frames_output,ball_land_x,ball_land_y
0,2023090700,101,False,54527,1,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.33,36.94,0.09,0.39,322.4,238.24,21,63.259998,-0.22
1,2023090700,101,False,54527,2,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.33,36.94,0.04,0.61,200.89,236.05,21,63.259998,-0.22
2,2023090700,101,False,54527,3,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.33,36.93,0.12,0.73,147.55,240.6,21,63.259998,-0.22
3,2023090700,101,False,54527,4,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.35,36.92,0.23,0.81,131.4,244.25,21,63.259998,-0.22
4,2023090700,101,False,54527,5,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.37,36.9,0.35,0.82,123.26,244.25,21,63.259998,-0.22


# NFL IDs

In [6]:
nfl_ids = df_input[['nfl_id', 'player_name']].drop_duplicates().reset_index(drop=True).copy()

comb_df = comb_data.merge(nfl_ids, left_on='Player', right_on='player_name', how='inner').drop(columns=['player_name'])

comb_df.head()

Unnamed: 0,Player,Pos,School,College,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Drafted (tm/rnd/yr),nfl_id
0,Kenny Clark,WR,Central Florida,College Stats,6-1,225,4.59,35.5,,121,,,,43316
1,Marcus Williams,WR,Washington State,College Stats,6-5,230,4.56,41.0,,130,7.37,4.2,,44854
2,Mike Williams,OT,Texas,College Stats,6-6,375,5.3,30.0,29.0,102,,,Buffalo Bills / 1st / 4th pick / 2002,44819
3,Asante Samuel,CB,Central Florida,College Stats,5-11,185,4.49,35.5,9.0,120,6.95,4.14,New England Patriots / 4th / 120th pick / 2003,53476
4,Keith Smith,CB,McNeese State,,5-11,201,4.48,36.5,12.0,116,,,Detroit Lions / 3rd / 73rd pick / 2004,41808


# WR Data

In [12]:
# Retrieve wide receiver data
wr_df = comb_df[comb_df['Pos'] == 'WR'].reset_index(drop=True).copy()

# Keep only relevant columsn
wr_df = wr_df[['Player', 'nfl_id', 'Pos', '40yd', 'Vertical', '3Cone', 'Shuttle']]

# Get ranks for the metrics we want to look at
wr_df['40yd_rank'] = wr_df['40yd'].rank(method='min').astype('Int64')
wr_df['3Cone_rank'] = wr_df['3Cone'].rank(method='min').astype('Int64')
wr_df['Shuttle_rank'] = wr_df['Shuttle'].rank(method='min').astype('Int64')

# Count valid 40, 3Cone, and Shuttle times
count_40 = len(wr_df[wr_df['40yd'].notnull()])
count_3cone = len(wr_df[wr_df['3Cone'].notnull()])
count_shuttle = len(wr_df[wr_df['Shuttle'].notnull()])

wr_df['40_percentile'] = round(wr_df['40yd_rank'] / count_40 * 100, 2)
wr_df['3Cone_percentile'] = round(wr_df['3Cone_rank'] / count_3cone * 100, 2)
wr_df['Shuttle_percentile'] = round(wr_df['Shuttle_rank'] / count_shuttle * 100, 2)

wr_df.sort_values(by='40yd_rank', ascending=True).head(20)

Unnamed: 0,Player,nfl_id,Pos,40yd,Vertical,3Cone,Shuttle,40yd_rank,3Cone_rank,Shuttle_rank,40_percentile,3Cone_percentile,Shuttle_percentile
10,Marquise Goodwin,40026,WR,4.27,,,,1,,,0.68,,
134,Tyquan Thornton,54515,WR,4.28,36.5,,,2,,,1.36,,
112,Rondale Moore,53478,WR,4.28,42.5,6.65,4.06,2,6.0,10.0,1.36,6.67,10.53
60,Parris Campbell,47842,WR,4.31,40.0,,4.03,4,,8.0,2.72,,8.42
66,Andy Isabella,47845,WR,4.31,36.5,6.95,4.15,4,40.0,26.0,2.72,44.44,27.37
43,Curtis Samuel,44852,WR,4.31,37.0,7.09,4.33,4,70.0,70.0,2.72,77.78,73.68
63,Mecole Hardman,47839,WR,4.33,36.5,,,7,,,4.76,,
15,Brandin Cooks,41246,WR,4.33,36.0,6.76,3.81,7,13.0,1.0,4.76,14.44,1.05
24,Phillip Dorsett,42372,WR,4.33,37.0,6.7,4.11,7,8.0,16.0,4.76,8.89,16.84
159,Trey Palmer,56056,WR,4.33,,,,7,,,4.76,,
