# The Game beyond NFL

In [1]:
%pip install pgmpy

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import matplotlib.pyplot as plt

## Reading the data and feature selection

In [3]:
# Load the imputed dataset from its relative path into the working frame.
df = pd.read_csv("data/imputed_data.csv")
# Display the leading five rows to confirm the columns parsed as expected.
df.head(n=5)

Unnamed: 0,Name,Position,College,Round,Pick,Stat URL,Height,Weight,40 Yard Dash,Bench Press,...,rec_td,rush_att,rush_yds,rush_yds_per_att,rush_td,scrim_att,scrim_yds,scrim_yds_per_att,scrim_td,Year
0,Emmanuel Acho,OLB,Texas,6,204,https://www.sports-reference.com/cfb/players/e...,74.0,238.0,4.64,24.0,...,5.29,199.2,1282.58,8.83,14.91,239.71,1747.91,8.22,20.2,2012
1,Joe Adams,WR,Arkansas,4,104,https://www.sports-reference.com/cfb/players/j...,71.0,179.0,4.51,14.59,...,8.5,4.0,69.5,11.65,0.0,96.0,1393.5,14.45,8.5,2012
2,Chas Alecxih,DT,Pittsburgh,0,0,https://www.sports-reference.com/cfb/players/c...,76.0,296.0,5.31,19.0,...,0.0,1.19,5.2,-0.68,0.36,1.36,5.55,0.86,0.36,2012
3,Frank Alexander,DE,Oklahoma,4,103,https://www.sports-reference.com/cfb/players/f...,76.0,270.0,4.8,24.48,...,2.17,22.98,75.37,4.12,4.24,36.81,231.59,6.49,6.41,2012
4,Antonio Allen,S,South Carolina,7,242,https://www.sports-reference.com/cfb/players/a...,73.0,210.0,4.58,17.0,...,1.68,374.69,2061.25,4.94,19.21,420.39,2397.36,6.43,20.89,2012


In [4]:
# List every column label of the freshly loaded frame as a NumPy object array.
df.columns.to_numpy()

array(['Name', 'Position', 'College', 'Round', 'Pick', 'Stat URL',
       'Height', 'Weight', '40 Yard Dash', 'Bench Press', 'Vertical Jump',
       'Broad Jump', '3 Cone Drill', 'Shuttle', 'conf_abbr', 'games',
       'seasons', 'tackles_solo', 'tackles_assists', 'tackles_total',
       'tackles_loss', 'sacks', 'def_int', 'def_int_yds', 'def_int_td',
       'pass_defended', 'fumbles_rec', 'fumbles_rec_yds',
       'fumbles_rec_td', 'fumbles_forced', 'rec', 'rec_yds',
       'rec_yds_per_rec', 'rec_td', 'rush_att', 'rush_yds',
       'rush_yds_per_att', 'rush_td', 'scrim_att', 'scrim_yds',
       'scrim_yds_per_att', 'scrim_td', 'Year'], dtype=object)

In [5]:
from sklearn.preprocessing import LabelEncoder

# Fit a label encoder on the position names and turn them into integer codes in one call.
# The fitted encoder stays bound to `le`; the resulting code array is shown as the cell output.
le = LabelEncoder()
label = le.fit_transform(df.loc[:, "Position"])
label

array([14, 21,  5, ...,  5,  6,  5])

In [6]:
# Swap the text Position column for its integer codes held in `label`.
# Dropping first and re-assigning places Position as the last column of the frame.
df.drop(columns=["Position"], inplace=True)
df["Position"] = label

In [7]:
# Remove the per-player stats link column from the frame, in place.
df.drop(columns=["Stat URL"], inplace=True)

In [8]:
# Remove the Year column from the frame, in place.
df.drop(columns=["Year"], inplace=True)

In [9]:
# Remove the player Name column from the frame, in place.
df.drop(columns=["Name"], inplace=True)

In [10]:
# Remove the College column from the frame, in place.
df.drop(columns=["College"], inplace=True)

In [11]:
# Remove the conf_abbr column from the frame, in place.
df.drop(columns=["conf_abbr"], inplace=True)

In [12]:
# Rescale Round to 1 / (1 + Round) with vectorized Series arithmetic rather than a
# per-element Python lambda (same values: 0 -> 1.0, 1 -> 0.5, 6 -> ~0.143).
# This overwrites the column, so re-running the cell applies the transform a second time.
# NOTE(review): a Round of 0 maps to 1.0, the largest value, i.e. above round 1 (0.5).
# If 0 denotes an undrafted player, that ordering is probably unintended — confirm.
df["Round"] = 1 / (1 + df["Round"])

In [13]:
# Rescale Pick to 1 / (1 + Pick) with vectorized Series arithmetic rather than a
# per-element Python lambda (same values: 0 -> 1.0, 103 -> ~0.0096, 204 -> ~0.0049).
# This overwrites the column, so re-running the cell applies the transform a second time.
# NOTE(review): a Pick of 0 maps to 1.0, the largest value, i.e. above pick 1 (0.5).
# If 0 denotes an undrafted player, that ordering is probably unintended — confirm.
df["Pick"] = 1 / (1 + df["Pick"])

In [14]:
# List the column labels that remain after the drops, as a NumPy object array.
df.columns.to_numpy()

array(['Round', 'Pick', 'Height', 'Weight', '40 Yard Dash', 'Bench Press',
       'Vertical Jump', 'Broad Jump', '3 Cone Drill', 'Shuttle', 'games',
       'seasons', 'tackles_solo', 'tackles_assists', 'tackles_total',
       'tackles_loss', 'sacks', 'def_int', 'def_int_yds', 'def_int_td',
       'pass_defended', 'fumbles_rec', 'fumbles_rec_yds',
       'fumbles_rec_td', 'fumbles_forced', 'rec', 'rec_yds',
       'rec_yds_per_rec', 'rec_td', 'rush_att', 'rush_yds',
       'rush_yds_per_att', 'rush_td', 'scrim_att', 'scrim_yds',
       'scrim_yds_per_att', 'scrim_td', 'Position'], dtype=object)

In [15]:
# Preview the first five rows after the column drops and the Round/Pick rescaling.
df.head(n=5)

Unnamed: 0,Round,Pick,Height,Weight,40 Yard Dash,Bench Press,Vertical Jump,Broad Jump,3 Cone Drill,Shuttle,...,rec_td,rush_att,rush_yds,rush_yds_per_att,rush_td,scrim_att,scrim_yds,scrim_yds_per_att,scrim_td,Position
0,0.142857,0.004878,74.0,238.0,4.64,24.0,35.5,118.0,7.13,4.28,...,5.29,199.2,1282.58,8.83,14.91,239.71,1747.91,8.22,20.2,14
1,0.2,0.009524,71.0,179.0,4.51,14.59,36.0,123.0,7.09,4.12,...,8.5,4.0,69.5,11.65,0.0,96.0,1393.5,14.45,8.5,21
2,1.0,1.0,76.0,296.0,5.31,19.0,25.5,99.0,7.74,4.62,...,0.0,1.19,5.2,-0.68,0.36,1.36,5.55,0.86,0.36,5
3,0.2,0.009615,76.0,270.0,4.8,24.48,31.13,115.26,7.19,4.48,...,2.17,22.98,75.37,4.12,4.24,36.81,231.59,6.49,6.41,3
4,0.125,0.004115,73.0,210.0,4.58,17.0,34.0,118.0,7.02,4.25,...,1.68,374.69,2061.25,4.94,19.21,420.39,2397.36,6.43,20.89,19


In [16]:
# Pairwise Pearson correlation across the numeric columns, rendered as a colour-graded table.
# NOTE(review): Position holds integer label codes at this point, so its coefficients
# reflect the code ordering rather than a real numeric scale — interpret with care.
corr = df.corr(method="pearson", numeric_only=True)
corr.style.background_gradient(cmap="YlOrRd")

Unnamed: 0,Round,Pick,Height,Weight,40 Yard Dash,Bench Press,Vertical Jump,Broad Jump,3 Cone Drill,Shuttle,games,seasons,tackles_solo,tackles_assists,tackles_total,tackles_loss,sacks,def_int,def_int_yds,def_int_td,pass_defended,fumbles_rec,fumbles_rec_yds,fumbles_rec_td,fumbles_forced,rec,rec_yds,rec_yds_per_rec,rec_td,rush_att,rush_yds,rush_yds_per_att,rush_td,scrim_att,scrim_yds,scrim_yds_per_att,scrim_td,Position
Round,1.0,0.97346,-0.067902,-0.089816,0.071431,-0.12666,-0.071442,-0.059875,0.009962,0.037007,-0.01503,0.05739,-0.003942,-0.001207,-0.002941,-0.069933,-0.089097,0.013928,0.015143,-0.02883,-0.00254,-0.001236,0.01175,0.019096,-0.03534,-4.2e-05,-0.00815,0.00769,-0.020165,0.020255,0.013859,0.000907,0.012226,0.019885,0.007428,0.013203,0.00012,0.056097
Pick,0.97346,1.0,-0.086651,-0.102752,0.087066,-0.139674,-0.095506,-0.083214,0.015757,0.045866,-0.000233,0.085602,0.007013,0.009414,0.008416,-0.080435,-0.105873,0.017399,0.019697,-0.034803,-0.00053,0.004147,0.008324,0.021208,-0.03698,-0.004505,-0.017712,0.001037,-0.033501,0.029026,0.020522,0.001354,0.018928,0.02725,0.007462,0.005485,-0.001219,0.06503
Height,-0.067902,-0.086651,1.0,0.699247,0.558811,0.394923,-0.384103,-0.38498,0.487619,0.508317,-0.417703,-0.338,-0.452117,-0.256673,-0.389967,0.031393,0.167998,-0.463762,-0.390957,-0.243251,-0.459519,-0.130372,-0.272103,-0.333306,-0.131597,-0.299976,-0.223828,-0.092864,-0.162954,-0.43601,-0.457795,-0.387078,-0.408228,-0.507112,-0.551998,-0.107983,-0.491123,-0.071286
Weight,-0.089816,-0.102752,0.699247,1.0,0.83513,0.724522,-0.618406,-0.692371,0.815447,0.796615,-0.585279,-0.502428,-0.646397,-0.357751,-0.553601,0.015711,0.160128,-0.666684,-0.546361,-0.313232,-0.683923,-0.192547,-0.338302,-0.480252,-0.250542,-0.529781,-0.470313,-0.409898,-0.415742,-0.351489,-0.375595,-0.54505,-0.321651,-0.481592,-0.628216,-0.451029,-0.551935,-0.230252
40 Yard Dash,0.071431,0.087066,0.558811,0.83513,1.0,0.543357,-0.728508,-0.796166,0.806633,0.796327,-0.576934,-0.484265,-0.605544,-0.360718,-0.529632,-0.107125,0.003934,-0.561873,-0.464616,-0.268083,-0.630203,-0.195788,-0.307407,-0.463535,-0.315134,-0.507031,-0.467349,-0.467491,-0.433114,-0.225776,-0.253686,-0.519812,-0.211159,-0.353199,-0.517258,-0.502087,-0.455843,-0.200489
Bench Press,-0.12666,-0.139674,0.394923,0.724522,0.543357,1.0,-0.355294,-0.435963,0.546699,0.517426,-0.3428,-0.291779,-0.408736,-0.153117,-0.318676,0.178636,0.272948,-0.531864,-0.444215,-0.23847,-0.54099,-0.062772,-0.247386,-0.357616,-0.106177,-0.477995,-0.449165,-0.355159,-0.400374,-0.205895,-0.221651,-0.401268,-0.173568,-0.324911,-0.476088,-0.439778,-0.399341,-0.255533
Vertical Jump,-0.071442,-0.095506,-0.384103,-0.618406,-0.728508,-0.355294,1.0,0.839557,-0.664664,-0.678232,0.429444,0.370855,0.467289,0.287886,0.412819,0.13561,0.054164,0.393409,0.313954,0.203457,0.443941,0.165857,0.2398,0.317573,0.242359,0.394166,0.369613,0.421861,0.352793,0.131486,0.157868,0.401426,0.119428,0.23104,0.37022,0.415671,0.319932,0.099815
Broad Jump,-0.059875,-0.083214,-0.38498,-0.692371,-0.796166,-0.435963,0.839557,1.0,-0.712417,-0.707721,0.461669,0.406217,0.504055,0.302136,0.441678,0.11881,0.03387,0.43925,0.354741,0.223921,0.482391,0.181993,0.256861,0.360371,0.264492,0.411189,0.391073,0.457939,0.373382,0.149045,0.1774,0.438899,0.139054,0.252817,0.40116,0.450016,0.351046,0.139791
3 Cone Drill,0.009962,0.015757,0.487619,0.815447,0.806633,0.546699,-0.664664,-0.712417,1.0,0.886175,-0.579829,-0.487497,-0.606618,-0.379425,-0.538292,-0.137536,-0.018151,-0.545576,-0.454954,-0.280018,-0.591332,-0.215714,-0.287863,-0.419702,-0.320371,-0.440511,-0.394674,-0.416523,-0.366651,-0.221846,-0.243712,-0.444977,-0.206296,-0.331492,-0.462865,-0.414531,-0.411994,-0.145119
Shuttle,0.037007,0.045866,0.508317,0.796615,0.796327,0.517426,-0.678232,-0.707721,0.886175,1.0,-0.569645,-0.467011,-0.602601,-0.383729,-0.537662,-0.139755,-0.017464,-0.536229,-0.439261,-0.270755,-0.581011,-0.220924,-0.293864,-0.413646,-0.314131,-0.414274,-0.367497,-0.387069,-0.338518,-0.226773,-0.250152,-0.444189,-0.210024,-0.329717,-0.452102,-0.377614,-0.399486,-0.118338


## Estimating the DAG using Structural Learning

In [17]:
from pgmpy.estimators import PC

In [18]:
# Build pgmpy's PC structure-learning estimator over the preprocessed DataFrame.
est = PC(data=df)

In [19]:
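# Structure estimation is currently disabled, so no DAG is produced by this notebook yet.
# NOTE(review): the remaining columns are mostly continuous; before enabling, confirm that the
# conditional-independence test used by PC suits continuous data (e.g. ci_test="pearsonr").
# est.estimate(variant="stable", max_cond_vars=2, return_type="dag")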