In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Folder holding the raw CSV exports. The trailing slash matters because each
# path below is formed by prefixing this string directly onto the file name.
files_dir = 'nfl-draft-data/'

# File names of the three source tables.
profiles_file, prospects_file, stats_file = (
    'nfl_draft_profiles.csv',
    'nfl_draft_prospects.csv',
    'college_statistics.csv',
)

# Relative paths to each table.
profiles_path = f'{files_dir}{profiles_file}'
prospects_path = f'{files_dir}{prospects_file}'
stats_path = f'{files_dir}{stats_file}'

In [3]:
# Read the three raw tables, in the same order as before, using pandas'
# default CSV settings.
profiles_df, prospects_df, stats_df = (
    pd.read_csv(csv_path)
    for csv_path in (profiles_path, prospects_path, stats_path)
)

# Datasets

## Prospects

In [4]:
# Overview of the prospects table: column names, non-null counts and dtypes.
prospects_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13354 entries, 0 to 13353
Data columns (total 24 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   draft_year      13354 non-null  int64  
 1   player_id       13354 non-null  int64  
 2   player_name     13354 non-null  object 
 3   position        13045 non-null  object 
 4   pos_abbr        13045 non-null  object 
 5   school          13145 non-null  object 
 6   school_name     13127 non-null  object 
 7   school_abbr     12958 non-null  object 
 8   link            13354 non-null  object 
 9   pick            11788 non-null  float64
 10  overall         11788 non-null  float64
 11  round           11788 non-null  float64
 12  traded          11788 non-null  object 
 13  trade_note      4506 non-null   object 
 14  team            11788 non-null  object 
 15  team_abbr       11788 non-null  object 
 16  team_logo_espn  11754 non-null  object 
 17  guid            6172 non-null  

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# prospect columns.
# NOTE(review): the recorded output shows weight max = 6022 and height
# min = 38, which look like data-entry outliers — confirm before modelling.
prospects_df.describe()

Unnamed: 0,draft_year,player_id,pick,overall,round,weight,height,pos_rk,ovr_rk,grade
count,13354.0,13354.0,11788.0,11788.0,11788.0,6112.0,6081.0,5989.0,5220.0,5995.0
mean,1998.189831,29884.278718,16.905921,113.918392,4.110875,244.736093,73.941169,15.862581,156.32567,57.78749
std,16.116808,28600.931243,10.123831,67.882416,2.004959,86.888394,2.684655,14.069466,96.825064,20.367434
min,1967.0,1.0,1.0,1.0,1.0,155.0,38.0,1.0,1.0,20.0
25%,1984.0,13684.25,8.0,56.0,2.0,205.0,72.0,6.0,73.0,39.0
50%,2001.0,20725.5,16.0,112.0,4.0,234.0,74.0,13.0,148.0,57.0
75%,2012.0,28963.75,24.0,167.0,6.0,285.0,76.0,22.0,235.0,74.0
max,2021.0,105496.0,54.0,262.0,7.0,6022.0,81.625,151.0,373.0,99.0


## Profiles

In [6]:
# Overview of the profiles table: column names, non-null counts and dtypes.
profiles_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12905 entries, 0 to 12904
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   player_id      12905 non-null  int64  
 1   guid           5949 non-null   object 
 2   alt_player_id  5882 non-null   float64
 3   player_name    12905 non-null  object 
 4   position       12905 non-null  object 
 5   pos_abbr       12905 non-null  object 
 6   weight         12905 non-null  int64  
 7   height         12905 non-null  float64
 8   player_image   1506 non-null   object 
 9   link           12905 non-null  object 
 10  school_logo    12485 non-null  object 
 11  school         12905 non-null  object 
 12  school_abbr    12867 non-null  object 
 13  school_name    12895 non-null  object 
 14  pos_rk         5761 non-null   float64
 15  ovr_rk         5100 non-null   float64
 16  grade          5767 non-null   float64
 17  text1          2992 non-null   object 
 18  text2 

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# profile columns.
# NOTE(review): in the recorded output weight and height have min = 0 and the
# median height is 0, so zeros are presumably placeholders for missing
# values — confirm against the data source.
profiles_df.describe()

Unnamed: 0,player_id,alt_player_id,weight,height,pos_rk,ovr_rk,grade
count,12905.0,5882.0,12905.0,12905.0,5761.0,5100.0,5767.0
mean,30054.561875,2715410.0,124.214878,33.460672,15.453394,155.609608,58.343333
std,28767.447874,780809.8,136.383997,36.825631,13.460563,97.114488,20.321728
min,1.0,13029.0,0.0,0.0,1.0,1.0,20.0
25%,13980.0,2158407.0,0.0,0.0,6.0,72.0,40.0
50%,20691.0,2470314.0,178.0,0.0,13.0,147.0,58.0
75%,29007.0,3084650.0,235.0,73.625,21.0,234.25,75.0
max,105496.0,4820589.0,6022.0,81.625,151.0,373.0,99.0


# Removing columns

In [8]:
# Columns to discard from the prospects table.
cols_remove_prospects = [
    'draft_year', 'position', 'school_name', 'school_abbr', 'link', 'round',
    'trade_note', 'team', 'team_logo_espn', 'guid', 'player_image',
]

# DataFrame.drop returns a new frame, so no explicit .copy() is needed, and it
# raises a KeyError naming any listed label that prospects_df does not have.
df_prospects = prospects_df.drop(columns=cols_remove_prospects)

# Surviving column names, in their original order.
cols_prospects = list(df_prospects.columns)
print(cols_prospects)

df_prospects

['player_id', 'player_name', 'pos_abbr', 'school', 'pick', 'overall', 'traded', 'team_abbr', 'weight', 'height', 'pos_rk', 'ovr_rk', 'grade']


Unnamed: 0,player_id,player_name,pos_abbr,school,pick,overall,traded,team_abbr,weight,height,pos_rk,ovr_rk,grade
0,23590,Bubba Smith,DE,Michigan State,1.0,1.0,False,IND,,,,,
1,23591,Clinton Jones,RB,Michigan State,2.0,2.0,False,MIN,,,,,
2,23592,Steve Spurrier,QB,Florida,3.0,3.0,False,SF,,,,,
3,23593,Bob Griese,QB,Purdue,4.0,4.0,False,MIA,,,,,
4,23594,George Webster,LB,Michigan State,5.0,5.0,False,TEN,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13349,105466,Justus Reed,DE,Virginia Tech,,,,,253.0,75.000,31.0,344.0,30.0
13350,105467,K.J. Costello,QB,Mississippi State,,,,,227.0,76.625,15.0,345.0,30.0
13351,105468,Donovan Stiner,S,Florida,,,,,205.0,73.500,29.0,346.0,30.0
13352,105478,Mac McCain III,CB,North Carolina A&T,,,,,186.0,71.000,44.0,348.0,30.0


In [9]:
# Keep only the rows in which every remaining column has a value. The explicit
# copy yields an independent frame that later cells can modify freely.
df_clear = df_prospects.dropna(axis='index', how='any').copy()
df_clear

Unnamed: 0,player_id,player_name,pos_abbr,school,pick,overall,traded,team_abbr,weight,height,pos_rk,ovr_rk,grade
7202,7841,Eli Manning,QB,Ole Miss,1.0,1.0,False,SD,221.0,77.0,2.0,5.0,98.0
7203,7747,Robert Gallery,OT,Iowa,2.0,2.0,False,OAK,323.0,79.0,1.0,2.0,99.0
7204,7993,Larry Fitzgerald,WR,Pittsburgh,3.0,3.0,False,ARI,221.0,75.0,1.0,1.0,99.0
7205,7842,Philip Rivers,QB,North Carolina State,4.0,4.0,False,NYG,224.0,77.0,3.0,14.0,95.0
7206,7405,Sean Taylor,DB,Miami,5.0,5.0,False,WSH,228.0,75.0,1.0,3.0,99.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13241,105459,Pressley Harvin III,P,Georgia Tech,27.0,254.0,True,PIT,263.0,71.0,4.0,337.0,30.0
13243,104931,Kylin Hill,RB,Mississippi State,29.0,256.0,False,GB,214.0,71.0,16.0,219.0,44.0
13244,105325,Jermar Jefferson,RB,Oregon State,30.0,257.0,True,DET,206.0,70.0,10.0,163.0,59.0
13245,105318,Dax Milne,WR,BYU,31.0,258.0,True,WSH,193.0,73.0,44.0,279.0,34.0


In [10]:
# Number of fully populated prospect rows per drafting-team abbreviation.
df_clear['team_abbr'].value_counts()

MIN    136
BAL    132
CIN    131
CLE    130
SF     127
GB     121
TEN    120
PHI    119
DAL    116
DEN    116
BUF    112
TB     111
NE     110
SEA    109
IND    109
PIT    109
DET    109
ARI    109
MIA    108
CAR    108
HOU    107
NYJ    106
JAX    106
WSH    106
KC     106
NYG    106
ATL    102
CHI     98
OAK     96
NO      90
STL     76
SD      68
LAR     42
LAC     34
LV      13
Name: team_abbr, dtype: int64

Teams that relocated since 2004, and therefore changed abbreviation (old -> current):

STL -> LAR

SD -> LAC

OAK -> LV

In [11]:
# Old -> current abbreviation for each franchise that relocated.
team_relocations = {'STL': 'LAR', 'SD': 'LAC', 'OAK': 'LV'}

# Apply the mapping to the team column only. A frame-wide replace would also
# rewrite any other cell (player name, school, position, ...) whose value
# happened to equal one of these strings.
df_clear['team_abbr'] = df_clear['team_abbr'].replace(team_relocations)

In [12]:
# Re-count rows per team to check that the old abbreviations (STL, SD, OAK)
# have been folded into the current ones (LAR, LAC, LV).
df_clear['team_abbr'].value_counts()

MIN    136
BAL    132
CIN    131
CLE    130
SF     127
GB     121
TEN    120
PHI    119
LAR    118
DAL    116
DEN    116
BUF    112
TB     111
NE     110
SEA    109
IND    109
PIT    109
LV     109
ARI    109
DET    109
MIA    108
CAR    108
HOU    107
WSH    106
NYG    106
KC     106
NYJ    106
JAX    106
ATL    102
LAC    102
CHI     98
NO      90
Name: team_abbr, dtype: int64

In [13]:
# Every column of the cleaned table except the team label itself.
features = [column for column in df_clear.columns if column != 'team_abbr']

# Creating network

In [21]:
# Output files: the graph as first built, and the graph saved again after the
# layout step.
path_save, path_graph = "nfl.net.gz", "nfl_graph.net.gz"

In [15]:
import netpixi
from netpixi.integration.gt import *
from regression.integration.gt import *



In [16]:
# Positions of the two columns needed when iterating over raw row arrays.
cols = list(df_clear.columns)
team_abbr_index, player_id_index = (
    cols.index(label) for label in ("team_abbr", "player_id")
)

In [17]:
# Create an empty undirected graph. `Graph` is supplied by one of the star
# imports — presumably netpixi.integration.gt; confirm.
g = Graph(directed=False)

In [18]:
# Declare one vertex property on the graph for every feature column.
for feature_name in features:
    g.add_vp(feature_name)

In [19]:
# Populate the graph: one vertex per player, one vertex per team, and an edge
# linking each player to the team stored in the team_abbr column.
# NOTE(review): the vertex properties declared from `features` are never given
# values in this loop — confirm whether they should be filled in here.
teams_added = set()  # team vertices created so far, so each is added once

for row in df_clear.values:
    player_id = row[player_id_index]
    team_abbr = row[team_abbr_index]

    g.add_vertex(player_id)
    # Many players share a team, so create the team vertex only the first time
    # it is seen. This replaces a bare `except: pass`, which hid every error
    # (not just the presumed duplicate-vertex one) raised by add_vertex.
    if team_abbr not in teams_added:
        g.add_vertex(team_abbr)
        teams_added.add(team_abbr)
    g.add_edge(player_id, team_abbr)

In [23]:
# Write the freshly built graph to path_save.
gt_save(g, path_save)

In [None]:
# Reload the saved graph, compute a layout for it, and save the result under
# path_graph for rendering.
g = gt_load(path_save)
# presumably graph-tool's SFDP force-directed layout — TODO confirm; if it
# starts from random positions, the result may differ between runs.
m = gt_draw.sfdp_layout(g)
# presumably applies the computed positions `m` to the vertices of `g` —
# verify against the netpixi integration docs.
gt_move(g, m)
gt_save(g, path_graph)

In [30]:
# Render the laid-out graph file; `r` is the handle the next cell uses to
# adjust the visual style.
r = netpixi.render(path_graph)

In [31]:
# change the graph background color
r.graph(color = 0x000002, kscale = 2)

# change the color and size of the vertices
r.vertex_default(color = 0x03FF07, size = 4)

# configure the edges
r.edge_default(color = 0x48FA22, width = 0.3, curve1 = 1, curve2 = 2)