<a href="https://colab.research.google.com/github/rafabandoni/nfl-predict/blob/main/notebooks/00_nfl_predict_class.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

In [2]:
pd.set_option('display.max_columns', None)

# 00. Loading and Cleaning Data

## Creating dataframes

In [3]:
PATH = 'https://raw.githubusercontent.com/rafabandoni/nfl-predict/refs/heads/main/data/input/'

In [4]:
score_historical = pd.read_csv(PATH + 'spreadspoke_scores.csv')
score_historical.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail
0,9/2/1966,1966,1,False,Miami Dolphins,14,23,Oakland Raiders,,,,Orange Bowl,False,83.0,6.0,71.0,
1,9/3/1966,1966,1,False,Houston Oilers,45,7,Denver Broncos,,,,Rice Stadium,False,81.0,7.0,70.0,
2,9/4/1966,1966,1,False,San Diego Chargers,27,7,Buffalo Bills,,,,Balboa Stadium,False,70.0,7.0,82.0,
3,9/9/1966,1966,2,False,Miami Dolphins,14,19,New York Jets,,,,Orange Bowl,False,82.0,11.0,78.0,
4,9/10/1966,1966,1,False,Green Bay Packers,24,3,Baltimore Colts,,,,Lambeau Field,False,64.0,8.0,62.0,


In [5]:
# Iter over a list so we don't need to call each df individually
data_list = [
  'defense_downs',
  'defense_fumbles',
  'defense_interceptions',
  'defense_passing',
  'defense_receiving',
  'defense_rushing',
  'defense_scoring',
  'defense_tackles',
  'offense_downs',
  'offense_passing',
  'offense_receiving',
  'offense_rushing',
  'offense_scoring',
  'special-teams_field-goals',
  'special-teams_kickoff-returns',
  'special-teams_kickoffs',
  'special-teams_punt-returns',
  'special-teams_punting',
  'special-teams_scoring',
]

In [6]:
dataframe_dict = {}
for item in data_list:
  data = pd.read_csv(PATH + f'{item}.csv')
  dataframe_dict[item] = data

In [7]:
# Unpack dict into each of the dataframe variables
(
  defense_downs,
  defense_fumbles,
  defense_interceptions,
  defense_passing,
  defense_receiving,
  defense_rushing,
  defense_scoring,
  defense_tackles,
  offense_downs,
  offense_passing,
  offense_receiving,
  offense_rushing,
  offense_scoring,
  special_teams_field_goals,
  special_teams_kickoff_returns,
  special_teams_kickoffs,
  special_teams_punt_returns,
  special_teams_punting,
  special_teams_scoring
) = tuple(dataframe_dict.values())

In [8]:
# Test
offense_rushing.head()

Unnamed: 0,Team,Att,Rush Yds,YPC,TD,20+,40+,Lng,Rush 1st,Rush 1st%,Rush FUM,year
0,Chiefs,556,2627,4.7,13,15,4,80T,129,23.2,7,2010
1,Jets,534,2374,4.4,14,11,2,53,118,22.1,11,2010
2,Jaguars,512,2395,4.7,14,14,1,74,148,28.9,8,2010
3,Raiders,504,2494,5.0,19,27,6,71,113,22.4,9,2010
4,Falcons,497,1891,3.8,14,12,1,55,111,22.3,4,2010


## Cleaning data

### Joining dataframes

So we can work with all as one.

Let's start joining and treating all defense data.

In order to do better code and avoid repetitions, let's create a function!

**Note**: In class, let's first create the code and then the function

In [9]:
def get_df_name(df):
  name =[x for x in globals() if globals()[x] is df][0]
  return name

def change_dataframes_columns(dataframes_list):
  keep_names = ['year', 'Team']
  for df in dataframes_list:
    df_name = get_df_name(df)
    new_columns = []
    for column in df.columns:
      if not column in (keep_names):
        new_name = f'{df_name}_{column}'
      else:
        new_name = column
      new_columns.append(new_name)
    df.columns = new_columns

def merge_dataframes(dataframes_list):
  new_dataframe = dataframes_list[0].copy()
  for df in dataframes_list[1:]:
    new_dataframe = new_dataframe.merge(df,
                                        on=['year', 'Team'],
                                        how='left')
  return new_dataframe

In [10]:
defense_dfs = [
  defense_downs,
  defense_fumbles,
  defense_interceptions,
  defense_passing,
  defense_receiving,
  defense_rushing,
  defense_scoring,
  defense_tackles,
]

change_dataframes_columns(defense_dfs)
defense_dataframe = merge_dataframes(defense_dfs)

defense_dataframe.head()

Unnamed: 0,Team,defense_downs_3rd Att,defense_downs_3rd Md,defense_downs_4th Att,defense_downs_4th Md,defense_downs_Rec 1st,defense_downs_Rec 1st%,defense_downs_Rush 1st,defense_downs_Rush 1st%,defense_downs_Scrm Plys,year,defense_fumbles_FF,defense_fumbles_FR,defense_fumbles_FR TD,defense_fumbles_Rec FUM,defense_fumbles_Rush FUM,defense_interceptions_INT,defense_interceptions_INT TD,defense_interceptions_INT Yds,defense_interceptions_Lng,defense_passing_Att,defense_passing_Cmp,defense_passing_Cmp %,defense_passing_Yds/Att,defense_passing_Yds,defense_passing_TD,defense_passing_INT,defense_passing_Rate,defense_passing_1st,defense_passing_1st%,defense_passing_20+,defense_passing_40+,defense_passing_Lng,defense_passing_Sck,defense_receiving_Rec,defense_receiving_Yds,defense_receiving_Yds/Rec,defense_receiving_TD,defense_receiving_20+,defense_receiving_40+,defense_receiving_Lng,defense_receiving_Rec 1st,defense_receiving_Rec 1st%,defense_receiving_Rec FUM,defense_receiving_PDef,defense_rushing_Att,defense_rushing_Rush Yds,defense_rushing_YPC,defense_rushing_TD,defense_rushing_20+,defense_rushing_40+,defense_rushing_Lng,defense_rushing_Rush 1st,defense_rushing_Rush 1st%,defense_rushing_Rush FUM,defense_scoring_FR TD,defense_scoring_SFTY,defense_scoring_INT TD,defense_tackles_Sck,defense_tackles_Comb,defense_tackles_Asst,defense_tackles_Solo
0,Lions,198,77,12,5,187,56.7,98,22.1,1005,2010,21,13,1,8,8,14,2,194,42,518,330,63.7,7.3,3786,23,14,89.2,187,36.1,43,9,89T,44,330,3786,11.5,23,43,9,89T,187,56.7,8,66,443,1999,4.5,18,11,3,80T,98,22.1,8,1,1,2,44,949,246,743
1,Falcons,201,79,17,9,183,50.3,87,23.8,957,2010,14,9,1,4,8,22,2,337,40,560,364,65.0,6.9,3846,23,22,82.2,183,32.7,38,6,83T,31,364,3846,10.6,23,38,6,83T,183,50.3,4,71,366,1694,4.6,9,10,5,80T,87,23.8,8,1,0,2,31,873,179,686
2,Rams,221,74,12,6,191,58.2,88,21.8,1017,2010,23,12,0,5,5,14,0,161,34,570,328,57.5,6.8,3868,21,14,80.4,191,33.5,51,5,65,43,328,3868,11.8,21,51,5,65,191,58.2,5,81,404,1810,4.5,7,15,1,80,88,21.8,5,0,0,0,43,861,127,770
3,Jets,219,81,6,4,169,62.8,70,17.2,979,2010,17,17,0,2,12,12,3,214,66T,531,269,50.7,6.5,3454,24,12,77.0,169,31.8,48,5,50,40,269,3454,12.8,24,48,5,50,169,62.8,2,87,408,1454,3.6,11,8,0,32,70,17.2,12,0,0,3,40,835,181,633
4,Dolphins,226,84,22,8,166,57.6,84,18.8,988,2010,13,8,1,1,4,11,0,66,21,502,288,57.4,7.1,3573,22,11,85.0,166,33.1,50,11,86T,39,288,3573,12.4,22,50,11,86T,166,57.6,1,82,447,1601,3.6,8,4,0,30,84,18.8,4,1,1,0,39,869,156,739


In [11]:
offense_dfs = [
  offense_downs,
  offense_passing,
  offense_receiving,
  offense_rushing,
  offense_scoring,
]

change_dataframes_columns(offense_dfs)
offense_dataframe = merge_dataframes(offense_dfs)

offense_dataframe.head()

Unnamed: 0,Team,offense_downs_3rd Att,offense_downs_3rd Md,offense_downs_4th Att,offense_downs_4th Md,offense_downs_Rec 1st,offense_downs_Rec 1st%,offense_downs_Rush 1st,offense_downs_Rush 1st%,offense_downs_Scrm Plys,year,offense_passing_Att,offense_passing_Cmp,offense_passing_Cmp %,offense_passing_Yds/Att,offense_passing_Pass Yds,offense_passing_TD,offense_passing_INT,offense_passing_Rate,offense_passing_1st,offense_passing_1st%,offense_passing_20+,offense_passing_40+,offense_passing_Lng,offense_passing_Sck,offense_passing_SckY,offense_receiving_Rec,offense_receiving_Yds,offense_receiving_Yds/Rec,offense_receiving_TD,offense_receiving_20+,offense_receiving_40+,offense_receiving_Lng,offense_receiving_Rec 1st,offense_receiving_Rec 1st%,offense_receiving_Rec FUM,offense_rushing_Att,offense_rushing_Rush Yds,offense_rushing_YPC,offense_rushing_TD,offense_rushing_20+,offense_rushing_40+,offense_rushing_Lng,offense_rushing_Rush 1st,offense_rushing_Rush 1st%,offense_rushing_Rush FUM,offense_scoring_Rsh TD,offense_scoring_Rec TD,offense_scoring_Tot TD,offense_scoring_2-PT
0,Lions,242,97,16,10,199,52.0,83,20.5,1064,2010,633,383,60.5,6.3,4001,26,16,82.0,199,31.4,43,6,87,27,191,383,4001,10.4,26,43,6,87,199,52.0,4,404,1613,4.0,11,9,2,45,83,20.5,6,11,26,41,2
1,Falcons,240,112,15,11,200,55.4,111,22.3,1097,2010,577,361,62.6,6.5,3725,28,9,90.8,200,34.7,32,6,46,23,158,361,3725,10.3,28,32,6,46T,200,55.4,3,497,1891,3.8,14,12,1,55,111,22.3,4,14,28,47,2
2,Rams,235,78,15,8,179,50.6,84,19.6,1053,2010,590,354,60.0,6.0,3512,18,15,76.5,179,30.3,36,4,49,34,244,354,3512,9.9,18,36,4,49,179,50.6,2,429,1578,3.7,9,9,1,42T,84,19.6,2,9,18,27,0
3,Jets,235,93,14,5,171,59.4,118,22.1,1087,2010,525,288,54.9,6.5,3420,20,14,76.5,171,32.6,44,11,74,28,178,288,3420,11.9,20,44,11,74T,171,59.4,3,534,2374,4.4,14,11,2,53,118,22.1,11,14,20,39,1
4,Dolphins,230,92,10,3,189,56.4,91,20.4,1040,2010,557,335,60.1,6.7,3755,17,21,74.8,189,33.9,44,4,57T,38,228,335,3755,11.2,17,44,4,57,189,56.4,3,445,1643,3.7,8,5,2,51,91,20.4,12,8,17,26,0


In [12]:
special_teams_dfs = [
  special_teams_field_goals,
  special_teams_kickoff_returns,
  special_teams_kickoffs,
  special_teams_punt_returns,
  special_teams_punting,
  special_teams_scoring
]

change_dataframes_columns(special_teams_dfs)
special_teams_dataframe = merge_dataframes(special_teams_dfs)

special_teams_dataframe.head()

Unnamed: 0,Team,special_teams_field_goals_FGM,special_teams_field_goals_Att,special_teams_field_goals_FG %,special_teams_field_goals_1-19 > A-M,special_teams_field_goals_20-29 > A-M,special_teams_field_goals_30-39 > A-M,special_teams_field_goals_40-49 > A-M,special_teams_field_goals_50-59 > A-M,special_teams_field_goals_60+ > A-M,special_teams_field_goals_Lng,special_teams_field_goals_FG Blk,year,special_teams_kickoff_returns_Avg,special_teams_kickoff_returns_Ret,special_teams_kickoff_returns_Yds,special_teams_kickoff_returns_KRet TD,special_teams_kickoff_returns_20+,special_teams_kickoff_returns_40+,special_teams_kickoff_returns_Lng,special_teams_kickoff_returns_FC,special_teams_kickoff_returns_FUM,special_teams_kickoff_returns_FG Blk,special_teams_kickoff_returns_XP Blk,special_teams_kickoffs_KO,special_teams_kickoffs_Yds,special_teams_kickoffs_TB,special_teams_kickoffs_TB %,special_teams_kickoffs_Ret,special_teams_kickoffs_Ret Avg,special_teams_kickoffs_OSK,special_teams_kickoffs_OSK Rec,special_teams_kickoffs_OOB,special_teams_kickoffs_TD,special_teams_punt_returns_Avg,special_teams_punt_returns_Ret,special_teams_punt_returns_Yds,special_teams_punt_returns_PRet T,special_teams_punt_returns_20+,special_teams_punt_returns_40+,special_teams_punt_returns_Lng,special_teams_punt_returns_FC,special_teams_punt_returns_FUM,special_teams_punt_returns_P Blk,special_teams_punting_Att,special_teams_punting_Cmp,special_teams_punting_Cmp %,special_teams_punting_Yds/Att,special_teams_punting_Pass Yds,special_teams_punting_TD,special_teams_punting_INT,special_teams_punting_Rate,special_teams_punting_1st,special_teams_punting_1st%,special_teams_punting_20+,special_teams_punting_40+,special_teams_punting_Lng,special_teams_punting_Sck,special_teams_punting_SckY,data_FGM,data_FG %,data_XPM,data_XP Pct,data_KRet TD,data_PRet T
0,Rams,33,39,84.6,0_0,12_11,14_12,9_7,4_3,0_0,53,2,2010,21.8,74,1614,0,47,1,84,0,2,1,0,73,4866,5,6.8,65,23.7,2,0,2,1,11.3,40,452,0,7,2,42,19,0,1,590,354,60.0,6.0,3512,18,15,76.5,179,30.3,36,4,49,34,244,33,84.6,26,96.3,0,0
1,Raiders,33,41,80.5,0_0,8_8,14_13,12_8,7_4,0_0,59,0,2010,22.5,68,1532,3,33,4,101T,0,4,0,0,93,6091,29,31.2,61,23.2,1,0,1,1,7.0,55,386,0,3,2,53,11,2,2,491,279,56.8,7.1,3471,18,16,77.5,159,32.4,45,12,73T,44,291,33,80.5,43,100.0,3,0
2,Eagles,32,38,84.2,0_0,12_12,12_10,11_9,3_1,0_0,50,1,2010,20.5,62,1272,0,34,2,46,1,3,0,0,96,6214,23,24.0,69,21.8,2,1,2,0,10.6,38,404,1,4,2,65T,19,3,0,561,348,62.0,7.5,4215,28,13,92.1,189,33.7,61,15,91,49,309,32,84.2,47,100.0,0,1
3,Dolphins,30,41,73.2,1_1,9_9,5_5,18_11,6_3,2_1,60,2,2010,21.4,56,1199,0,30,2,46,1,1,1,0,70,4603,14,20.0,53,24.6,0,0,3,2,10.5,28,293,0,2,1,47,22,4,0,557,335,60.1,6.7,3755,17,21,74.8,189,33.9,44,4,57T,38,228,30,73.2,25,100.0,0,0
4,Jets,30,39,76.9,1_1,11_10,16_14,6_3,4_2,1_0,56,1,2010,25.2,63,1588,2,35,6,97T,0,0,0,0,81,5025,7,8.6,73,19.6,0,0,1,0,9.4,56,529,0,5,0,32,25,3,1,525,288,54.9,6.5,3420,20,14,76.5,171,32.6,44,11,74,28,178,30,76.9,37,100.0,2,0


## Cleaning data

Now that we already have 3 main datasets, we can start cleaning data! Cleaning data goes through some process, as such:
1. Check null values
2. Drop unused columns
3. Rename columns and/or values if needed
4. Check overall data

In [13]:
# Cleaning % data
def clean_percent_data(df):
  for column in df.columns:
    if '%' in column:
      df[column] = df[column] / 100

In [14]:
# Remove special characters from columns
def remove_special_char_columns(df):
  for column in df.columns:
    new_name = column.lower().replace(' ','_').replace('%', '_perc')
    df.rename(columns={
        column : new_name
    }, inplace=True)

In [15]:
# Transform turnover in negative data
def negative_turnover_number(turnover_columns, df):
  for column in turnover_columns:
    new_column = []
    for item in df[column]:
      if 'T' in item:
        new_value = item.replace('T', '')
        new_value = int(new_value)
        new_value = new_value * -1
      else:
        new_value = int(item)
      new_column.append(new_value)
    df[column] = new_column

### Cleaning defense data

In [16]:
clean_percent_data(defense_dataframe)

In [17]:
remove_special_char_columns(defense_dataframe)

In [18]:
turnover_columns = [
    'defense_passing_lng',
    'defense_receiving_lng',
    'defense_rushing_lng',
    'defense_interceptions_lng'
]

negative_turnover_number(turnover_columns, defense_dataframe)

In [19]:
defense_dataframe.head()

Unnamed: 0,team,defense_downs_3rd_att,defense_downs_3rd_md,defense_downs_4th_att,defense_downs_4th_md,defense_downs_rec_1st,defense_downs_rec_1st_perc,defense_downs_rush_1st,defense_downs_rush_1st_perc,defense_downs_scrm_plys,year,defense_fumbles_ff,defense_fumbles_fr,defense_fumbles_fr_td,defense_fumbles_rec_fum,defense_fumbles_rush_fum,defense_interceptions_int,defense_interceptions_int_td,defense_interceptions_int_yds,defense_interceptions_lng,defense_passing_att,defense_passing_cmp,defense_passing_cmp__perc,defense_passing_yds/att,defense_passing_yds,defense_passing_td,defense_passing_int,defense_passing_rate,defense_passing_1st,defense_passing_1st_perc,defense_passing_20+,defense_passing_40+,defense_passing_lng,defense_passing_sck,defense_receiving_rec,defense_receiving_yds,defense_receiving_yds/rec,defense_receiving_td,defense_receiving_20+,defense_receiving_40+,defense_receiving_lng,defense_receiving_rec_1st,defense_receiving_rec_1st_perc,defense_receiving_rec_fum,defense_receiving_pdef,defense_rushing_att,defense_rushing_rush_yds,defense_rushing_ypc,defense_rushing_td,defense_rushing_20+,defense_rushing_40+,defense_rushing_lng,defense_rushing_rush_1st,defense_rushing_rush_1st_perc,defense_rushing_rush_fum,defense_scoring_fr_td,defense_scoring_sfty,defense_scoring_int_td,defense_tackles_sck,defense_tackles_comb,defense_tackles_asst,defense_tackles_solo
0,Lions,198,77,12,5,187,0.567,98,0.221,1005,2010,21,13,1,8,8,14,2,194,42,518,330,0.637,7.3,3786,23,14,89.2,187,0.361,43,9,-89,44,330,3786,11.5,23,43,9,-89,187,0.567,8,66,443,1999,4.5,18,11,3,-80,98,0.221,8,1,1,2,44,949,246,743
1,Falcons,201,79,17,9,183,0.503,87,0.238,957,2010,14,9,1,4,8,22,2,337,40,560,364,0.65,6.9,3846,23,22,82.2,183,0.327,38,6,-83,31,364,3846,10.6,23,38,6,-83,183,0.503,4,71,366,1694,4.6,9,10,5,-80,87,0.238,8,1,0,2,31,873,179,686
2,Rams,221,74,12,6,191,0.582,88,0.218,1017,2010,23,12,0,5,5,14,0,161,34,570,328,0.575,6.8,3868,21,14,80.4,191,0.335,51,5,65,43,328,3868,11.8,21,51,5,65,191,0.582,5,81,404,1810,4.5,7,15,1,80,88,0.218,5,0,0,0,43,861,127,770
3,Jets,219,81,6,4,169,0.628,70,0.172,979,2010,17,17,0,2,12,12,3,214,-66,531,269,0.507,6.5,3454,24,12,77.0,169,0.318,48,5,50,40,269,3454,12.8,24,48,5,50,169,0.628,2,87,408,1454,3.6,11,8,0,32,70,0.172,12,0,0,3,40,835,181,633
4,Dolphins,226,84,22,8,166,0.576,84,0.188,988,2010,13,8,1,1,4,11,0,66,21,502,288,0.574,7.1,3573,22,11,85.0,166,0.331,50,11,-86,39,288,3573,12.4,22,50,11,-86,166,0.576,1,82,447,1601,3.6,8,4,0,30,84,0.188,4,1,1,0,39,869,156,739


In [20]:
defense_dataframe.describe()

Unnamed: 0,defense_downs_3rd_att,defense_downs_3rd_md,defense_downs_4th_att,defense_downs_4th_md,defense_downs_rec_1st,defense_downs_rec_1st_perc,defense_downs_rush_1st,defense_downs_rush_1st_perc,defense_downs_scrm_plys,year,defense_fumbles_ff,defense_fumbles_fr,defense_fumbles_fr_td,defense_fumbles_rec_fum,defense_fumbles_rush_fum,defense_interceptions_int,defense_interceptions_int_td,defense_interceptions_int_yds,defense_interceptions_lng,defense_passing_att,defense_passing_cmp,defense_passing_cmp__perc,defense_passing_yds/att,defense_passing_yds,defense_passing_td,defense_passing_int,defense_passing_rate,defense_passing_1st,defense_passing_1st_perc,defense_passing_20+,defense_passing_40+,defense_passing_lng,defense_passing_sck,defense_receiving_rec,defense_receiving_yds,defense_receiving_yds/rec,defense_receiving_td,defense_receiving_20+,defense_receiving_40+,defense_receiving_lng,defense_receiving_rec_1st,defense_receiving_rec_1st_perc,defense_receiving_rec_fum,defense_receiving_pdef,defense_rushing_att,defense_rushing_rush_yds,defense_rushing_ypc,defense_rushing_td,defense_rushing_20+,defense_rushing_40+,defense_rushing_lng,defense_rushing_rush_1st,defense_rushing_rush_1st_perc,defense_rushing_rush_fum,defense_scoring_fr_td,defense_scoring_sfty,defense_scoring_int_td,defense_tackles_sck,defense_tackles_comb,defense_tackles_asst,defense_tackles_solo
count,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0
mean,212.808333,83.325,17.945833,9.10625,193.245833,0.547454,101.2875,0.231708,1036.495833,2017.0,13.80625,9.079167,0.75,3.91875,6.58125,13.785417,1.41875,186.779167,15.91875,560.441667,353.564583,0.630667,7.149583,4003.414583,24.79375,13.785417,88.989375,193.233333,0.344942,50.491667,8.785417,-37.110417,38.439583,353.564583,4003.414583,11.352708,24.79375,50.470833,8.777083,-37.13125,193.245833,0.547454,3.91875,72.44375,435.29375,1859.8125,4.259375,13.685417,10.833333,2.066667,-3.189583,101.2875,0.231708,6.58125,0.75,0.439583,1.41875,38.439583,974.177083,293.533333,684.752083
std,12.94362,9.908907,6.588183,4.146982,20.256136,0.037961,18.750216,0.028602,48.749027,4.325001,4.150353,3.152025,0.888338,2.155651,2.750149,4.467675,1.281661,95.029416,54.291563,41.345137,34.177909,0.036519,0.548522,386.251183,5.336013,4.467675,8.600155,20.252574,0.027862,8.25051,3.155966,61.863226,8.23763,34.177909,386.251183,0.794128,5.336013,8.248438,3.147438,61.829767,20.256136,0.037961,2.155651,12.609146,45.232306,301.60806,0.409485,4.731959,4.281013,1.652395,60.011616,18.750216,0.028602,2.750149,0.888338,0.681163,1.281661,8.23763,83.856896,79.167926,50.491065
min,176.0,47.0,4.0,0.0,136.0,0.443,53.0,0.15,921.0,2010.0,4.0,2.0,0.0,0.0,1.0,2.0,0.0,-2.0,-99.0,454.0,249.0,0.507,5.6,2981.0,11.0,2.0,62.8,136.0,0.254,30.0,0.0,-99.0,13.0,249.0,2981.0,9.6,11.0,30.0,0.0,-99.0,136.0,0.443,0.0,39.0,332.0,1004.0,3.0,3.0,1.0,0.0,-99.0,53.0,0.15,1.0,0.0,0.0,0.0,13.0,811.0,109.0,570.0
25%,204.0,76.75,13.0,6.0,180.75,0.52175,88.0,0.212,999.0,2013.0,11.0,7.0,0.0,2.0,5.0,11.0,0.0,114.0,-39.0,534.0,329.75,0.60675,6.8,3767.75,21.0,11.0,83.1,180.75,0.326,45.0,7.0,-78.0,33.0,329.75,3767.75,10.8,21.0,45.0,7.0,-78.0,180.75,0.52175,2.0,64.0,405.0,1662.5,4.0,10.0,8.0,1.0,-65.0,88.0,0.212,5.0,0.0,0.0,0.0,33.0,912.0,234.0,648.0
50%,213.0,83.0,17.0,9.0,192.0,0.547,100.0,0.233,1033.0,2017.0,14.0,9.0,1.0,4.0,6.0,13.0,1.0,174.5,33.0,557.5,351.5,0.634,7.1,4000.5,24.5,13.0,89.25,192.0,0.344,50.0,9.0,-70.0,39.0,351.5,4000.5,11.3,24.5,50.0,9.0,-70.0,192.0,0.547,4.0,72.0,433.0,1841.0,4.3,13.0,10.0,2.0,32.0,100.0,0.233,6.0,1.0,0.0,1.0,39.0,962.5,291.0,678.5
75%,222.0,89.25,22.0,12.0,205.25,0.573,112.25,0.251,1073.25,2021.0,16.0,11.0,1.0,5.0,8.0,17.0,2.0,247.0,55.0,590.0,375.0,0.656,7.5,4245.25,29.0,17.0,95.025,205.25,0.363,56.0,11.0,50.0,44.0,375.0,4245.25,11.9,29.0,56.0,11.0,50.0,205.25,0.573,5.0,81.0,465.25,2051.0,4.5,17.0,13.0,3.0,51.0,112.25,0.251,8.0,1.0,1.0,2.0,44.0,1032.25,353.0,717.25
max,250.0,112.0,41.0,22.0,253.0,0.661,179.0,0.304,1201.0,2024.0,34.0,22.0,5.0,11.0,19.0,31.0,8.0,496.0,104.0,680.0,450.0,0.727,8.7,4988.0,45.0,31.0,116.2,253.0,0.454,79.0,18.0,95.0,70.0,450.0,4988.0,13.4,45.0,79.0,18.0,95.0,253.0,0.661,11.0,116.0,592.0,3057.0,5.4,31.0,28.0,8.0,84.0,179.0,0.304,19.0,5.0,3.0,8.0,70.0,1261.0,531.0,850.0


### Cleaning offense data

In [21]:
clean_percent_data(offense_dataframe)

In [22]:
remove_special_char_columns(offense_dataframe)

In [23]:
turnover_columns = [
    'offense_passing_lng',
    'offense_receiving_lng',
    'offense_rushing_lng'
]

negative_turnover_number(turnover_columns, offense_dataframe)

In [24]:
offense_dataframe.head()

Unnamed: 0,team,offense_downs_3rd_att,offense_downs_3rd_md,offense_downs_4th_att,offense_downs_4th_md,offense_downs_rec_1st,offense_downs_rec_1st_perc,offense_downs_rush_1st,offense_downs_rush_1st_perc,offense_downs_scrm_plys,year,offense_passing_att,offense_passing_cmp,offense_passing_cmp__perc,offense_passing_yds/att,offense_passing_pass_yds,offense_passing_td,offense_passing_int,offense_passing_rate,offense_passing_1st,offense_passing_1st_perc,offense_passing_20+,offense_passing_40+,offense_passing_lng,offense_passing_sck,offense_passing_scky,offense_receiving_rec,offense_receiving_yds,offense_receiving_yds/rec,offense_receiving_td,offense_receiving_20+,offense_receiving_40+,offense_receiving_lng,offense_receiving_rec_1st,offense_receiving_rec_1st_perc,offense_receiving_rec_fum,offense_rushing_att,offense_rushing_rush_yds,offense_rushing_ypc,offense_rushing_td,offense_rushing_20+,offense_rushing_40+,offense_rushing_lng,offense_rushing_rush_1st,offense_rushing_rush_1st_perc,offense_rushing_rush_fum,offense_scoring_rsh_td,offense_scoring_rec_td,offense_scoring_tot_td,offense_scoring_2-pt
0,Lions,242,97,16,10,199,0.52,83,0.205,1064,2010,633,383,0.605,6.3,4001,26,16,82.0,199,0.314,43,6,87,27,191,383,4001,10.4,26,43,6,87,199,0.52,4,404,1613,4.0,11,9,2,45,83,0.205,6,11,26,41,2
1,Falcons,240,112,15,11,200,0.554,111,0.223,1097,2010,577,361,0.626,6.5,3725,28,9,90.8,200,0.347,32,6,46,23,158,361,3725,10.3,28,32,6,-46,200,0.554,3,497,1891,3.8,14,12,1,55,111,0.223,4,14,28,47,2
2,Rams,235,78,15,8,179,0.506,84,0.196,1053,2010,590,354,0.6,6.0,3512,18,15,76.5,179,0.303,36,4,49,34,244,354,3512,9.9,18,36,4,49,179,0.506,2,429,1578,3.7,9,9,1,-42,84,0.196,2,9,18,27,0
3,Jets,235,93,14,5,171,0.594,118,0.221,1087,2010,525,288,0.549,6.5,3420,20,14,76.5,171,0.326,44,11,74,28,178,288,3420,11.9,20,44,11,-74,171,0.594,3,534,2374,4.4,14,11,2,53,118,0.221,11,14,20,39,1
4,Dolphins,230,92,10,3,189,0.564,91,0.204,1040,2010,557,335,0.601,6.7,3755,17,21,74.8,189,0.339,44,4,-57,38,228,335,3755,11.2,17,44,4,57,189,0.564,3,445,1643,3.7,8,5,2,51,91,0.204,12,8,17,26,0


In [25]:
offense_dataframe.describe()

Unnamed: 0,offense_downs_3rd_att,offense_downs_3rd_md,offense_downs_4th_att,offense_downs_4th_md,offense_downs_rec_1st,offense_downs_rec_1st_perc,offense_downs_rush_1st,offense_downs_rush_1st_perc,offense_downs_scrm_plys,year,offense_passing_att,offense_passing_cmp,offense_passing_cmp__perc,offense_passing_yds/att,offense_passing_pass_yds,offense_passing_td,offense_passing_int,offense_passing_rate,offense_passing_1st,offense_passing_1st_perc,offense_passing_20+,offense_passing_40+,offense_passing_lng,offense_passing_sck,offense_passing_scky,offense_receiving_rec,offense_receiving_yds,offense_receiving_yds/rec,offense_receiving_td,offense_receiving_20+,offense_receiving_40+,offense_receiving_lng,offense_receiving_rec_1st,offense_receiving_rec_1st_perc,offense_receiving_rec_fum,offense_rushing_att,offense_rushing_rush_yds,offense_rushing_ypc,offense_rushing_td,offense_rushing_20+,offense_rushing_40+,offense_rushing_lng,offense_rushing_rush_1st,offense_rushing_rush_1st_perc,offense_rushing_rush_fum,offense_scoring_rsh_td,offense_scoring_rec_td,offense_scoring_tot_td,offense_scoring_2-pt
count,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0
mean,212.808333,83.325,17.945833,9.10625,193.245833,0.54686,101.2875,0.231104,1036.495833,2017.0,560.441667,353.564583,0.63005,7.15125,4003.414583,24.79375,13.785417,88.866458,193.233333,0.344577,50.491667,8.785417,-12.554167,38.439583,253.839583,353.564583,4003.414583,11.354583,24.79375,50.470833,8.777083,53.602083,193.245833,0.54686,3.91875,435.29375,1859.8125,4.252083,13.685417,10.833333,2.066667,10.66875,101.2875,0.231104,6.58125,13.685417,24.79375,41.479167,1.48125
std,14.505658,11.158312,7.045423,4.290004,29.634129,0.04377,22.166212,0.0317,53.142067,4.325001,59.521059,47.261234,0.040114,0.687769,543.200879,7.594991,4.687499,11.081312,29.628312,0.035573,10.021477,3.413377,71.551587,10.623785,75.569441,47.261234,543.200879,0.887997,7.594991,10.029559,3.414065,48.944453,29.634129,0.04377,2.006165,51.60116,351.848365,0.442185,5.130753,4.726037,1.662472,57.749736,22.166212,0.0317,2.982493,5.130753,7.594991,9.950167,1.487478
min,176.0,52.0,4.0,0.0,109.0,0.417,56.0,0.159,878.0,2010.0,377.0,217.0,0.506,5.4,2510.0,8.0,2.0,57.0,109.0,0.256,28.0,1.0,-99.0,14.0,63.0,217.0,2510.0,8.9,8.0,28.0,1.0,-95.0,109.0,0.417,0.0,320.0,1156.0,3.1,3.0,2.0,0.0,-99.0,56.0,0.159,0.0,3.0,8.0,17.0,0.0
25%,203.0,76.0,13.0,6.0,171.0,0.517,85.0,0.20875,1002.0,2013.0,518.0,320.0,0.601,6.7,3599.75,20.0,11.0,81.175,171.0,0.31875,43.0,6.0,-75.0,31.0,196.5,320.0,3599.75,10.8,20.0,43.0,6.0,58.75,171.0,0.517,2.0,398.0,1612.0,4.0,10.0,7.0,1.0,-49.0,85.0,0.20875,5.0,10.0,20.0,34.75,0.0
50%,212.0,83.0,16.5,8.0,192.0,0.546,98.0,0.23,1032.0,2017.0,563.0,355.0,0.63,7.1,3992.0,24.0,13.0,88.45,192.0,0.345,51.0,9.0,-60.0,38.0,246.0,355.0,3992.0,11.3,24.0,51.0,9.0,71.0,192.0,0.546,4.0,430.5,1812.5,4.2,13.0,10.0,2.0,39.0,98.0,0.23,6.0,13.0,24.0,41.0,1.0
75%,223.0,91.0,22.0,12.0,211.0,0.575,113.0,0.252,1073.0,2021.0,602.25,382.0,0.66,7.6,4366.25,29.0,17.0,95.825,211.0,0.36825,57.0,11.0,67.25,46.0,304.0,382.0,4366.25,11.9,29.0,57.0,11.0,79.0,211.0,0.575,5.0,470.0,2056.75,4.5,17.0,13.0,3.0,58.0,113.0,0.252,8.0,17.0,29.0,48.0,2.0
max,257.0,118.0,48.0,24.0,293.0,0.671,188.0,0.318,1191.0,2024.0,751.0,499.0,0.734,9.4,5572.0,55.0,30.0,122.6,293.0,0.445,80.0,21.0,98.0,85.0,504.0,499.0,5572.0,14.2,55.0,80.0,21.0,99.0,293.0,0.671,12.0,621.0,3296.0,5.8,32.0,33.0,8.0,98.0,188.0,0.318,19.0,32.0,55.0,76.0,8.0


### Cleaning special teams data

In [26]:
clean_percent_data(special_teams_dataframe)

In [27]:
remove_special_char_columns(special_teams_dataframe)

In [28]:
turnover_columns = [
    'special_teams_kickoff_returns_lng',
    'special_teams_punt_returns_lng',
    'special_teams_punting_lng'
]

negative_turnover_number(turnover_columns, special_teams_dataframe)

In [29]:
# Fix columns with A_M (attemps_made)
columns_list = [
    'special_teams_field_goals_1-19_>_a-m',
    'special_teams_field_goals_20-29_>_a-m',
    'special_teams_field_goals_30-39_>_a-m',
    'special_teams_field_goals_40-49_>_a-m',
    'special_teams_field_goals_50-59_>_a-m',
    'special_teams_field_goals_60+_>_a-m',
]

# We will transform it in a percent so we don't need to create a new column for each case
for column in columns_list:
  attps_list = special_teams_dataframe[column].str.split('_').str[0]
  matches_list = special_teams_dataframe[column].str.split('_').str[1]

  percent_matches = matches_list.astype('int') / attps_list.astype('int')
  percent_matches.fillna(0, inplace=True)
  special_teams_dataframe[column] = percent_matches
  special_teams_dataframe.rename(columns={
      column : column.replace('a-m', 'percent_a_m')
  }, inplace=True)

In [30]:
# Removing non important columns (trash from the origin)
special_teams_dataframe.drop(['data_fgm',
                              'data_fg__perc',
                              'data_xpm',
                              'data_xp_pct',
                              'data_kret_td',
                              'data_pret_t'],
                             axis=1,
                             inplace=True)

In [31]:
special_teams_dataframe.head()

Unnamed: 0,team,special_teams_field_goals_fgm,special_teams_field_goals_att,special_teams_field_goals_fg__perc,special_teams_field_goals_1-19_>_percent_a_m,special_teams_field_goals_20-29_>_percent_a_m,special_teams_field_goals_30-39_>_percent_a_m,special_teams_field_goals_40-49_>_percent_a_m,special_teams_field_goals_50-59_>_percent_a_m,special_teams_field_goals_60+_>_percent_a_m,special_teams_field_goals_lng,special_teams_field_goals_fg_blk,year,special_teams_kickoff_returns_avg,special_teams_kickoff_returns_ret,special_teams_kickoff_returns_yds,special_teams_kickoff_returns_kret_td,special_teams_kickoff_returns_20+,special_teams_kickoff_returns_40+,special_teams_kickoff_returns_lng,special_teams_kickoff_returns_fc,special_teams_kickoff_returns_fum,special_teams_kickoff_returns_fg_blk,special_teams_kickoff_returns_xp_blk,special_teams_kickoffs_ko,special_teams_kickoffs_yds,special_teams_kickoffs_tb,special_teams_kickoffs_tb__perc,special_teams_kickoffs_ret,special_teams_kickoffs_ret_avg,special_teams_kickoffs_osk,special_teams_kickoffs_osk_rec,special_teams_kickoffs_oob,special_teams_kickoffs_td,special_teams_punt_returns_avg,special_teams_punt_returns_ret,special_teams_punt_returns_yds,special_teams_punt_returns_pret_t,special_teams_punt_returns_20+,special_teams_punt_returns_40+,special_teams_punt_returns_lng,special_teams_punt_returns_fc,special_teams_punt_returns_fum,special_teams_punt_returns_p_blk,special_teams_punting_att,special_teams_punting_cmp,special_teams_punting_cmp__perc,special_teams_punting_yds/att,special_teams_punting_pass_yds,special_teams_punting_td,special_teams_punting_int,special_teams_punting_rate,special_teams_punting_1st,special_teams_punting_1st_perc,special_teams_punting_20+,special_teams_punting_40+,special_teams_punting_lng,special_teams_punting_sck,special_teams_punting_scky
0,Rams,33,39,0.846,0.0,0.916667,0.857143,0.777778,0.75,0.0,53,2,2010,21.8,74,1614,0,47,1,84,0,2,1,0,73,4866,5,0.068,65,23.7,2,0,2,1,11.3,40,452,0,7,2,42,19,0,1,590,354,0.6,6.0,3512,18,15,76.5,179,0.303,36,4,49,34,244
1,Raiders,33,41,0.805,0.0,1.0,0.928571,0.666667,0.571429,0.0,59,0,2010,22.5,68,1532,3,33,4,-101,0,4,0,0,93,6091,29,0.312,61,23.2,1,0,1,1,7.0,55,386,0,3,2,53,11,2,2,491,279,0.568,7.1,3471,18,16,77.5,159,0.324,45,12,-73,44,291
2,Eagles,32,38,0.842,0.0,1.0,0.833333,0.818182,0.333333,0.0,50,1,2010,20.5,62,1272,0,34,2,46,1,3,0,0,96,6214,23,0.24,69,21.8,2,1,2,0,10.6,38,404,1,4,2,-65,19,3,0,561,348,0.62,7.5,4215,28,13,92.1,189,0.337,61,15,91,49,309
3,Dolphins,30,41,0.732,1.0,1.0,1.0,0.611111,0.5,0.5,60,2,2010,21.4,56,1199,0,30,2,46,1,1,1,0,70,4603,14,0.2,53,24.6,0,0,3,2,10.5,28,293,0,2,1,47,22,4,0,557,335,0.601,6.7,3755,17,21,74.8,189,0.339,44,4,-57,38,228
4,Jets,30,39,0.769,1.0,0.909091,0.875,0.5,0.5,0.0,56,1,2010,25.2,63,1588,2,35,6,-97,0,0,0,0,81,5025,7,0.086,73,19.6,0,0,1,0,9.4,56,529,0,5,0,32,25,3,1,525,288,0.549,6.5,3420,20,14,76.5,171,0.326,44,11,74,28,178


In [32]:
special_teams_dataframe.describe()

Unnamed: 0,special_teams_field_goals_fgm,special_teams_field_goals_att,special_teams_field_goals_fg__perc,special_teams_field_goals_1-19_>_percent_a_m,special_teams_field_goals_20-29_>_percent_a_m,special_teams_field_goals_30-39_>_percent_a_m,special_teams_field_goals_40-49_>_percent_a_m,special_teams_field_goals_50-59_>_percent_a_m,special_teams_field_goals_60+_>_percent_a_m,special_teams_field_goals_lng,special_teams_field_goals_fg_blk,year,special_teams_kickoff_returns_avg,special_teams_kickoff_returns_ret,special_teams_kickoff_returns_yds,special_teams_kickoff_returns_kret_td,special_teams_kickoff_returns_20+,special_teams_kickoff_returns_40+,special_teams_kickoff_returns_lng,special_teams_kickoff_returns_fc,special_teams_kickoff_returns_fum,special_teams_kickoff_returns_fg_blk,special_teams_kickoff_returns_xp_blk,special_teams_kickoffs_ko,special_teams_kickoffs_yds,special_teams_kickoffs_tb,special_teams_kickoffs_tb__perc,special_teams_kickoffs_ret,special_teams_kickoffs_ret_avg,special_teams_kickoffs_osk,special_teams_kickoffs_osk_rec,special_teams_kickoffs_oob,special_teams_kickoffs_td,special_teams_punt_returns_avg,special_teams_punt_returns_ret,special_teams_punt_returns_yds,special_teams_punt_returns_pret_t,special_teams_punt_returns_20+,special_teams_punt_returns_40+,special_teams_punt_returns_lng,special_teams_punt_returns_fc,special_teams_punt_returns_fum,special_teams_punt_returns_p_blk,special_teams_punting_att,special_teams_punting_cmp,special_teams_punting_cmp__perc,special_teams_punting_yds/att,special_teams_punting_pass_yds,special_teams_punting_td,special_teams_punting_int,special_teams_punting_rate,special_teams_punting_1st,special_teams_punting_1st_perc,special_teams_punting_20+,special_teams_punting_40+,special_teams_punting_lng,special_teams_punting_sck,special_teams_punting_scky
count,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0
mean,26.597917,31.56875,0.840829,0.220833,0.970496,0.910688,0.774623,0.63076,0.057639,54.333333,0.63125,2017.0,23.029583,35.395833,818.545833,0.258333,22.591667,1.8625,20.3875,0.264583,0.7375,0.63125,0.2625,82.91875,5251.929167,44.995833,0.5381,35.395833,23.3025,1.802083,0.227083,0.73125,0.258333,9.01125,31.3375,282.41875,0.314583,2.875,0.741667,6.66875,18.6625,2.4,0.40625,560.441667,353.564583,0.63005,7.15125,4003.414583,24.79375,13.785417,88.866458,193.233333,0.344577,50.491667,8.785417,-12.554167,38.439583,253.839583
std,5.553572,5.77044,0.071474,0.415241,0.082643,0.105281,0.152113,0.271536,0.224434,3.449204,0.84217,4.325001,2.766705,12.503093,308.174804,0.524885,8.856071,1.706505,62.252532,1.311104,0.9217,0.859348,0.542408,11.115434,744.052091,16.924738,0.176757,14.607373,3.00983,1.507253,0.525444,0.884016,0.520893,2.452556,7.925715,104.615579,0.580652,2.077064,0.967064,50.945022,6.06873,1.668322,0.686556,59.521059,47.261234,0.040114,0.687769,543.200879,7.594991,4.687499,11.081312,29.628312,0.035573,10.021477,3.413377,71.551587,10.623785,75.569441
min,8.0,16.0,0.444,0.0,0.0,0.5,0.25,0.0,0.0,43.0,0.0,2010.0,14.6,9.0,145.0,0.0,3.0,0.0,-109.0,0.0,0.0,0.0,0.0,54.0,3350.0,1.0,0.013,5.0,15.4,0.0,0.0,0.0,0.0,3.4,10.0,53.0,0.0,0.0,0.0,-99.0,1.0,0.0,0.0,377.0,217.0,0.506,5.4,2510.0,8.0,2.0,57.0,109.0,0.256,28.0,1.0,-99.0,14.0,63.0
25%,23.0,28.0,0.7985,0.0,1.0,0.857143,0.666667,0.5,0.0,52.0,0.0,2013.0,21.2,27.0,604.75,0.0,17.0,1.0,31.0,0.0,0.0,0.0,0.0,75.0,4724.75,34.0,0.442,25.0,21.2,1.0,0.0,0.0,0.0,7.2,26.0,208.0,0.0,1.0,0.0,-3.5,14.0,1.0,0.0,518.0,320.0,0.601,6.7,3599.75,20.0,11.0,81.175,171.0,0.31875,43.0,6.0,-75.0,31.0,196.5
50%,26.0,31.0,0.846,0.0,1.0,0.923077,0.781746,0.666667,0.0,54.0,0.0,2017.0,22.9,34.0,786.0,0.0,21.0,2.0,42.0,0.0,0.0,0.0,0.0,83.0,5201.0,45.0,0.544,34.0,23.0,2.0,0.0,1.0,0.0,8.9,31.0,267.0,0.0,2.0,0.0,25.0,19.0,2.0,0.0,563.0,355.0,0.63,7.1,3992.0,24.0,13.0,88.45,192.0,0.345,51.0,9.0,-60.0,38.0,246.0
75%,30.0,36.0,0.8935,0.0,1.0,1.0,0.888889,0.833333,0.0,56.0,1.0,2021.0,24.9,42.0,1003.25,0.0,28.0,3.0,54.0,0.0,1.0,1.0,0.0,90.25,5783.75,55.0,0.6525,44.25,25.025,3.0,0.0,1.0,0.0,10.5,36.0,337.25,1.0,4.0,1.0,38.0,22.0,3.0,1.0,602.25,382.0,0.66,7.6,4366.25,29.0,17.0,95.825,211.0,0.36825,57.0,11.0,67.25,46.0,304.0
max,44.0,52.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,66.0,5.0,2024.0,32.1,84.0,2084.0,3.0,55.0,10.0,108.0,18.0,4.0,5.0,3.0,115.0,7450.0,99.0,0.908,84.0,39.4,9.0,4.0,4.0,3.0,17.4,59.0,696.0,4.0,12.0,5.0,97.0,38.0,8.0,4.0,751.0,499.0,0.734,9.4,5572.0,55.0,30.0,122.6,293.0,0.445,80.0,21.0,98.0,85.0,504.0


### Cleaning historical data

In [33]:
score_historical = score_historical.loc[score_historical['schedule_season'] >= 2011]

In [34]:
score_historical.isna().sum()

Unnamed: 0,0
schedule_date,0
schedule_season,0
schedule_week,0
schedule_playoff,0
team_home,0
score_home,0
score_away,0
team_away,0
team_favorite_id,0
spread_favorite,0


In [35]:
score_historical['schedule_date'] = score_historical['schedule_date'].astype('datetime64[ns]')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  score_historical['schedule_date'] = score_historical['schedule_date'].astype('datetime64[ns]')


In [36]:
score_historical = score_historical[['schedule_date',
                                     'schedule_season',
                                     'schedule_playoff',
                                     'team_home',
                                     'score_home',
                                     'score_away',
                                     'team_away',
                                     'stadium_neutral'
                                     ]]

In [37]:
def home_winner(score_home, score_away):
  if score_home > score_away:
    return True
  else:
    return False # we will not be working on ties here since they are rare on NFL

score_historical['home_winner'] = score_historical.apply(lambda row: home_winner(row['score_home'], row['score_away']), axis=1)

In [38]:
score_historical.head()

Unnamed: 0,schedule_date,schedule_season,schedule_playoff,team_home,score_home,score_away,team_away,stadium_neutral,home_winner
10275,2011-09-08,2011,False,Green Bay Packers,42,34,New Orleans Saints,False,True
10276,2011-09-11,2011,False,Arizona Cardinals,28,21,Carolina Panthers,False,True
10277,2011-09-11,2011,False,Baltimore Ravens,35,7,Pittsburgh Steelers,False,True
10278,2011-09-11,2011,False,Chicago Bears,30,12,Atlanta Falcons,False,True
10279,2011-09-11,2011,False,Cleveland Browns,17,27,Cincinnati Bengals,False,False


## Creating final df for next steps (EDA and modeling)

We need to create a dataframe where all the historic data is related to last stats we have. So we must:
* Join together all stats dataframe
* Join togehter with a 1year lag (stats from 2010 must reflect on 2011 games)
* Clean data if needed
* Save dataframe

In [39]:
stats_dataframe = defense_dataframe.merge(offense_dataframe,
                                          on=['year', 'team'],
                                          how='left')

stats_dataframe = stats_dataframe.merge(special_teams_dataframe,
                                        on=['year', 'team'],
                                        how='left')

stats_dataframe.head()

Unnamed: 0,team,defense_downs_3rd_att,defense_downs_3rd_md,defense_downs_4th_att,defense_downs_4th_md,defense_downs_rec_1st,defense_downs_rec_1st_perc,defense_downs_rush_1st,defense_downs_rush_1st_perc,defense_downs_scrm_plys,year,defense_fumbles_ff,defense_fumbles_fr,defense_fumbles_fr_td,defense_fumbles_rec_fum,defense_fumbles_rush_fum,defense_interceptions_int,defense_interceptions_int_td,defense_interceptions_int_yds,defense_interceptions_lng,defense_passing_att,defense_passing_cmp,defense_passing_cmp__perc,defense_passing_yds/att,defense_passing_yds,defense_passing_td,defense_passing_int,defense_passing_rate,defense_passing_1st,defense_passing_1st_perc,defense_passing_20+,defense_passing_40+,defense_passing_lng,defense_passing_sck,defense_receiving_rec,defense_receiving_yds,defense_receiving_yds/rec,defense_receiving_td,defense_receiving_20+,defense_receiving_40+,defense_receiving_lng,defense_receiving_rec_1st,defense_receiving_rec_1st_perc,defense_receiving_rec_fum,defense_receiving_pdef,defense_rushing_att,defense_rushing_rush_yds,defense_rushing_ypc,defense_rushing_td,defense_rushing_20+,defense_rushing_40+,defense_rushing_lng,defense_rushing_rush_1st,defense_rushing_rush_1st_perc,defense_rushing_rush_fum,defense_scoring_fr_td,defense_scoring_sfty,defense_scoring_int_td,defense_tackles_sck,defense_tackles_comb,defense_tackles_asst,defense_tackles_solo,offense_downs_3rd_att,offense_downs_3rd_md,offense_downs_4th_att,offense_downs_4th_md,offense_downs_rec_1st,offense_downs_rec_1st_perc,offense_downs_rush_1st,offense_downs_rush_1st_perc,offense_downs_scrm_plys,offense_passing_att,offense_passing_cmp,offense_passing_cmp__perc,offense_passing_yds/att,offense_passing_pass_yds,offense_passing_td,offense_passing_int,offense_passing_rate,offense_passing_1st,offense_passing_1st_perc,offense_passing_20+,offense_passing_40+,offense_passing_lng,offense_passing_sck,offense_passing_scky,offense_receiving_rec,offense_receiving_yds,offense_receiving_yds/rec,offense_receiving_td,offense_receiving_20+,offense_receiving_40+,offense_receiving_lng,offense_receiving_rec_1st,offense_receiving_rec_1st_perc,offense_receiving_rec_fum,offense_rushing_att,offense_rushing_rush_yds,offense_rushing_ypc,offense_rushing_td,offense_rushing_20+,offense_rushing_40+,offense_rushing_lng,offense_rushing_rush_1st,offense_rushing_rush_1st_perc,offense_rushing_rush_fum,offense_scoring_rsh_td,offense_scoring_rec_td,offense_scoring_tot_td,offense_scoring_2-pt,special_teams_field_goals_fgm,special_teams_field_goals_att,special_teams_field_goals_fg__perc,special_teams_field_goals_1-19_>_percent_a_m,special_teams_field_goals_20-29_>_percent_a_m,special_teams_field_goals_30-39_>_percent_a_m,special_teams_field_goals_40-49_>_percent_a_m,special_teams_field_goals_50-59_>_percent_a_m,special_teams_field_goals_60+_>_percent_a_m,special_teams_field_goals_lng,special_teams_field_goals_fg_blk,special_teams_kickoff_returns_avg,special_teams_kickoff_returns_ret,special_teams_kickoff_returns_yds,special_teams_kickoff_returns_kret_td,special_teams_kickoff_returns_20+,special_teams_kickoff_returns_40+,special_teams_kickoff_returns_lng,special_teams_kickoff_returns_fc,special_teams_kickoff_returns_fum,special_teams_kickoff_returns_fg_blk,special_teams_kickoff_returns_xp_blk,special_teams_kickoffs_ko,special_teams_kickoffs_yds,special_teams_kickoffs_tb,special_teams_kickoffs_tb__perc,special_teams_kickoffs_ret,special_teams_kickoffs_ret_avg,special_teams_kickoffs_osk,special_teams_kickoffs_osk_rec,special_teams_kickoffs_oob,special_teams_kickoffs_td,special_teams_punt_returns_avg,special_teams_punt_returns_ret,special_teams_punt_returns_yds,special_teams_punt_returns_pret_t,special_teams_punt_returns_20+,special_teams_punt_returns_40+,special_teams_punt_returns_lng,special_teams_punt_returns_fc,special_teams_punt_returns_fum,special_teams_punt_returns_p_blk,special_teams_punting_att,special_teams_punting_cmp,special_teams_punting_cmp__perc,special_teams_punting_yds/att,special_teams_punting_pass_yds,special_teams_punting_td,special_teams_punting_int,special_teams_punting_rate,special_teams_punting_1st,special_teams_punting_1st_perc,special_teams_punting_20+,special_teams_punting_40+,special_teams_punting_lng,special_teams_punting_sck,special_teams_punting_scky
0,Lions,198,77,12,5,187,0.567,98,0.221,1005,2010,21,13,1,8,8,14,2,194,42,518,330,0.637,7.3,3786,23,14,89.2,187,0.361,43,9,-89,44,330,3786,11.5,23,43,9,-89,187,0.567,8,66,443,1999,4.5,18,11,3,-80,98,0.221,8,1,1,2,44,949,246,743,242,97,16,10,199,0.52,83,0.205,1064,633,383,0.605,6.3,4001,26,16,82.0,199,0.314,43,6,87,27,191,383,4001,10.4,26,43,6,87,199,0.52,4,404,1613,4.0,11,9,2,45,83,0.205,6,11,26,41,2,25,30,0.833,0.0,1.0,1.0,0.692308,0.833333,0.0,55,0,24.9,61,1519,1,43,5,-105,0,1,0,0,77,4775,3,0.039,72,20.6,2,1,0,1,12.1,30,362,0,4,1,71,21,2,0,633,383,0.605,6.3,4001,26,16,82.0,199,0.314,43,6,87,27,191
1,Falcons,201,79,17,9,183,0.503,87,0.238,957,2010,14,9,1,4,8,22,2,337,40,560,364,0.65,6.9,3846,23,22,82.2,183,0.327,38,6,-83,31,364,3846,10.6,23,38,6,-83,183,0.503,4,71,366,1694,4.6,9,10,5,-80,87,0.238,8,1,0,2,31,873,179,686,240,112,15,11,200,0.554,111,0.223,1097,577,361,0.626,6.5,3725,28,9,90.8,200,0.347,32,6,46,23,158,361,3725,10.3,28,32,6,-46,200,0.554,3,497,1891,3.8,14,12,1,55,111,0.223,4,14,28,47,2,28,31,0.903,0.0,1.0,0.888889,0.8,1.0,0.0,51,1,26.5,46,1221,1,35,5,-102,0,0,0,0,88,5945,23,0.261,64,21.3,1,0,0,1,12.1,19,230,1,1,1,-55,19,1,0,577,361,0.626,6.5,3725,28,9,90.8,200,0.347,32,6,46,23,158
2,Rams,221,74,12,6,191,0.582,88,0.218,1017,2010,23,12,0,5,5,14,0,161,34,570,328,0.575,6.8,3868,21,14,80.4,191,0.335,51,5,65,43,328,3868,11.8,21,51,5,65,191,0.582,5,81,404,1810,4.5,7,15,1,80,88,0.218,5,0,0,0,43,861,127,770,235,78,15,8,179,0.506,84,0.196,1053,590,354,0.6,6.0,3512,18,15,76.5,179,0.303,36,4,49,34,244,354,3512,9.9,18,36,4,49,179,0.506,2,429,1578,3.7,9,9,1,-42,84,0.196,2,9,18,27,0,33,39,0.846,0.0,0.916667,0.857143,0.777778,0.75,0.0,53,2,21.8,74,1614,0,47,1,84,0,2,1,0,73,4866,5,0.068,65,23.7,2,0,2,1,11.3,40,452,0,7,2,42,19,0,1,590,354,0.6,6.0,3512,18,15,76.5,179,0.303,36,4,49,34,244
3,Jets,219,81,6,4,169,0.628,70,0.172,979,2010,17,17,0,2,12,12,3,214,-66,531,269,0.507,6.5,3454,24,12,77.0,169,0.318,48,5,50,40,269,3454,12.8,24,48,5,50,169,0.628,2,87,408,1454,3.6,11,8,0,32,70,0.172,12,0,0,3,40,835,181,633,235,93,14,5,171,0.594,118,0.221,1087,525,288,0.549,6.5,3420,20,14,76.5,171,0.326,44,11,74,28,178,288,3420,11.9,20,44,11,-74,171,0.594,3,534,2374,4.4,14,11,2,53,118,0.221,11,14,20,39,1,30,39,0.769,1.0,0.909091,0.875,0.5,0.5,0.0,56,1,25.2,63,1588,2,35,6,-97,0,0,0,0,81,5025,7,0.086,73,19.6,0,0,1,0,9.4,56,529,0,5,0,32,25,3,1,525,288,0.549,6.5,3420,20,14,76.5,171,0.326,44,11,74,28,178
4,Dolphins,226,84,22,8,166,0.576,84,0.188,988,2010,13,8,1,1,4,11,0,66,21,502,288,0.574,7.1,3573,22,11,85.0,166,0.331,50,11,-86,39,288,3573,12.4,22,50,11,-86,166,0.576,1,82,447,1601,3.6,8,4,0,30,84,0.188,4,1,1,0,39,869,156,739,230,92,10,3,189,0.564,91,0.204,1040,557,335,0.601,6.7,3755,17,21,74.8,189,0.339,44,4,-57,38,228,335,3755,11.2,17,44,4,57,189,0.564,3,445,1643,3.7,8,5,2,51,91,0.204,12,8,17,26,0,30,41,0.732,1.0,1.0,1.0,0.611111,0.5,0.5,60,2,21.4,56,1199,0,30,2,46,1,1,1,0,70,4603,14,0.2,53,24.6,0,0,3,2,10.5,28,293,0,2,1,47,22,4,0,557,335,0.601,6.7,3755,17,21,74.8,189,0.339,44,4,-57,38,228


In [40]:
# Checking if any null value was left behind
stats_dataframe.isna().any().any()

False

We will be joining by ```year``` <> ```schedule_seanson``` and ```team```.

**Note:** The team names might be different, so we need to check it before the join.

In [41]:
set([*score_historical['team_home']] + [*score_historical['team_away']])

{'Arizona Cardinals',
 'Atlanta Falcons',
 'Baltimore Ravens',
 'Buffalo Bills',
 'Carolina Panthers',
 'Chicago Bears',
 'Cincinnati Bengals',
 'Cleveland Browns',
 'Dallas Cowboys',
 'Denver Broncos',
 'Detroit Lions',
 'Green Bay Packers',
 'Houston Texans',
 'Indianapolis Colts',
 'Jacksonville Jaguars',
 'Kansas City Chiefs',
 'Las Vegas Raiders',
 'Los Angeles Chargers',
 'Los Angeles Rams',
 'Miami Dolphins',
 'Minnesota Vikings',
 'New England Patriots',
 'New Orleans Saints',
 'New York Giants',
 'New York Jets',
 'Oakland Raiders',
 'Philadelphia Eagles',
 'Pittsburgh Steelers',
 'San Diego Chargers',
 'San Francisco 49ers',
 'Seattle Seahawks',
 'St. Louis Rams',
 'Tampa Bay Buccaneers',
 'Tennessee Titans',
 'Washington Commanders',
 'Washington Football Team',
 'Washington Redskins'}

In [42]:
stats_dataframe['team'].sort_values().unique()

array(['49ers', 'Bears', 'Bengals', 'Bills', 'Broncos', 'Browns',
       'Buccaneers', 'Cardinals', 'Chargers', 'Chiefs', 'Colts',
       'Commanders', 'Cowboys', 'Dolphins', 'Eagles', 'Falcons',
       'FootballTeam', 'Giants', 'Jaguars', 'Jets', 'Lions', 'Niners',
       'Packers', 'Panthers', 'Patriots', 'Raiders', 'Rams', 'Ravens',
       'Redskins', 'Saints', 'Seahawks', 'Steelers', 'Texans', 'Titans',
       'Vikings'], dtype=object)

In [43]:
teams_to_translate = {
    'Niners' : '49ers',
    'Redskins' : 'Commanders',
    'FootballTeam' : 'Commanders'
}

def translate_team_names(team_name, teams_to_translate):
  if team_name in teams_to_translate.keys():
    return teams_to_translate.get(team_name)
  else:
    return team_name

stats_dataframe['team'] = stats_dataframe.apply(lambda row: translate_team_names(row['team'], teams_to_translate), axis=1)

First, we need to make get the city out of the score dataframe.

In [44]:
score_historical

Unnamed: 0,schedule_date,schedule_season,schedule_playoff,team_home,score_home,score_away,team_away,stadium_neutral,home_winner
10275,2011-09-08,2011,False,Green Bay Packers,42,34,New Orleans Saints,False,True
10276,2011-09-11,2011,False,Arizona Cardinals,28,21,Carolina Panthers,False,True
10277,2011-09-11,2011,False,Baltimore Ravens,35,7,Pittsburgh Steelers,False,True
10278,2011-09-11,2011,False,Chicago Bears,30,12,Atlanta Falcons,False,True
10279,2011-09-11,2011,False,Cleveland Browns,17,27,Cincinnati Bengals,False,False
...,...,...,...,...,...,...,...,...,...
14081,2025-01-19,2024,True,Buffalo Bills,27,25,Baltimore Ravens,False,True
14082,2025-01-19,2024,True,Philadelphia Eagles,28,22,Los Angeles Rams,False,True
14083,2025-01-26,2024,True,Philadelphia Eagles,55,23,Washington Commanders,False,True
14084,2025-01-26,2024,True,Kansas City Chiefs,32,29,Buffalo Bills,False,True


In [45]:
# Change Commanders name to get historic
commanders_name = [
    'Washington Commanders',
    'Washington Football Team',
    'Washington Redskins'
]

def change_commanders_name(team_name, commanders_name):
  if team_name in commanders_name:
    return 'Washington Commanders'
  else:
    return team_name

score_historical['team_home'] = score_historical.apply(lambda row: change_commanders_name(row['team_home'], commanders_name), axis=1)
score_historical['team_away'] = score_historical.apply(lambda row: change_commanders_name(row['team_away'], commanders_name), axis=1)

In [46]:
score_historical['team_home'] = score_historical['team_home'].str.split(' ').str[-1]
score_historical['team_away'] = score_historical['team_away'].str.split(' ').str[-1]

Then, we need to create the one year lag on the stats data (sending it one year to tha past).

In [47]:
stats_dataframe['year'] = stats_dataframe['year'] + 1

In [48]:
# Now let's try the first merge

final_df = score_historical.merge(stats_dataframe,
                                  left_on=['schedule_season', 'team_home'],
                                  right_on=['year', 'team'],
                                  how='left').merge(stats_dataframe,
                                                    left_on=['schedule_season', 'team_away'],
                                                    right_on=['year', 'team'],
                                                    how='left',
                                                    suffixes=('_home_', '_away_')).drop(['team_home_',
                                                                                          'team_away_'],
                                                                                        axis=1)

In [49]:
final_df.isna().any().any()

False

This is the DF where we will make our EDA, feature engineering, and modeling :)

In [50]:
final_df.head()

Unnamed: 0,schedule_date,schedule_season,schedule_playoff,team_home,score_home,score_away,team_away,stadium_neutral,home_winner,defense_downs_3rd_att_home_,defense_downs_3rd_md_home_,defense_downs_4th_att_home_,defense_downs_4th_md_home_,defense_downs_rec_1st_home_,defense_downs_rec_1st_perc_home_,defense_downs_rush_1st_home_,defense_downs_rush_1st_perc_home_,defense_downs_scrm_plys_home_,year_home_,defense_fumbles_ff_home_,defense_fumbles_fr_home_,defense_fumbles_fr_td_home_,defense_fumbles_rec_fum_home_,defense_fumbles_rush_fum_home_,defense_interceptions_int_home_,defense_interceptions_int_td_home_,defense_interceptions_int_yds_home_,defense_interceptions_lng_home_,defense_passing_att_home_,defense_passing_cmp_home_,defense_passing_cmp__perc_home_,defense_passing_yds/att_home_,defense_passing_yds_home_,defense_passing_td_home_,defense_passing_int_home_,defense_passing_rate_home_,defense_passing_1st_home_,defense_passing_1st_perc_home_,defense_passing_20+_home_,defense_passing_40+_home_,defense_passing_lng_home_,defense_passing_sck_home_,defense_receiving_rec_home_,defense_receiving_yds_home_,defense_receiving_yds/rec_home_,defense_receiving_td_home_,defense_receiving_20+_home_,defense_receiving_40+_home_,defense_receiving_lng_home_,defense_receiving_rec_1st_home_,defense_receiving_rec_1st_perc_home_,defense_receiving_rec_fum_home_,defense_receiving_pdef_home_,defense_rushing_att_home_,defense_rushing_rush_yds_home_,defense_rushing_ypc_home_,defense_rushing_td_home_,defense_rushing_20+_home_,defense_rushing_40+_home_,defense_rushing_lng_home_,defense_rushing_rush_1st_home_,defense_rushing_rush_1st_perc_home_,defense_rushing_rush_fum_home_,defense_scoring_fr_td_home_,defense_scoring_sfty_home_,defense_scoring_int_td_home_,defense_tackles_sck_home_,defense_tackles_comb_home_,defense_tackles_asst_home_,defense_tackles_solo_home_,offense_downs_3rd_att_home_,offense_downs_3rd_md_home_,offense_downs_4th_att_home_,offense_downs_4th_md_home_,offense_downs_rec_1st_home_,offense_downs_rec_1st_perc_home_,offense_downs_rush_1st_home_,offense_downs_rush_1st_perc_home_,offense_downs_scrm_plys_home_,offense_passing_att_home_,offense_passing_cmp_home_,offense_passing_cmp__perc_home_,offense_passing_yds/att_home_,offense_passing_pass_yds_home_,offense_passing_td_home_,offense_passing_int_home_,offense_passing_rate_home_,offense_passing_1st_home_,offense_passing_1st_perc_home_,offense_passing_20+_home_,offense_passing_40+_home_,offense_passing_lng_home_,offense_passing_sck_home_,offense_passing_scky_home_,offense_receiving_rec_home_,offense_receiving_yds_home_,offense_receiving_yds/rec_home_,offense_receiving_td_home_,offense_receiving_20+_home_,offense_receiving_40+_home_,offense_receiving_lng_home_,offense_receiving_rec_1st_home_,offense_receiving_rec_1st_perc_home_,offense_receiving_rec_fum_home_,offense_rushing_att_home_,offense_rushing_rush_yds_home_,offense_rushing_ypc_home_,offense_rushing_td_home_,offense_rushing_20+_home_,offense_rushing_40+_home_,offense_rushing_lng_home_,offense_rushing_rush_1st_home_,offense_rushing_rush_1st_perc_home_,offense_rushing_rush_fum_home_,offense_scoring_rsh_td_home_,offense_scoring_rec_td_home_,offense_scoring_tot_td_home_,offense_scoring_2-pt_home_,special_teams_field_goals_fgm_home_,special_teams_field_goals_att_home_,special_teams_field_goals_fg__perc_home_,special_teams_field_goals_1-19_>_percent_a_m_home_,special_teams_field_goals_20-29_>_percent_a_m_home_,special_teams_field_goals_30-39_>_percent_a_m_home_,special_teams_field_goals_40-49_>_percent_a_m_home_,special_teams_field_goals_50-59_>_percent_a_m_home_,special_teams_field_goals_60+_>_percent_a_m_home_,special_teams_field_goals_lng_home_,special_teams_field_goals_fg_blk_home_,special_teams_kickoff_returns_avg_home_,special_teams_kickoff_returns_ret_home_,special_teams_kickoff_returns_yds_home_,special_teams_kickoff_returns_kret_td_home_,special_teams_kickoff_returns_20+_home_,special_teams_kickoff_returns_40+_home_,special_teams_kickoff_returns_lng_home_,special_teams_kickoff_returns_fc_home_,special_teams_kickoff_returns_fum_home_,special_teams_kickoff_returns_fg_blk_home_,special_teams_kickoff_returns_xp_blk_home_,special_teams_kickoffs_ko_home_,special_teams_kickoffs_yds_home_,special_teams_kickoffs_tb_home_,special_teams_kickoffs_tb__perc_home_,special_teams_kickoffs_ret_home_,special_teams_kickoffs_ret_avg_home_,special_teams_kickoffs_osk_home_,special_teams_kickoffs_osk_rec_home_,special_teams_kickoffs_oob_home_,special_teams_kickoffs_td_home_,special_teams_punt_returns_avg_home_,special_teams_punt_returns_ret_home_,special_teams_punt_returns_yds_home_,special_teams_punt_returns_pret_t_home_,special_teams_punt_returns_20+_home_,special_teams_punt_returns_40+_home_,special_teams_punt_returns_lng_home_,special_teams_punt_returns_fc_home_,special_teams_punt_returns_fum_home_,special_teams_punt_returns_p_blk_home_,special_teams_punting_att_home_,special_teams_punting_cmp_home_,special_teams_punting_cmp__perc_home_,special_teams_punting_yds/att_home_,special_teams_punting_pass_yds_home_,special_teams_punting_td_home_,special_teams_punting_int_home_,special_teams_punting_rate_home_,special_teams_punting_1st_home_,special_teams_punting_1st_perc_home_,special_teams_punting_20+_home_,special_teams_punting_40+_home_,special_teams_punting_lng_home_,special_teams_punting_sck_home_,special_teams_punting_scky_home_,defense_downs_3rd_att_away_,defense_downs_3rd_md_away_,defense_downs_4th_att_away_,defense_downs_4th_md_away_,defense_downs_rec_1st_away_,defense_downs_rec_1st_perc_away_,defense_downs_rush_1st_away_,defense_downs_rush_1st_perc_away_,defense_downs_scrm_plys_away_,year_away_,defense_fumbles_ff_away_,defense_fumbles_fr_away_,defense_fumbles_fr_td_away_,defense_fumbles_rec_fum_away_,defense_fumbles_rush_fum_away_,defense_interceptions_int_away_,defense_interceptions_int_td_away_,defense_interceptions_int_yds_away_,defense_interceptions_lng_away_,defense_passing_att_away_,defense_passing_cmp_away_,defense_passing_cmp__perc_away_,defense_passing_yds/att_away_,defense_passing_yds_away_,defense_passing_td_away_,defense_passing_int_away_,defense_passing_rate_away_,defense_passing_1st_away_,defense_passing_1st_perc_away_,defense_passing_20+_away_,defense_passing_40+_away_,defense_passing_lng_away_,defense_passing_sck_away_,defense_receiving_rec_away_,defense_receiving_yds_away_,defense_receiving_yds/rec_away_,defense_receiving_td_away_,defense_receiving_20+_away_,defense_receiving_40+_away_,defense_receiving_lng_away_,defense_receiving_rec_1st_away_,defense_receiving_rec_1st_perc_away_,defense_receiving_rec_fum_away_,defense_receiving_pdef_away_,defense_rushing_att_away_,defense_rushing_rush_yds_away_,defense_rushing_ypc_away_,defense_rushing_td_away_,defense_rushing_20+_away_,defense_rushing_40+_away_,defense_rushing_lng_away_,defense_rushing_rush_1st_away_,defense_rushing_rush_1st_perc_away_,defense_rushing_rush_fum_away_,defense_scoring_fr_td_away_,defense_scoring_sfty_away_,defense_scoring_int_td_away_,defense_tackles_sck_away_,defense_tackles_comb_away_,defense_tackles_asst_away_,defense_tackles_solo_away_,offense_downs_3rd_att_away_,offense_downs_3rd_md_away_,offense_downs_4th_att_away_,offense_downs_4th_md_away_,offense_downs_rec_1st_away_,offense_downs_rec_1st_perc_away_,offense_downs_rush_1st_away_,offense_downs_rush_1st_perc_away_,offense_downs_scrm_plys_away_,offense_passing_att_away_,offense_passing_cmp_away_,offense_passing_cmp__perc_away_,offense_passing_yds/att_away_,offense_passing_pass_yds_away_,offense_passing_td_away_,offense_passing_int_away_,offense_passing_rate_away_,offense_passing_1st_away_,offense_passing_1st_perc_away_,offense_passing_20+_away_,offense_passing_40+_away_,offense_passing_lng_away_,offense_passing_sck_away_,offense_passing_scky_away_,offense_receiving_rec_away_,offense_receiving_yds_away_,offense_receiving_yds/rec_away_,offense_receiving_td_away_,offense_receiving_20+_away_,offense_receiving_40+_away_,offense_receiving_lng_away_,offense_receiving_rec_1st_away_,offense_receiving_rec_1st_perc_away_,offense_receiving_rec_fum_away_,offense_rushing_att_away_,offense_rushing_rush_yds_away_,offense_rushing_ypc_away_,offense_rushing_td_away_,offense_rushing_20+_away_,offense_rushing_40+_away_,offense_rushing_lng_away_,offense_rushing_rush_1st_away_,offense_rushing_rush_1st_perc_away_,offense_rushing_rush_fum_away_,offense_scoring_rsh_td_away_,offense_scoring_rec_td_away_,offense_scoring_tot_td_away_,offense_scoring_2-pt_away_,special_teams_field_goals_fgm_away_,special_teams_field_goals_att_away_,special_teams_field_goals_fg__perc_away_,special_teams_field_goals_1-19_>_percent_a_m_away_,special_teams_field_goals_20-29_>_percent_a_m_away_,special_teams_field_goals_30-39_>_percent_a_m_away_,special_teams_field_goals_40-49_>_percent_a_m_away_,special_teams_field_goals_50-59_>_percent_a_m_away_,special_teams_field_goals_60+_>_percent_a_m_away_,special_teams_field_goals_lng_away_,special_teams_field_goals_fg_blk_away_,special_teams_kickoff_returns_avg_away_,special_teams_kickoff_returns_ret_away_,special_teams_kickoff_returns_yds_away_,special_teams_kickoff_returns_kret_td_away_,special_teams_kickoff_returns_20+_away_,special_teams_kickoff_returns_40+_away_,special_teams_kickoff_returns_lng_away_,special_teams_kickoff_returns_fc_away_,special_teams_kickoff_returns_fum_away_,special_teams_kickoff_returns_fg_blk_away_,special_teams_kickoff_returns_xp_blk_away_,special_teams_kickoffs_ko_away_,special_teams_kickoffs_yds_away_,special_teams_kickoffs_tb_away_,special_teams_kickoffs_tb__perc_away_,special_teams_kickoffs_ret_away_,special_teams_kickoffs_ret_avg_away_,special_teams_kickoffs_osk_away_,special_teams_kickoffs_osk_rec_away_,special_teams_kickoffs_oob_away_,special_teams_kickoffs_td_away_,special_teams_punt_returns_avg_away_,special_teams_punt_returns_ret_away_,special_teams_punt_returns_yds_away_,special_teams_punt_returns_pret_t_away_,special_teams_punt_returns_20+_away_,special_teams_punt_returns_40+_away_,special_teams_punt_returns_lng_away_,special_teams_punt_returns_fc_away_,special_teams_punt_returns_fum_away_,special_teams_punt_returns_p_blk_away_,special_teams_punting_att_away_,special_teams_punting_cmp_away_,special_teams_punting_cmp__perc_away_,special_teams_punting_yds/att_away_,special_teams_punting_pass_yds_away_,special_teams_punting_td_away_,special_teams_punting_int_away_,special_teams_punting_rate_away_,special_teams_punting_1st_away_,special_teams_punting_1st_perc_away_,special_teams_punting_20+_away_,special_teams_punting_40+_away_,special_teams_punting_lng_away_,special_teams_punting_sck_away_,special_teams_punting_scky_away_
0,2011-09-08,2011,False,Packers,42,34,Saints,False,True,213,77,20,6,163,0.551,82,0.208,969,2011,15,8,1,5,6,24,3,318,64,527,296,0.562,6.5,3440,16,24,67.2,163,0.309,44,8,-85,47,296,3440,11.6,16,44,8,-85,163,0.551,5,89,395,1838,4.6,6,10,1,40,82,0.208,6,1,0,3,47,877,203,676,205,85,13,5,204,0.58,88,0.209,1000,541,352,0.651,8.0,4355,31,13,98.9,204,0.377,57,11,86,38,231,352,4355,12.4,31,57,11,86,204,0.58,9,421,1606,3.8,11,3,1,71,88,0.209,4,11,31,46,0,22,28,0.786,1.0,0.875,0.8,0.8,0.5,0.0,56,2,20.2,62,1249,0,31,5,51,0,2,0,0,84,5173,4,0.048,76,21.8,1,1,3,0,7.9,41,325,0,3,2,52,19,2,0,541,352,0.651,8.0,4355,31,13,98.9,204,0.377,57,11,86,38,231,203,70,23,15,150,0.49,94,0.223,948,2011,25,16,0,10,8,9,2,196,-96,494,306,0.619,6.8,3353,13,9,83.2,150,0.304,42,7,68,33,306,3353,11.0,13,42,7,68,150,0.49,10,69,421,1797,4.3,13,13,3,68,94,0.223,8,0,1,2,33,918,211,673,217,106,11,5,236,0.524,94,0.247,1067,661,450,0.681,7.0,4636,33,22,90.8,236,0.357,47,10,-80,26,195,450,4636,10.3,33,47,10,-80,236,0.524,2,380,1519,4.0,9,7,2,-55,94,0.247,7,9,33,44,0,25,31,0.806,1.0,0.625,0.846154,0.857143,1.0,0.0,52,0,22.7,52,1181,0,39,0,39,0,1,0,1,84,5481,14,0.167,66,24.1,2,0,2,0,7.8,27,210,0,2,2,72,18,3,0,661,450,0.681,7.0,4636,33,22,90.8,236,0.357,47,10,-80,26,195
1,2011-09-11,2011,False,Cardinals,28,21,Panthers,False,True,233,87,17,9,178,0.549,123,0.234,1092,2011,13,13,4,4,10,17,3,424,66,533,324,0.608,7.3,3881,22,17,83.6,178,0.334,55,7,70,33,324,3881,12.0,22,55,7,70,178,0.549,4,81,526,2323,4.4,19,19,1,48,123,0.234,10,4,0,3,33,984,173,797,209,58,18,7,154,0.54,67,0.209,931,561,285,0.508,5.8,3264,10,19,60.5,154,0.274,44,3,-74,50,343,285,3264,11.4,10,44,3,-74,154,0.54,4,320,1388,4.3,9,11,2,80,67,0.209,7,9,10,31,1,24,27,0.889,0.0,1.0,1.0,0.8,0.666667,0.0,55,0,24.8,84,2084,2,53,7,-102,0,1,3,0,70,4552,16,0.229,51,24.2,3,0,0,0,7.0,42,294,0,2,0,25,8,5,0,561,285,0.508,5.8,3264,10,19,60.5,154,0.274,44,3,-74,50,343,213,81,9,4,176,0.54,121,0.241,1060,2011,14,12,0,5,9,17,1,285,-66,526,326,0.62,6.9,3617,19,17,81.0,176,0.335,45,7,-56,31,326,3617,11.1,19,45,7,-56,176,0.54,5,71,503,1981,3.9,20,11,3,-68,121,0.241,9,0,0,1,31,985,219,775,227,69,23,10,125,0.488,78,0.182,962,484,256,0.529,5.4,2635,9,21,57.0,125,0.258,30,3,88,50,346,256,2635,10.3,9,30,3,88,125,0.488,2,428,1846,4.3,7,14,4,-60,78,0.182,16,7,9,17,0,25,29,0.862,0.0,1.0,1.0,0.785714,0.75,0.0,55,0,21.9,74,1619,0,40,5,64,0,2,0,0,58,3811,11,0.19,45,23.6,3,0,1,0,9.6,39,373,0,7,0,37,6,4,1,484,256,0.529,5.4,2635,9,21,57.0,125,0.258,30,3,88,50,346
2,2011-09-11,2011,False,Ravens,35,7,Steelers,False,True,229,87,12,6,187,0.536,79,0.206,1007,2011,15,8,0,7,10,19,3,289,44,596,349,0.586,6.4,3789,22,19,76.4,187,0.314,52,5,-88,27,349,3789,10.9,22,52,5,-88,187,0.536,7,83,384,1503,3.9,5,6,2,48,79,0.206,10,0,0,3,27,950,235,615,210,82,10,4,175,0.568,97,0.199,1018,491,308,0.627,7.4,3629,25,10,93.6,175,0.356,40,7,67,40,294,308,3629,11.8,25,40,7,67,175,0.568,3,487,1831,3.8,11,7,1,-50,97,0.199,6,11,25,40,0,26,29,0.897,0.0,1.0,0.888889,0.875,0.0,0.0,49,0,24.7,55,1357,1,36,2,-103,1,2,0,0,79,5620,40,0.506,38,26.0,0,0,1,0,7.4,43,319,0,4,0,35,26,3,0,491,308,0.627,7.4,3629,25,10,93.6,175,0.356,40,7,67,40,294,209,70,16,10,182,0.501,61,0.183,974,2011,24,14,0,4,5,21,3,248,79,593,363,0.612,6.3,3744,15,21,73.1,182,0.307,35,7,67,48,363,3744,10.3,15,35,7,67,182,0.501,4,91,333,1004,3.0,5,1,0,24,61,0.183,5,0,0,3,48,937,252,629,218,94,5,2,175,0.587,106,0.225,993,479,298,0.622,8.1,3890,22,9,95.2,175,0.365,62,11,56,43,289,298,3890,13.0,22,62,11,56,175,0.587,3,471,1924,4.1,15,16,1,-50,106,0.225,9,15,22,41,2,29,37,0.784,1.0,0.909091,1.0,0.615385,0.5,0.0,53,0,23.5,49,1153,1,32,3,-89,0,2,0,0,86,5213,7,0.081,77,20.0,2,0,1,1,6.1,37,226,0,3,0,38,19,3,1,479,298,0.622,8.1,3890,22,9,95.2,175,0.365,62,11,56,43,289
3,2011-09-11,2011,False,Bears,30,12,Falcons,False,True,216,75,13,6,183,0.504,88,0.228,1002,2011,23,13,0,6,15,21,1,332,-56,582,363,0.624,6.6,3820,14,21,74.4,183,0.314,44,5,-59,34,363,3820,10.5,14,44,5,-59,183,0.504,6,81,386,1441,3.7,14,13,1,45,88,0.228,15,0,0,1,34,888,172,749,195,64,7,2,163,0.591,76,0.184,936,466,276,0.592,7.3,3397,23,21,79.5,163,0.35,42,6,-89,56,382,276,3397,12.3,23,42,6,89,163,0.591,7,414,1616,3.9,10,11,2,-68,76,0.184,1,10,23,37,1,25,30,0.833,1.0,1.0,0.888889,0.571429,0.75,0.0,54,1,25.4,59,1497,0,38,10,79,1,1,1,1,78,5018,16,0.205,59,23.8,1,0,2,0,17.1,33,564,3,9,5,-89,10,0,0,466,276,0.592,7.3,3397,23,21,79.5,163,0.35,42,6,-89,56,382,201,79,17,9,183,0.503,87,0.238,957,2011,14,9,1,4,8,22,2,337,40,560,364,0.65,6.9,3846,23,22,82.2,183,0.327,38,6,-83,31,364,3846,10.6,23,38,6,-83,183,0.503,4,71,366,1694,4.6,9,10,5,-80,87,0.238,8,1,0,2,31,873,179,686,240,112,15,11,200,0.554,111,0.223,1097,577,361,0.626,6.5,3725,28,9,90.8,200,0.347,32,6,46,23,158,361,3725,10.3,28,32,6,-46,200,0.554,3,497,1891,3.8,14,12,1,55,111,0.223,4,14,28,47,2,28,31,0.903,0.0,1.0,0.888889,0.8,1.0,0.0,51,1,26.5,46,1221,1,35,5,-102,0,0,0,0,88,5945,23,0.261,64,21.3,1,0,0,1,12.1,19,230,1,1,1,-55,19,1,0,577,361,0.626,6.5,3725,28,9,90.8,200,0.347,32,6,46,23,158
4,2011-09-11,2011,False,Browns,17,27,Bengals,False,False,224,96,15,11,195,0.631,102,0.202,1041,2011,9,9,1,1,4,19,2,320,-64,507,309,0.61,7.3,3709,26,19,84.8,195,0.385,49,8,-78,29,309,3709,12.0,26,49,8,-78,195,0.631,1,81,505,2070,4.1,7,10,1,55,102,0.202,4,1,0,2,29,1030,257,690,202,75,7,5,154,0.52,87,0.211,927,478,296,0.619,6.7,3203,13,18,75.0,154,0.322,42,6,65,36,214,296,3203,10.8,13,42,6,65,154,0.52,4,413,1646,4.0,13,7,2,-68,87,0.211,13,13,13,29,0,23,28,0.821,1.0,1.0,1.0,0.6,0.0,0.0,48,0,17.0,70,1191,0,25,0,37,0,3,3,0,67,4057,7,0.104,54,17.8,4,0,2,0,10.7,22,235,0,1,1,62,11,4,0,478,296,0.619,6.7,3203,13,18,75.0,154,0.322,42,6,65,36,214,198,75,7,2,170,0.522,97,0.233,957,2011,12,10,1,5,8,16,1,200,-56,514,326,0.634,7.1,3648,21,16,85.2,170,0.331,47,10,54,27,326,3648,11.2,21,47,10,54,170,0.522,5,69,416,1843,4.4,15,17,3,61,97,0.233,8,1,0,1,27,926,294,640,224,87,19,13,206,0.564,85,0.199,1046,590,365,0.619,6.8,3988,26,20,82.4,206,0.349,45,9,-78,28,221,365,3988,10.9,26,45,9,78,206,0.564,4,428,1522,3.6,8,3,1,42,85,0.199,11,8,26,36,2,24,31,0.774,1.0,0.833333,1.0,0.4,0.5,0.0,54,1,21.8,71,1551,0,40,4,60,0,2,0,0,76,4692,8,0.105,64,23.5,3,1,1,2,7.2,36,258,0,2,0,27,21,1,0,590,365,0.619,6.8,3988,26,20,82.4,206,0.349,45,9,-78,28,221


## Saving the DF as parquet (a lighter format that preserves the fields).

In [51]:
final_df.to_parquet('games_stats_nfl.parquet')