<a href="https://colab.research.google.com/github/rafabandoni/nfl-predict/blob/main/notebooks/00_nfl_predict_class_loading_cleaning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

In [2]:
pd.set_option('display.max_columns', None)

# 00. Loading and Cleaning Data
Steps of this process:
1. Create one big raw dataset and split into train and test: this way we might have a real world scenario: data is gathered and then must be cleaned, but our test dataset might not be cleaned yet - as the to predict dataset won't arrive cleaned either.
2. Clean and document cleaning process on train dataset.
3. Create final dataset for EDA.

## Creating dataframes

In [3]:
PATH = 'https://raw.githubusercontent.com/rafabandoni/nfl-predict/refs/heads/main/data/input/'

In [4]:
score_historical = pd.read_csv(PATH + 'spreadspoke_scores.csv')
score_historical.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail
0,9/2/1966,1966,1,False,Miami Dolphins,14,23,Oakland Raiders,,,,Orange Bowl,False,83.0,6.0,71.0,
1,9/3/1966,1966,1,False,Houston Oilers,45,7,Denver Broncos,,,,Rice Stadium,False,81.0,7.0,70.0,
2,9/4/1966,1966,1,False,San Diego Chargers,27,7,Buffalo Bills,,,,Balboa Stadium,False,70.0,7.0,82.0,
3,9/9/1966,1966,2,False,Miami Dolphins,14,19,New York Jets,,,,Orange Bowl,False,82.0,11.0,78.0,
4,9/10/1966,1966,1,False,Green Bay Packers,24,3,Baltimore Colts,,,,Lambeau Field,False,64.0,8.0,62.0,


In [5]:
# Iter over a list so we don't need to call each df individually
data_list = [
  'defense_downs',
  'defense_fumbles',
  'defense_interceptions',
  'defense_passing',
  'defense_receiving',
  'defense_rushing',
  'defense_scoring',
  'defense_tackles',
  'offense_downs',
  'offense_passing',
  'offense_receiving',
  'offense_rushing',
  'offense_scoring',
  'special-teams_field-goals',
  'special-teams_kickoff-returns',
  'special-teams_kickoffs',
  'special-teams_punt-returns',
  'special-teams_punting',
  'special-teams_scoring',
]

In [6]:
dataframe_dict = {}
for item in data_list:
  data = pd.read_csv(PATH + f'{item}.csv')
  dataframe_dict[item] = data

In [7]:
# Unpack dict into each of the dataframe variables
(
  defense_downs,
  defense_fumbles,
  defense_interceptions,
  defense_passing,
  defense_receiving,
  defense_rushing,
  defense_scoring,
  defense_tackles,
  offense_downs,
  offense_passing,
  offense_receiving,
  offense_rushing,
  offense_scoring,
  special_teams_field_goals,
  special_teams_kickoff_returns,
  special_teams_kickoffs,
  special_teams_punt_returns,
  special_teams_punting,
  special_teams_scoring
) = tuple(dataframe_dict.values())

In [8]:
# Checking result
offense_rushing.head()

Unnamed: 0,Team,Att,Rush Yds,YPC,TD,20+,40+,Lng,Rush 1st,Rush 1st%,Rush FUM,year
0,Chiefs,556,2627,4.7,13,15,4,80T,129,23.2,7,2010
1,Jets,534,2374,4.4,14,11,2,53,118,22.1,11,2010
2,Jaguars,512,2395,4.7,14,14,1,74,148,28.9,8,2010
3,Raiders,504,2494,5.0,19,27,6,71,113,22.4,9,2010
4,Falcons,497,1891,3.8,14,12,1,55,111,22.3,4,2010


## Joining dataframes

So we can work with all as one.

In order to do better code and avoid repetitions, let's create a function!

**Note**: In class, let's first create the code and then the function

In [9]:
def get_df_name(df):
  name =[x for x in globals() if globals()[x] is df][0]
  return name

def change_dataframes_columns(dataframes_list):
  keep_names = ['year', 'Team']
  for df in dataframes_list:
    df_name = get_df_name(df)
    new_columns = []
    for column in df.columns:
      if not column in (keep_names):
        new_name = f'{df_name}_{column}'
      else:
        new_name = column
      new_columns.append(new_name)
    df.columns = new_columns

def merge_dataframes(dataframes_list):
  new_dataframe = dataframes_list[0].copy()
  for df in dataframes_list[1:]:
    new_dataframe = new_dataframe.merge(df,
                                        on=['year', 'Team'],
                                        how='left')
  return new_dataframe

In [10]:
defense_dfs = [
  defense_downs,
  defense_fumbles,
  defense_interceptions,
  defense_passing,
  defense_receiving,
  defense_rushing,
  defense_scoring,
  defense_tackles,
]

change_dataframes_columns(defense_dfs)
defense_dataframe = merge_dataframes(defense_dfs)

In [11]:
offense_dfs = [
  offense_downs,
  offense_passing,
  offense_receiving,
  offense_rushing,
  offense_scoring,
]

change_dataframes_columns(offense_dfs)
offense_dataframe = merge_dataframes(offense_dfs)

In [12]:
special_teams_dfs = [
  special_teams_field_goals,
  special_teams_kickoff_returns,
  special_teams_kickoffs,
  special_teams_punt_returns,
  special_teams_punting,
  special_teams_scoring
]

change_dataframes_columns(special_teams_dfs)
special_teams_dataframe = merge_dataframes(special_teams_dfs)

## Bringing all together as one

In [13]:
stats_dataframe = defense_dataframe.merge(offense_dataframe,
                                          on=['year', 'Team'],
                                          how='left')

stats_dataframe = stats_dataframe.merge(special_teams_dataframe,
                                        on=['year', 'Team'],
                                        how='left')

### Translating team names

In [14]:
set([*score_historical['team_home']] + [*score_historical['team_away']])

{'Arizona Cardinals',
 'Atlanta Falcons',
 'Baltimore Colts',
 'Baltimore Ravens',
 'Boston Patriots',
 'Buffalo Bills',
 'Carolina Panthers',
 'Chicago Bears',
 'Cincinnati Bengals',
 'Cleveland Browns',
 'Dallas Cowboys',
 'Denver Broncos',
 'Detroit Lions',
 'Green Bay Packers',
 'Houston Oilers',
 'Houston Texans',
 'Indianapolis Colts',
 'Jacksonville Jaguars',
 'Kansas City Chiefs',
 'Las Vegas Raiders',
 'Los Angeles Chargers',
 'Los Angeles Raiders',
 'Los Angeles Rams',
 'Miami Dolphins',
 'Minnesota Vikings',
 'New England Patriots',
 'New Orleans Saints',
 'New York Giants',
 'New York Jets',
 'Oakland Raiders',
 'Philadelphia Eagles',
 'Phoenix Cardinals',
 'Pittsburgh Steelers',
 'San Diego Chargers',
 'San Francisco 49ers',
 'Seattle Seahawks',
 'St. Louis Cardinals',
 'St. Louis Rams',
 'Tampa Bay Buccaneers',
 'Tennessee Oilers',
 'Tennessee Titans',
 'Washington Commanders',
 'Washington Football Team',
 'Washington Redskins'}

In [15]:
stats_dataframe['Team'].sort_values().unique()

array(['49ers', 'Bears', 'Bengals', 'Bills', 'Broncos', 'Browns',
       'Buccaneers', 'Cardinals', 'Chargers', 'Chiefs', 'Colts',
       'Commanders', 'Cowboys', 'Dolphins', 'Eagles', 'Falcons',
       'FootballTeam', 'Giants', 'Jaguars', 'Jets', 'Lions', 'Niners',
       'Packers', 'Panthers', 'Patriots', 'Raiders', 'Rams', 'Ravens',
       'Redskins', 'Saints', 'Seahawks', 'Steelers', 'Texans', 'Titans',
       'Vikings'], dtype=object)

In [16]:
teams_to_translate = {
    'Niners' : '49ers',
    'Redskins' : 'Commanders',
    'FootballTeam' : 'Commanders'
}

def translate_team_names(team_name, teams_to_translate):
  if team_name in teams_to_translate.keys():
    return teams_to_translate.get(team_name)
  else:
    return team_name

stats_dataframe['Team'] = stats_dataframe.apply(lambda row: translate_team_names(row['Team'], teams_to_translate), axis=1)

In [17]:
# Change Commanders name to get historic
commanders_name = [
    'Washington Commanders',
    'Washington Football Team',
    'Washington Redskins'
]

def change_commanders_name(team_name, commanders_name):
  if team_name in commanders_name:
    return 'Washington Commanders'
  else:
    return team_name

score_historical['team_home'] = score_historical.apply(lambda row: change_commanders_name(row['team_home'], commanders_name), axis=1)
score_historical['team_away'] = score_historical.apply(lambda row: change_commanders_name(row['team_away'], commanders_name), axis=1)

In [18]:
score_historical['team_home'] = score_historical['team_home'].str.split(' ').str[-1]
score_historical['team_away'] = score_historical['team_away'].str.split(' ').str[-1]

### Creating unique dataset

In [19]:
stats_dataframe['year'] = stats_dataframe['year'] + 1 # the last year stats refers to this years game
stats_dataframe.rename(columns={'year' : 'stats_year'}, inplace=True)

In [20]:
final_df = score_historical.merge(stats_dataframe,
                                  left_on=['schedule_season', 'team_home'],
                                  right_on=['stats_year', 'Team'],
                                  how='left').merge(stats_dataframe,
                                                    left_on=['schedule_season', 'team_away'],
                                                    right_on=['stats_year', 'Team'],
                                                    how='left',
                                                    suffixes=('_home_', '_away_'))

In [21]:
final_df.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,Team_home_,defense_downs_3rd Att_home_,defense_downs_3rd Md_home_,defense_downs_4th Att_home_,defense_downs_4th Md_home_,defense_downs_Rec 1st_home_,defense_downs_Rec 1st%_home_,defense_downs_Rush 1st_home_,defense_downs_Rush 1st%_home_,defense_downs_Scrm Plys_home_,stats_year_home_,defense_fumbles_FF_home_,defense_fumbles_FR_home_,defense_fumbles_FR TD_home_,defense_fumbles_Rec FUM_home_,defense_fumbles_Rush FUM_home_,defense_interceptions_INT_home_,defense_interceptions_INT TD_home_,defense_interceptions_INT Yds_home_,defense_interceptions_Lng_home_,defense_passing_Att_home_,defense_passing_Cmp_home_,defense_passing_Cmp %_home_,defense_passing_Yds/Att_home_,defense_passing_Yds_home_,defense_passing_TD_home_,defense_passing_INT_home_,defense_passing_Rate_home_,defense_passing_1st_home_,defense_passing_1st%_home_,defense_passing_20+_home_,defense_passing_40+_home_,defense_passing_Lng_home_,defense_passing_Sck_home_,defense_receiving_Rec_home_,defense_receiving_Yds_home_,defense_receiving_Yds/Rec_home_,defense_receiving_TD_home_,defense_receiving_20+_home_,defense_receiving_40+_home_,defense_receiving_Lng_home_,defense_receiving_Rec 1st_home_,defense_receiving_Rec 1st%_home_,defense_receiving_Rec FUM_home_,defense_receiving_PDef_home_,defense_rushing_Att_home_,defense_rushing_Rush Yds_home_,defense_rushing_YPC_home_,defense_rushing_TD_home_,defense_rushing_20+_home_,defense_rushing_40+_home_,defense_rushing_Lng_home_,defense_rushing_Rush 1st_home_,defense_rushing_Rush 1st%_home_,defense_rushing_Rush FUM_home_,defense_scoring_FR TD_home_,defense_scoring_SFTY_home_,defense_scoring_INT TD_home_,defense_tackles_Sck_home_,defense_tackles_Comb_home_,defense_tackles_Asst_home_,defense_tackles_Solo_home_,offense_downs_3rd Att_home_,offense_downs_3rd Md_home_,offense_downs_4th Att_home_,offense_downs_4th Md_home_,offense_downs_Rec 1st_home_,offense_downs_Rec 1st%_home_,offense_downs_Rush 1st_home_,offense_downs_Rush 1st%_home_,offense_downs_Scrm Plys_home_,offense_passing_Att_home_,offense_passing_Cmp_home_,offense_passing_Cmp %_home_,offense_passing_Yds/Att_home_,offense_passing_Pass Yds_home_,offense_passing_TD_home_,offense_passing_INT_home_,offense_passing_Rate_home_,offense_passing_1st_home_,offense_passing_1st%_home_,offense_passing_20+_home_,offense_passing_40+_home_,offense_passing_Lng_home_,offense_passing_Sck_home_,offense_passing_SckY_home_,offense_receiving_Rec_home_,offense_receiving_Yds_home_,offense_receiving_Yds/Rec_home_,offense_receiving_TD_home_,offense_receiving_20+_home_,offense_receiving_40+_home_,offense_receiving_Lng_home_,offense_receiving_Rec 1st_home_,offense_receiving_Rec 1st%_home_,offense_receiving_Rec FUM_home_,offense_rushing_Att_home_,offense_rushing_Rush Yds_home_,offense_rushing_YPC_home_,offense_rushing_TD_home_,offense_rushing_20+_home_,offense_rushing_40+_home_,offense_rushing_Lng_home_,offense_rushing_Rush 1st_home_,offense_rushing_Rush 1st%_home_,offense_rushing_Rush FUM_home_,offense_scoring_Rsh TD_home_,offense_scoring_Rec TD_home_,offense_scoring_Tot TD_home_,offense_scoring_2-PT_home_,special_teams_field_goals_FGM_home_,special_teams_field_goals_Att_home_,special_teams_field_goals_FG %_home_,special_teams_field_goals_1-19 > A-M_home_,special_teams_field_goals_20-29 > A-M_home_,special_teams_field_goals_30-39 > A-M_home_,special_teams_field_goals_40-49 > A-M_home_,special_teams_field_goals_50-59 > A-M_home_,special_teams_field_goals_60+ > A-M_home_,special_teams_field_goals_Lng_home_,special_teams_field_goals_FG Blk_home_,special_teams_kickoff_returns_Avg_home_,special_teams_kickoff_returns_Ret_home_,special_teams_kickoff_returns_Yds_home_,special_teams_kickoff_returns_KRet TD_home_,special_teams_kickoff_returns_20+_home_,special_teams_kickoff_returns_40+_home_,special_teams_kickoff_returns_Lng_home_,special_teams_kickoff_returns_FC_home_,special_teams_kickoff_returns_FUM_home_,special_teams_kickoff_returns_FG Blk_home_,special_teams_kickoff_returns_XP Blk_home_,special_teams_kickoffs_KO_home_,special_teams_kickoffs_Yds_home_,special_teams_kickoffs_TB_home_,special_teams_kickoffs_TB %_home_,special_teams_kickoffs_Ret_home_,special_teams_kickoffs_Ret Avg_home_,special_teams_kickoffs_OSK_home_,special_teams_kickoffs_OSK Rec_home_,special_teams_kickoffs_OOB_home_,special_teams_kickoffs_TD_home_,special_teams_punt_returns_Avg_home_,special_teams_punt_returns_Ret_home_,special_teams_punt_returns_Yds_home_,special_teams_punt_returns_PRet T_home_,special_teams_punt_returns_20+_home_,special_teams_punt_returns_40+_home_,special_teams_punt_returns_Lng_home_,special_teams_punt_returns_FC_home_,special_teams_punt_returns_FUM_home_,special_teams_punt_returns_P Blk_home_,special_teams_punting_Att_home_,special_teams_punting_Cmp_home_,special_teams_punting_Cmp %_home_,special_teams_punting_Yds/Att_home_,special_teams_punting_Pass Yds_home_,special_teams_punting_TD_home_,special_teams_punting_INT_home_,special_teams_punting_Rate_home_,special_teams_punting_1st_home_,special_teams_punting_1st%_home_,special_teams_punting_20+_home_,special_teams_punting_40+_home_,special_teams_punting_Lng_home_,special_teams_punting_Sck_home_,special_teams_punting_SckY_home_,data_FGM_home_,data_FG %_home_,data_XPM_home_,data_XP Pct_home_,data_KRet TD_home_,data_PRet T_home_,Team_away_,defense_downs_3rd Att_away_,defense_downs_3rd Md_away_,defense_downs_4th Att_away_,defense_downs_4th Md_away_,defense_downs_Rec 1st_away_,defense_downs_Rec 1st%_away_,defense_downs_Rush 1st_away_,defense_downs_Rush 1st%_away_,defense_downs_Scrm Plys_away_,stats_year_away_,defense_fumbles_FF_away_,defense_fumbles_FR_away_,defense_fumbles_FR TD_away_,defense_fumbles_Rec FUM_away_,defense_fumbles_Rush FUM_away_,defense_interceptions_INT_away_,defense_interceptions_INT TD_away_,defense_interceptions_INT Yds_away_,defense_interceptions_Lng_away_,defense_passing_Att_away_,defense_passing_Cmp_away_,defense_passing_Cmp %_away_,defense_passing_Yds/Att_away_,defense_passing_Yds_away_,defense_passing_TD_away_,defense_passing_INT_away_,defense_passing_Rate_away_,defense_passing_1st_away_,defense_passing_1st%_away_,defense_passing_20+_away_,defense_passing_40+_away_,defense_passing_Lng_away_,defense_passing_Sck_away_,defense_receiving_Rec_away_,defense_receiving_Yds_away_,defense_receiving_Yds/Rec_away_,defense_receiving_TD_away_,defense_receiving_20+_away_,defense_receiving_40+_away_,defense_receiving_Lng_away_,defense_receiving_Rec 1st_away_,defense_receiving_Rec 1st%_away_,defense_receiving_Rec FUM_away_,defense_receiving_PDef_away_,defense_rushing_Att_away_,defense_rushing_Rush Yds_away_,defense_rushing_YPC_away_,defense_rushing_TD_away_,defense_rushing_20+_away_,defense_rushing_40+_away_,defense_rushing_Lng_away_,defense_rushing_Rush 1st_away_,defense_rushing_Rush 1st%_away_,defense_rushing_Rush FUM_away_,defense_scoring_FR TD_away_,defense_scoring_SFTY_away_,defense_scoring_INT TD_away_,defense_tackles_Sck_away_,defense_tackles_Comb_away_,defense_tackles_Asst_away_,defense_tackles_Solo_away_,offense_downs_3rd Att_away_,offense_downs_3rd Md_away_,offense_downs_4th Att_away_,offense_downs_4th Md_away_,offense_downs_Rec 1st_away_,offense_downs_Rec 1st%_away_,offense_downs_Rush 1st_away_,offense_downs_Rush 1st%_away_,offense_downs_Scrm Plys_away_,offense_passing_Att_away_,offense_passing_Cmp_away_,offense_passing_Cmp %_away_,offense_passing_Yds/Att_away_,offense_passing_Pass Yds_away_,offense_passing_TD_away_,offense_passing_INT_away_,offense_passing_Rate_away_,offense_passing_1st_away_,offense_passing_1st%_away_,offense_passing_20+_away_,offense_passing_40+_away_,offense_passing_Lng_away_,offense_passing_Sck_away_,offense_passing_SckY_away_,offense_receiving_Rec_away_,offense_receiving_Yds_away_,offense_receiving_Yds/Rec_away_,offense_receiving_TD_away_,offense_receiving_20+_away_,offense_receiving_40+_away_,offense_receiving_Lng_away_,offense_receiving_Rec 1st_away_,offense_receiving_Rec 1st%_away_,offense_receiving_Rec FUM_away_,offense_rushing_Att_away_,offense_rushing_Rush Yds_away_,offense_rushing_YPC_away_,offense_rushing_TD_away_,offense_rushing_20+_away_,offense_rushing_40+_away_,offense_rushing_Lng_away_,offense_rushing_Rush 1st_away_,offense_rushing_Rush 1st%_away_,offense_rushing_Rush FUM_away_,offense_scoring_Rsh TD_away_,offense_scoring_Rec TD_away_,offense_scoring_Tot TD_away_,offense_scoring_2-PT_away_,special_teams_field_goals_FGM_away_,special_teams_field_goals_Att_away_,special_teams_field_goals_FG %_away_,special_teams_field_goals_1-19 > A-M_away_,special_teams_field_goals_20-29 > A-M_away_,special_teams_field_goals_30-39 > A-M_away_,special_teams_field_goals_40-49 > A-M_away_,special_teams_field_goals_50-59 > A-M_away_,special_teams_field_goals_60+ > A-M_away_,special_teams_field_goals_Lng_away_,special_teams_field_goals_FG Blk_away_,special_teams_kickoff_returns_Avg_away_,special_teams_kickoff_returns_Ret_away_,special_teams_kickoff_returns_Yds_away_,special_teams_kickoff_returns_KRet TD_away_,special_teams_kickoff_returns_20+_away_,special_teams_kickoff_returns_40+_away_,special_teams_kickoff_returns_Lng_away_,special_teams_kickoff_returns_FC_away_,special_teams_kickoff_returns_FUM_away_,special_teams_kickoff_returns_FG Blk_away_,special_teams_kickoff_returns_XP Blk_away_,special_teams_kickoffs_KO_away_,special_teams_kickoffs_Yds_away_,special_teams_kickoffs_TB_away_,special_teams_kickoffs_TB %_away_,special_teams_kickoffs_Ret_away_,special_teams_kickoffs_Ret Avg_away_,special_teams_kickoffs_OSK_away_,special_teams_kickoffs_OSK Rec_away_,special_teams_kickoffs_OOB_away_,special_teams_kickoffs_TD_away_,special_teams_punt_returns_Avg_away_,special_teams_punt_returns_Ret_away_,special_teams_punt_returns_Yds_away_,special_teams_punt_returns_PRet T_away_,special_teams_punt_returns_20+_away_,special_teams_punt_returns_40+_away_,special_teams_punt_returns_Lng_away_,special_teams_punt_returns_FC_away_,special_teams_punt_returns_FUM_away_,special_teams_punt_returns_P Blk_away_,special_teams_punting_Att_away_,special_teams_punting_Cmp_away_,special_teams_punting_Cmp %_away_,special_teams_punting_Yds/Att_away_,special_teams_punting_Pass Yds_away_,special_teams_punting_TD_away_,special_teams_punting_INT_away_,special_teams_punting_Rate_away_,special_teams_punting_1st_away_,special_teams_punting_1st%_away_,special_teams_punting_20+_away_,special_teams_punting_40+_away_,special_teams_punting_Lng_away_,special_teams_punting_Sck_away_,special_teams_punting_SckY_away_,data_FGM_away_,data_FG %_away_,data_XPM_away_,data_XP Pct_away_,data_KRet TD_away_,data_PRet T_away_
0,9/2/1966,1966,1,False,Dolphins,14,23,Raiders,,,,Orange Bowl,False,83.0,6.0,71.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,9/3/1966,1966,1,False,Oilers,45,7,Broncos,,,,Rice Stadium,False,81.0,7.0,70.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,9/4/1966,1966,1,False,Chargers,27,7,Bills,,,,Balboa Stadium,False,70.0,7.0,82.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,9/9/1966,1966,2,False,Dolphins,14,19,Jets,,,,Orange Bowl,False,82.0,11.0,78.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,9/10/1966,1966,1,False,Packers,24,3,Colts,,,,Lambeau Field,False,64.0,8.0,62.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### Spliting data to be used as test
Let's create the test dataset, which should reflect real world and avoid data leackage.

In [22]:
final_df['schedule_season'].unique()

array([1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976,
       1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987,
       1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
       1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
       2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020,
       2021, 2022, 2023, 2024])

In [23]:
train_data = final_df[final_df['schedule_season'] < 2024]
test_data = final_df[final_df['schedule_season'] == 2024]

From now on, we might work only with train data. We do this to work our train data as close as possible with the real world.  
When we get our data to predict, we might only translate the names if needed (as done above), and then we need to take care of all of the next processes, so we might create a **pipeline** after all to take care of it also in the test data.

## Cleaning data

## Cleaning data

Now that we already have 3 main datasets, we can start cleaning data! Cleaning data goes through some process, as such:
1. Check null values
2. Drop unused columns
3. Rename columns and/or values if needed
4. Check overall data

In [24]:
# removing data without stats
train_data = train_data[~train_data['Team_home_'].isna()]

### Creating functions

In [25]:
# Cleaning % data
def clean_percent_data(df):
  for column in df.columns:
    if '%' in column:
      df[column] = df[column] / 100

In [26]:
# Remove special characters from columns
def remove_special_char_columns(df):
  for column in df.columns:
    new_name = column.lower().replace(' ','_').replace('%', '_perc')
    df.rename(columns={
        column : new_name
    }, inplace=True)

In [27]:
# Transform turnover in negative data
def negative_turnover_number(turnover_columns, df):
  for column in turnover_columns:
    new_column = []
    for item in df[column]:
      if 'T' in item:
        new_value = item.replace('T', '')
        new_value = int(new_value)
        new_value = new_value * -1
      else:
        new_value = int(item)
      new_column.append(new_value)
    df[column] = new_column

In [28]:
def creating_home_and_away_columns(columns: list) -> list:
  new_named_list = []
  for item in columns:
    new_named_list.append(item + '_home_')
    new_named_list.append(item + '_away_')
  return new_named_list

### Applying functions

In [29]:
turnover_columns = [
    'defense_passing_lng',
    'defense_receiving_lng',
    'defense_rushing_lng',
    'defense_interceptions_lng',
    'offense_passing_lng',
    'offense_receiving_lng',
    'offense_rushing_lng',
    'special_teams_kickoff_returns_lng',
    'special_teams_punt_returns_lng',
    'special_teams_punting_lng'
]

turnover_columns = creating_home_and_away_columns(turnover_columns)

In [30]:
clean_percent_data(train_data)
remove_special_char_columns(train_data)
negative_turnover_number(turnover_columns, train_data)

In [31]:
# Fix columns with A_M (attemps_made)
columns_list = [
    'special_teams_field_goals_1-19_>_a-m',
    'special_teams_field_goals_20-29_>_a-m',
    'special_teams_field_goals_30-39_>_a-m',
    'special_teams_field_goals_40-49_>_a-m',
    'special_teams_field_goals_50-59_>_a-m',
    'special_teams_field_goals_60+_>_a-m',
]
columns_list = creating_home_and_away_columns(columns_list)

# We will transform it in a percent so we don't need to create a new column for each case
for column in columns_list:
  attps_list = train_data[column].str.split('_').str[0]
  matches_list = train_data[column].str.split('_').str[1]

  percent_matches = matches_list.astype('int') / attps_list.astype('int')
  percent_matches.fillna(0, inplace=True)
  train_data[column] = percent_matches
  train_data.rename(columns={
      column : column.replace('a-m', 'percent_a_m')
  }, inplace=True)

In [32]:
# Removing non important columns (trash from the origin)
columns_to_drop = ['data_fgm',
                   'data_fg__perc',
                   'data_xpm',
                   'data_xp_pct',
                   'data_kret_td',
                   'data_pret_t']
columns_to_drop = creating_home_and_away_columns(columns_to_drop)

train_data.drop(columns_to_drop,
                axis=1,
                inplace=True)

### Fixing data types

In [33]:
train_data['schedule_date'] = train_data['schedule_date'].astype('datetime64[ns]')

## Creating final df for next steps (EDA and modeling)

In [34]:
# create our Y, the data we want to predict
def home_winner(score_home, score_away):
  if score_home > score_away:
    return True
  else:
    return False # we will not be working on ties here since they are rare on NFL

train_data['home_winner'] = train_data.apply(lambda row: home_winner(row['score_home'], row['score_away']), axis=1)
test_data['home_winner'] = test_data.apply(lambda row: home_winner(row['score_home'], row['score_away']), axis=1)

  train_data['home_winner'] = train_data.apply(lambda row: home_winner(row['score_home'], row['score_away']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['home_winner'] = test_data.apply(lambda row: home_winner(row['score_home'], row['score_away']), axis=1)


In [35]:
# droping unused data
train_data.drop(['weather_temperature',
                 'weather_wind_mph',
                 'weather_humidity',
                 'weather_detail'],
                axis=1,
                inplace=True)

In [36]:
# Checking if any null value was left behind
train_data.isna().any().sort_values()

Unnamed: 0,0
schedule_date,False
defense_scoring_sfty_away_,False
defense_scoring_fr_td_away_,False
defense_rushing_rush_fum_away_,False
defense_rushing_rush_1st_perc_away_,False
...,...
offense_rushing_td_home_,False
offense_rushing_ypc_home_,False
offense_rushing_rush_yds_home_,False
offense_rushing_rush_fum_home_,False


This is the DF where we will make our EDA, feature engineering, and modeling :)

In [37]:
train_data.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,team_home_,defense_downs_3rd_att_home_,defense_downs_3rd_md_home_,defense_downs_4th_att_home_,defense_downs_4th_md_home_,defense_downs_rec_1st_home_,defense_downs_rec_1st_perc_home_,defense_downs_rush_1st_home_,defense_downs_rush_1st_perc_home_,defense_downs_scrm_plys_home_,stats_year_home_,defense_fumbles_ff_home_,defense_fumbles_fr_home_,defense_fumbles_fr_td_home_,defense_fumbles_rec_fum_home_,defense_fumbles_rush_fum_home_,defense_interceptions_int_home_,defense_interceptions_int_td_home_,defense_interceptions_int_yds_home_,defense_interceptions_lng_home_,defense_passing_att_home_,defense_passing_cmp_home_,defense_passing_cmp__perc_home_,defense_passing_yds/att_home_,defense_passing_yds_home_,defense_passing_td_home_,defense_passing_int_home_,defense_passing_rate_home_,defense_passing_1st_home_,defense_passing_1st_perc_home_,defense_passing_20+_home_,defense_passing_40+_home_,defense_passing_lng_home_,defense_passing_sck_home_,defense_receiving_rec_home_,defense_receiving_yds_home_,defense_receiving_yds/rec_home_,defense_receiving_td_home_,defense_receiving_20+_home_,defense_receiving_40+_home_,defense_receiving_lng_home_,defense_receiving_rec_1st_home_,defense_receiving_rec_1st_perc_home_,defense_receiving_rec_fum_home_,defense_receiving_pdef_home_,defense_rushing_att_home_,defense_rushing_rush_yds_home_,defense_rushing_ypc_home_,defense_rushing_td_home_,defense_rushing_20+_home_,defense_rushing_40+_home_,defense_rushing_lng_home_,defense_rushing_rush_1st_home_,defense_rushing_rush_1st_perc_home_,defense_rushing_rush_fum_home_,defense_scoring_fr_td_home_,defense_scoring_sfty_home_,defense_scoring_int_td_home_,defense_tackles_sck_home_,defense_tackles_comb_home_,defense_tackles_asst_home_,defense_tackles_solo_home_,offense_downs_3rd_att_home_,offense_downs_3rd_md_home_,offense_downs_4th_att_home_,offense_downs_4th_md_home_,offense_downs_rec_1st_home_,offense_downs_rec_1st_perc_home_,offense_downs_rush_1st_home_,offense_downs_rush_1st_perc_home_,offense_downs_scrm_plys_home_,offense_passing_att_home_,offense_passing_cmp_home_,offense_passing_cmp__perc_home_,offense_passing_yds/att_home_,offense_passing_pass_yds_home_,offense_passing_td_home_,offense_passing_int_home_,offense_passing_rate_home_,offense_passing_1st_home_,offense_passing_1st_perc_home_,offense_passing_20+_home_,offense_passing_40+_home_,offense_passing_lng_home_,offense_passing_sck_home_,offense_passing_scky_home_,offense_receiving_rec_home_,offense_receiving_yds_home_,offense_receiving_yds/rec_home_,offense_receiving_td_home_,offense_receiving_20+_home_,offense_receiving_40+_home_,offense_receiving_lng_home_,offense_receiving_rec_1st_home_,offense_receiving_rec_1st_perc_home_,offense_receiving_rec_fum_home_,offense_rushing_att_home_,offense_rushing_rush_yds_home_,offense_rushing_ypc_home_,offense_rushing_td_home_,offense_rushing_20+_home_,offense_rushing_40+_home_,offense_rushing_lng_home_,offense_rushing_rush_1st_home_,offense_rushing_rush_1st_perc_home_,offense_rushing_rush_fum_home_,offense_scoring_rsh_td_home_,offense_scoring_rec_td_home_,offense_scoring_tot_td_home_,offense_scoring_2-pt_home_,special_teams_field_goals_fgm_home_,special_teams_field_goals_att_home_,special_teams_field_goals_fg__perc_home_,special_teams_field_goals_1-19_>_percent_a_m_home_,special_teams_field_goals_20-29_>_percent_a_m_home_,special_teams_field_goals_30-39_>_percent_a_m_home_,special_teams_field_goals_40-49_>_percent_a_m_home_,special_teams_field_goals_50-59_>_percent_a_m_home_,special_teams_field_goals_60+_>_percent_a_m_home_,special_teams_field_goals_lng_home_,special_teams_field_goals_fg_blk_home_,special_teams_kickoff_returns_avg_home_,special_teams_kickoff_returns_ret_home_,special_teams_kickoff_returns_yds_home_,special_teams_kickoff_returns_kret_td_home_,special_teams_kickoff_returns_20+_home_,special_teams_kickoff_returns_40+_home_,special_teams_kickoff_returns_lng_home_,special_teams_kickoff_returns_fc_home_,special_teams_kickoff_returns_fum_home_,special_teams_kickoff_returns_fg_blk_home_,special_teams_kickoff_returns_xp_blk_home_,special_teams_kickoffs_ko_home_,special_teams_kickoffs_yds_home_,special_teams_kickoffs_tb_home_,special_teams_kickoffs_tb__perc_home_,special_teams_kickoffs_ret_home_,special_teams_kickoffs_ret_avg_home_,special_teams_kickoffs_osk_home_,special_teams_kickoffs_osk_rec_home_,special_teams_kickoffs_oob_home_,special_teams_kickoffs_td_home_,special_teams_punt_returns_avg_home_,special_teams_punt_returns_ret_home_,special_teams_punt_returns_yds_home_,special_teams_punt_returns_pret_t_home_,special_teams_punt_returns_20+_home_,special_teams_punt_returns_40+_home_,special_teams_punt_returns_lng_home_,special_teams_punt_returns_fc_home_,special_teams_punt_returns_fum_home_,special_teams_punt_returns_p_blk_home_,special_teams_punting_att_home_,special_teams_punting_cmp_home_,special_teams_punting_cmp__perc_home_,special_teams_punting_yds/att_home_,special_teams_punting_pass_yds_home_,special_teams_punting_td_home_,special_teams_punting_int_home_,special_teams_punting_rate_home_,special_teams_punting_1st_home_,special_teams_punting_1st_perc_home_,special_teams_punting_20+_home_,special_teams_punting_40+_home_,special_teams_punting_lng_home_,special_teams_punting_sck_home_,special_teams_punting_scky_home_,team_away_,defense_downs_3rd_att_away_,defense_downs_3rd_md_away_,defense_downs_4th_att_away_,defense_downs_4th_md_away_,defense_downs_rec_1st_away_,defense_downs_rec_1st_perc_away_,defense_downs_rush_1st_away_,defense_downs_rush_1st_perc_away_,defense_downs_scrm_plys_away_,stats_year_away_,defense_fumbles_ff_away_,defense_fumbles_fr_away_,defense_fumbles_fr_td_away_,defense_fumbles_rec_fum_away_,defense_fumbles_rush_fum_away_,defense_interceptions_int_away_,defense_interceptions_int_td_away_,defense_interceptions_int_yds_away_,defense_interceptions_lng_away_,defense_passing_att_away_,defense_passing_cmp_away_,defense_passing_cmp__perc_away_,defense_passing_yds/att_away_,defense_passing_yds_away_,defense_passing_td_away_,defense_passing_int_away_,defense_passing_rate_away_,defense_passing_1st_away_,defense_passing_1st_perc_away_,defense_passing_20+_away_,defense_passing_40+_away_,defense_passing_lng_away_,defense_passing_sck_away_,defense_receiving_rec_away_,defense_receiving_yds_away_,defense_receiving_yds/rec_away_,defense_receiving_td_away_,defense_receiving_20+_away_,defense_receiving_40+_away_,defense_receiving_lng_away_,defense_receiving_rec_1st_away_,defense_receiving_rec_1st_perc_away_,defense_receiving_rec_fum_away_,defense_receiving_pdef_away_,defense_rushing_att_away_,defense_rushing_rush_yds_away_,defense_rushing_ypc_away_,defense_rushing_td_away_,defense_rushing_20+_away_,defense_rushing_40+_away_,defense_rushing_lng_away_,defense_rushing_rush_1st_away_,defense_rushing_rush_1st_perc_away_,defense_rushing_rush_fum_away_,defense_scoring_fr_td_away_,defense_scoring_sfty_away_,defense_scoring_int_td_away_,defense_tackles_sck_away_,defense_tackles_comb_away_,defense_tackles_asst_away_,defense_tackles_solo_away_,offense_downs_3rd_att_away_,offense_downs_3rd_md_away_,offense_downs_4th_att_away_,offense_downs_4th_md_away_,offense_downs_rec_1st_away_,offense_downs_rec_1st_perc_away_,offense_downs_rush_1st_away_,offense_downs_rush_1st_perc_away_,offense_downs_scrm_plys_away_,offense_passing_att_away_,offense_passing_cmp_away_,offense_passing_cmp__perc_away_,offense_passing_yds/att_away_,offense_passing_pass_yds_away_,offense_passing_td_away_,offense_passing_int_away_,offense_passing_rate_away_,offense_passing_1st_away_,offense_passing_1st_perc_away_,offense_passing_20+_away_,offense_passing_40+_away_,offense_passing_lng_away_,offense_passing_sck_away_,offense_passing_scky_away_,offense_receiving_rec_away_,offense_receiving_yds_away_,offense_receiving_yds/rec_away_,offense_receiving_td_away_,offense_receiving_20+_away_,offense_receiving_40+_away_,offense_receiving_lng_away_,offense_receiving_rec_1st_away_,offense_receiving_rec_1st_perc_away_,offense_receiving_rec_fum_away_,offense_rushing_att_away_,offense_rushing_rush_yds_away_,offense_rushing_ypc_away_,offense_rushing_td_away_,offense_rushing_20+_away_,offense_rushing_40+_away_,offense_rushing_lng_away_,offense_rushing_rush_1st_away_,offense_rushing_rush_1st_perc_away_,offense_rushing_rush_fum_away_,offense_scoring_rsh_td_away_,offense_scoring_rec_td_away_,offense_scoring_tot_td_away_,offense_scoring_2-pt_away_,special_teams_field_goals_fgm_away_,special_teams_field_goals_att_away_,special_teams_field_goals_fg__perc_away_,special_teams_field_goals_1-19_>_percent_a_m_away_,special_teams_field_goals_20-29_>_percent_a_m_away_,special_teams_field_goals_30-39_>_percent_a_m_away_,special_teams_field_goals_40-49_>_percent_a_m_away_,special_teams_field_goals_50-59_>_percent_a_m_away_,special_teams_field_goals_60+_>_percent_a_m_away_,special_teams_field_goals_lng_away_,special_teams_field_goals_fg_blk_away_,special_teams_kickoff_returns_avg_away_,special_teams_kickoff_returns_ret_away_,special_teams_kickoff_returns_yds_away_,special_teams_kickoff_returns_kret_td_away_,special_teams_kickoff_returns_20+_away_,special_teams_kickoff_returns_40+_away_,special_teams_kickoff_returns_lng_away_,special_teams_kickoff_returns_fc_away_,special_teams_kickoff_returns_fum_away_,special_teams_kickoff_returns_fg_blk_away_,special_teams_kickoff_returns_xp_blk_away_,special_teams_kickoffs_ko_away_,special_teams_kickoffs_yds_away_,special_teams_kickoffs_tb_away_,special_teams_kickoffs_tb__perc_away_,special_teams_kickoffs_ret_away_,special_teams_kickoffs_ret_avg_away_,special_teams_kickoffs_osk_away_,special_teams_kickoffs_osk_rec_away_,special_teams_kickoffs_oob_away_,special_teams_kickoffs_td_away_,special_teams_punt_returns_avg_away_,special_teams_punt_returns_ret_away_,special_teams_punt_returns_yds_away_,special_teams_punt_returns_pret_t_away_,special_teams_punt_returns_20+_away_,special_teams_punt_returns_40+_away_,special_teams_punt_returns_lng_away_,special_teams_punt_returns_fc_away_,special_teams_punt_returns_fum_away_,special_teams_punt_returns_p_blk_away_,special_teams_punting_att_away_,special_teams_punting_cmp_away_,special_teams_punting_cmp__perc_away_,special_teams_punting_yds/att_away_,special_teams_punting_pass_yds_away_,special_teams_punting_td_away_,special_teams_punting_int_away_,special_teams_punting_rate_away_,special_teams_punting_1st_away_,special_teams_punting_1st_perc_away_,special_teams_punting_20+_away_,special_teams_punting_40+_away_,special_teams_punting_lng_away_,special_teams_punting_sck_away_,special_teams_punting_scky_away_,home_winner
10275,2011-09-08,2011,1,False,Packers,42,34,Saints,GB,-5.0,48.0,Lambeau Field,False,Packers,213.0,77.0,20.0,6.0,163.0,0.551,82.0,0.208,969.0,2011.0,15.0,8.0,1.0,5.0,6.0,24.0,3.0,318.0,64,527.0,296.0,0.562,6.5,3440.0,16.0,24.0,67.2,163.0,0.309,44.0,8.0,-85,47.0,296.0,3440.0,11.6,16.0,44.0,8.0,-85,163.0,0.551,5.0,89.0,395.0,1838.0,4.6,6.0,10.0,1.0,40,82.0,0.208,6.0,1.0,0.0,3.0,47.0,877.0,203.0,676.0,205.0,85.0,13.0,5.0,204.0,0.58,88.0,0.209,1000.0,541.0,352.0,0.651,8.0,4355.0,31.0,13.0,98.9,204.0,0.377,57.0,11.0,86,38.0,231.0,352.0,4355.0,12.4,31.0,57.0,11.0,86,204.0,0.58,9.0,421.0,1606.0,3.8,11.0,3.0,1.0,71,88.0,0.209,4.0,11.0,31.0,46.0,0.0,22.0,28.0,0.786,1.0,0.875,0.8,0.8,0.5,0.0,56.0,2.0,20.2,62.0,1249.0,0.0,31.0,5.0,51,0.0,2.0,0.0,0.0,84.0,5173.0,4.0,0.048,76.0,21.8,1.0,1.0,3.0,0.0,7.9,41.0,325.0,0.0,3.0,2.0,52,19.0,2.0,0.0,541.0,352.0,0.651,8.0,4355.0,31.0,13.0,98.9,204.0,0.377,57.0,11.0,86,38.0,231.0,Saints,203.0,70.0,23.0,15.0,150.0,0.49,94.0,0.223,948.0,2011.0,25.0,16.0,0.0,10.0,8.0,9.0,2.0,196.0,-96,494.0,306.0,0.619,6.8,3353.0,13.0,9.0,83.2,150.0,0.304,42.0,7.0,68,33.0,306.0,3353.0,11.0,13.0,42.0,7.0,68,150.0,0.49,10.0,69.0,421.0,1797.0,4.3,13.0,13.0,3.0,68,94.0,0.223,8.0,0.0,1.0,2.0,33.0,918.0,211.0,673.0,217.0,106.0,11.0,5.0,236.0,0.524,94.0,0.247,1067.0,661.0,450.0,0.681,7.0,4636.0,33.0,22.0,90.8,236.0,0.357,47.0,10.0,-80,26.0,195.0,450.0,4636.0,10.3,33.0,47.0,10.0,-80,236.0,0.524,2.0,380.0,1519.0,4.0,9.0,7.0,2.0,-55,94.0,0.247,7.0,9.0,33.0,44.0,0.0,25.0,31.0,0.806,1.0,0.625,0.846154,0.857143,1.0,0.0,52.0,0.0,22.7,52.0,1181.0,0.0,39.0,0.0,39,0.0,1.0,0.0,1.0,84.0,5481.0,14.0,0.167,66.0,24.1,2.0,0.0,2.0,0.0,7.8,27.0,210.0,0.0,2.0,2.0,72,18.0,3.0,0.0,661.0,450.0,0.681,7.0,4636.0,33.0,22.0,90.8,236.0,0.357,47.0,10.0,-80,26.0,195.0,True
10276,2011-09-11,2011,1,False,Cardinals,28,21,Panthers,ARI,-7.0,37.5,University of Phoenix Stadium,False,Cardinals,233.0,87.0,17.0,9.0,178.0,0.549,123.0,0.234,1092.0,2011.0,13.0,13.0,4.0,4.0,10.0,17.0,3.0,424.0,66,533.0,324.0,0.608,7.3,3881.0,22.0,17.0,83.6,178.0,0.334,55.0,7.0,70,33.0,324.0,3881.0,12.0,22.0,55.0,7.0,70,178.0,0.549,4.0,81.0,526.0,2323.0,4.4,19.0,19.0,1.0,48,123.0,0.234,10.0,4.0,0.0,3.0,33.0,984.0,173.0,797.0,209.0,58.0,18.0,7.0,154.0,0.54,67.0,0.209,931.0,561.0,285.0,0.508,5.8,3264.0,10.0,19.0,60.5,154.0,0.274,44.0,3.0,-74,50.0,343.0,285.0,3264.0,11.4,10.0,44.0,3.0,-74,154.0,0.54,4.0,320.0,1388.0,4.3,9.0,11.0,2.0,80,67.0,0.209,7.0,9.0,10.0,31.0,1.0,24.0,27.0,0.889,0.0,1.0,1.0,0.8,0.666667,0.0,55.0,0.0,24.8,84.0,2084.0,2.0,53.0,7.0,-102,0.0,1.0,3.0,0.0,70.0,4552.0,16.0,0.229,51.0,24.2,3.0,0.0,0.0,0.0,7.0,42.0,294.0,0.0,2.0,0.0,25,8.0,5.0,0.0,561.0,285.0,0.508,5.8,3264.0,10.0,19.0,60.5,154.0,0.274,44.0,3.0,-74,50.0,343.0,Panthers,213.0,81.0,9.0,4.0,176.0,0.54,121.0,0.241,1060.0,2011.0,14.0,12.0,0.0,5.0,9.0,17.0,1.0,285.0,-66,526.0,326.0,0.62,6.9,3617.0,19.0,17.0,81.0,176.0,0.335,45.0,7.0,-56,31.0,326.0,3617.0,11.1,19.0,45.0,7.0,-56,176.0,0.54,5.0,71.0,503.0,1981.0,3.9,20.0,11.0,3.0,-68,121.0,0.241,9.0,0.0,0.0,1.0,31.0,985.0,219.0,775.0,227.0,69.0,23.0,10.0,125.0,0.488,78.0,0.182,962.0,484.0,256.0,0.529,5.4,2635.0,9.0,21.0,57.0,125.0,0.258,30.0,3.0,88,50.0,346.0,256.0,2635.0,10.3,9.0,30.0,3.0,88,125.0,0.488,2.0,428.0,1846.0,4.3,7.0,14.0,4.0,-60,78.0,0.182,16.0,7.0,9.0,17.0,0.0,25.0,29.0,0.862,0.0,1.0,1.0,0.785714,0.75,0.0,55.0,0.0,21.9,74.0,1619.0,0.0,40.0,5.0,64,0.0,2.0,0.0,0.0,58.0,3811.0,11.0,0.19,45.0,23.6,3.0,0.0,1.0,0.0,9.6,39.0,373.0,0.0,7.0,0.0,37,6.0,4.0,1.0,484.0,256.0,0.529,5.4,2635.0,9.0,21.0,57.0,125.0,0.258,30.0,3.0,88,50.0,346.0,True
10277,2011-09-11,2011,1,False,Ravens,35,7,Steelers,BAL,-1.5,37.0,M&T Bank Stadium,False,Ravens,229.0,87.0,12.0,6.0,187.0,0.536,79.0,0.206,1007.0,2011.0,15.0,8.0,0.0,7.0,10.0,19.0,3.0,289.0,44,596.0,349.0,0.586,6.4,3789.0,22.0,19.0,76.4,187.0,0.314,52.0,5.0,-88,27.0,349.0,3789.0,10.9,22.0,52.0,5.0,-88,187.0,0.536,7.0,83.0,384.0,1503.0,3.9,5.0,6.0,2.0,48,79.0,0.206,10.0,0.0,0.0,3.0,27.0,950.0,235.0,615.0,210.0,82.0,10.0,4.0,175.0,0.568,97.0,0.199,1018.0,491.0,308.0,0.627,7.4,3629.0,25.0,10.0,93.6,175.0,0.356,40.0,7.0,67,40.0,294.0,308.0,3629.0,11.8,25.0,40.0,7.0,67,175.0,0.568,3.0,487.0,1831.0,3.8,11.0,7.0,1.0,-50,97.0,0.199,6.0,11.0,25.0,40.0,0.0,26.0,29.0,0.897,0.0,1.0,0.888889,0.875,0.0,0.0,49.0,0.0,24.7,55.0,1357.0,1.0,36.0,2.0,-103,1.0,2.0,0.0,0.0,79.0,5620.0,40.0,0.506,38.0,26.0,0.0,0.0,1.0,0.0,7.4,43.0,319.0,0.0,4.0,0.0,35,26.0,3.0,0.0,491.0,308.0,0.627,7.4,3629.0,25.0,10.0,93.6,175.0,0.356,40.0,7.0,67,40.0,294.0,Steelers,209.0,70.0,16.0,10.0,182.0,0.501,61.0,0.183,974.0,2011.0,24.0,14.0,0.0,4.0,5.0,21.0,3.0,248.0,79,593.0,363.0,0.612,6.3,3744.0,15.0,21.0,73.1,182.0,0.307,35.0,7.0,67,48.0,363.0,3744.0,10.3,15.0,35.0,7.0,67,182.0,0.501,4.0,91.0,333.0,1004.0,3.0,5.0,1.0,0.0,24,61.0,0.183,5.0,0.0,0.0,3.0,48.0,937.0,252.0,629.0,218.0,94.0,5.0,2.0,175.0,0.587,106.0,0.225,993.0,479.0,298.0,0.622,8.1,3890.0,22.0,9.0,95.2,175.0,0.365,62.0,11.0,56,43.0,289.0,298.0,3890.0,13.0,22.0,62.0,11.0,56,175.0,0.587,3.0,471.0,1924.0,4.1,15.0,16.0,1.0,-50,106.0,0.225,9.0,15.0,22.0,41.0,2.0,29.0,37.0,0.784,1.0,0.909091,1.0,0.615385,0.5,0.0,53.0,0.0,23.5,49.0,1153.0,1.0,32.0,3.0,-89,0.0,2.0,0.0,0.0,86.0,5213.0,7.0,0.081,77.0,20.0,2.0,0.0,1.0,1.0,6.1,37.0,226.0,0.0,3.0,0.0,38,19.0,3.0,1.0,479.0,298.0,0.622,8.1,3890.0,22.0,9.0,95.2,175.0,0.365,62.0,11.0,56,43.0,289.0,True
10278,2011-09-11,2011,1,False,Bears,30,12,Falcons,ATL,-1.5,41.0,Soldier Field,False,Bears,216.0,75.0,13.0,6.0,183.0,0.504,88.0,0.228,1002.0,2011.0,23.0,13.0,0.0,6.0,15.0,21.0,1.0,332.0,-56,582.0,363.0,0.624,6.6,3820.0,14.0,21.0,74.4,183.0,0.314,44.0,5.0,-59,34.0,363.0,3820.0,10.5,14.0,44.0,5.0,-59,183.0,0.504,6.0,81.0,386.0,1441.0,3.7,14.0,13.0,1.0,45,88.0,0.228,15.0,0.0,0.0,1.0,34.0,888.0,172.0,749.0,195.0,64.0,7.0,2.0,163.0,0.591,76.0,0.184,936.0,466.0,276.0,0.592,7.3,3397.0,23.0,21.0,79.5,163.0,0.35,42.0,6.0,-89,56.0,382.0,276.0,3397.0,12.3,23.0,42.0,6.0,89,163.0,0.591,7.0,414.0,1616.0,3.9,10.0,11.0,2.0,-68,76.0,0.184,1.0,10.0,23.0,37.0,1.0,25.0,30.0,0.833,1.0,1.0,0.888889,0.571429,0.75,0.0,54.0,1.0,25.4,59.0,1497.0,0.0,38.0,10.0,79,1.0,1.0,1.0,1.0,78.0,5018.0,16.0,0.205,59.0,23.8,1.0,0.0,2.0,0.0,17.1,33.0,564.0,3.0,9.0,5.0,-89,10.0,0.0,0.0,466.0,276.0,0.592,7.3,3397.0,23.0,21.0,79.5,163.0,0.35,42.0,6.0,-89,56.0,382.0,Falcons,201.0,79.0,17.0,9.0,183.0,0.503,87.0,0.238,957.0,2011.0,14.0,9.0,1.0,4.0,8.0,22.0,2.0,337.0,40,560.0,364.0,0.65,6.9,3846.0,23.0,22.0,82.2,183.0,0.327,38.0,6.0,-83,31.0,364.0,3846.0,10.6,23.0,38.0,6.0,-83,183.0,0.503,4.0,71.0,366.0,1694.0,4.6,9.0,10.0,5.0,-80,87.0,0.238,8.0,1.0,0.0,2.0,31.0,873.0,179.0,686.0,240.0,112.0,15.0,11.0,200.0,0.554,111.0,0.223,1097.0,577.0,361.0,0.626,6.5,3725.0,28.0,9.0,90.8,200.0,0.347,32.0,6.0,46,23.0,158.0,361.0,3725.0,10.3,28.0,32.0,6.0,-46,200.0,0.554,3.0,497.0,1891.0,3.8,14.0,12.0,1.0,55,111.0,0.223,4.0,14.0,28.0,47.0,2.0,28.0,31.0,0.903,0.0,1.0,0.888889,0.8,1.0,0.0,51.0,1.0,26.5,46.0,1221.0,1.0,35.0,5.0,-102,0.0,0.0,0.0,0.0,88.0,5945.0,23.0,0.261,64.0,21.3,1.0,0.0,0.0,1.0,12.1,19.0,230.0,1.0,1.0,1.0,-55,19.0,1.0,0.0,577.0,361.0,0.626,6.5,3725.0,28.0,9.0,90.8,200.0,0.347,32.0,6.0,46,23.0,158.0,True
10279,2011-09-11,2011,1,False,Browns,17,27,Bengals,CLE,-7.0,36.5,FirstEnergy Stadium,False,Browns,224.0,96.0,15.0,11.0,195.0,0.631,102.0,0.202,1041.0,2011.0,9.0,9.0,1.0,1.0,4.0,19.0,2.0,320.0,-64,507.0,309.0,0.61,7.3,3709.0,26.0,19.0,84.8,195.0,0.385,49.0,8.0,-78,29.0,309.0,3709.0,12.0,26.0,49.0,8.0,-78,195.0,0.631,1.0,81.0,505.0,2070.0,4.1,7.0,10.0,1.0,55,102.0,0.202,4.0,1.0,0.0,2.0,29.0,1030.0,257.0,690.0,202.0,75.0,7.0,5.0,154.0,0.52,87.0,0.211,927.0,478.0,296.0,0.619,6.7,3203.0,13.0,18.0,75.0,154.0,0.322,42.0,6.0,65,36.0,214.0,296.0,3203.0,10.8,13.0,42.0,6.0,65,154.0,0.52,4.0,413.0,1646.0,4.0,13.0,7.0,2.0,-68,87.0,0.211,13.0,13.0,13.0,29.0,0.0,23.0,28.0,0.821,1.0,1.0,1.0,0.6,0.0,0.0,48.0,0.0,17.0,70.0,1191.0,0.0,25.0,0.0,37,0.0,3.0,3.0,0.0,67.0,4057.0,7.0,0.104,54.0,17.8,4.0,0.0,2.0,0.0,10.7,22.0,235.0,0.0,1.0,1.0,62,11.0,4.0,0.0,478.0,296.0,0.619,6.7,3203.0,13.0,18.0,75.0,154.0,0.322,42.0,6.0,65,36.0,214.0,Bengals,198.0,75.0,7.0,2.0,170.0,0.522,97.0,0.233,957.0,2011.0,12.0,10.0,1.0,5.0,8.0,16.0,1.0,200.0,-56,514.0,326.0,0.634,7.1,3648.0,21.0,16.0,85.2,170.0,0.331,47.0,10.0,54,27.0,326.0,3648.0,11.2,21.0,47.0,10.0,54,170.0,0.522,5.0,69.0,416.0,1843.0,4.4,15.0,17.0,3.0,61,97.0,0.233,8.0,1.0,0.0,1.0,27.0,926.0,294.0,640.0,224.0,87.0,19.0,13.0,206.0,0.564,85.0,0.199,1046.0,590.0,365.0,0.619,6.8,3988.0,26.0,20.0,82.4,206.0,0.349,45.0,9.0,-78,28.0,221.0,365.0,3988.0,10.9,26.0,45.0,9.0,78,206.0,0.564,4.0,428.0,1522.0,3.6,8.0,3.0,1.0,42,85.0,0.199,11.0,8.0,26.0,36.0,2.0,24.0,31.0,0.774,1.0,0.833333,1.0,0.4,0.5,0.0,54.0,1.0,21.8,71.0,1551.0,0.0,40.0,4.0,60,0.0,2.0,0.0,0.0,76.0,4692.0,8.0,0.105,64.0,23.5,3.0,1.0,1.0,2.0,7.2,36.0,258.0,0.0,2.0,0.0,27,21.0,1.0,0.0,590.0,365.0,0.619,6.8,3988.0,26.0,20.0,82.4,206.0,0.349,45.0,9.0,-78,28.0,221.0,False


## Saving the DF as parquet (a lighter format that preserves the fields).

In [38]:
train_data.to_parquet('train_data.parquet')
test_data.to_parquet('test_data.parquet')