<a href="https://colab.research.google.com/github/rafabandoni/nfl-predict/blob/main/notebooks/00_nfl_predict_class.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [48]:
import pandas as pd

# 00. Loading and Cleaning Data

## Creating dataframes

In [49]:
# Names of the CSV tables to load, grouped by unit. Iterating over one list
# means we don't have to load each dataframe by hand.
defense_tables = [
  'defense_downs',
  'defense_fumbles',
  'defense_interceptions',
  'defense_passing',
  'defense_receiving',
  'defense_rushing',
  'defense_scoring',
  'defense_tackles',
]
offense_tables = [
  'offense_downs',
  'offense_passing',
  'offense_receiving',
  'offense_rushing',
  'offense_scoring',
]
special_teams_tables = [
  'special-teams_field-goals',
  'special-teams_kickoff-returns',
  'special-teams_kickoffs',
  'special-teams_punt-returns',
  'special-teams_punting',
  'special-teams_scoring',
]
data_list = [*defense_tables, *offense_tables, *special_teams_tables]

In [50]:
# Download every table named in `data_list` into a dict keyed by table name.
# A comprehension is used instead of a loop so that no module-level `data`
# variable is left behind: such a variable would alias the last frame loaded
# and be reported by identity-based name lookups over globals().
DATA_URL = 'https://raw.githubusercontent.com/rafabandoni/nfl-predict/refs/heads/main/data'
dataframe_dict = {
  item: pd.read_csv(f'{DATA_URL}/{item}.csv')
  for item in data_list
}

In [51]:
# Bind each table to its own variable by looking it up under its key, so the
# assignment does not depend on the order of the entries in `dataframe_dict`
# (a reordered `data_list` would otherwise silently swap frames).
defense_downs = dataframe_dict['defense_downs']
defense_fumbles = dataframe_dict['defense_fumbles']
defense_interceptions = dataframe_dict['defense_interceptions']
defense_passing = dataframe_dict['defense_passing']
defense_receiving = dataframe_dict['defense_receiving']
defense_rushing = dataframe_dict['defense_rushing']
defense_scoring = dataframe_dict['defense_scoring']
defense_tackles = dataframe_dict['defense_tackles']
offense_downs = dataframe_dict['offense_downs']
offense_passing = dataframe_dict['offense_passing']
offense_receiving = dataframe_dict['offense_receiving']
offense_rushing = dataframe_dict['offense_rushing']
offense_scoring = dataframe_dict['offense_scoring']
# The special-teams keys use hyphens, which are not valid in Python names.
special_teams_field_goals = dataframe_dict['special-teams_field-goals']
special_teams_kickoff_returns = dataframe_dict['special-teams_kickoff-returns']
special_teams_kickoffs = dataframe_dict['special-teams_kickoffs']
special_teams_punt_returns = dataframe_dict['special-teams_punt-returns']
special_teams_punting = dataframe_dict['special-teams_punting']
special_teams_scoring = dataframe_dict['special-teams_scoring']

In [52]:
# Sanity check: preview the first five rows of one of the loaded tables.
offense_rushing.head(n=5)

Unnamed: 0,Team,Att,Rush Yds,YPC,TD,20+,40+,Lng,Rush 1st,Rush 1st%,Rush FUM,year
0,Chiefs,556,2627,4.7,13,15,4,80T,129,23.2,7,2010
1,Jets,534,2374,4.4,14,11,2,53,118,22.1,11,2010
2,Jaguars,512,2395,4.7,14,14,1,74,148,28.9,8,2010
3,Raiders,504,2494,5.0,19,27,6,71,113,22.4,9,2010
4,Falcons,497,1891,3.8,14,12,1,55,111,22.3,4,2010


## Cleaning data

### Joining dataframes

We join the individual tables into a single dataframe so that we can work with all of the statistics at once.

Let's start by joining and cleaning all of the defense data.

In [53]:
# Defense tables that are merged onto `defense_downs`; that table is the base
# of the merge and is therefore not listed here.
defense_dfs = [
  defense_fumbles, defense_interceptions, defense_passing,
  defense_receiving, defense_rushing, defense_scoring, defense_tackles,
]

In [54]:
def get_df_name(df):
  """Return the name of a global variable that is bound to ``df``.

  The lookup compares object identity (``is``) against every entry of this
  module's ``globals()``, in the order in which the names were first bound.

  Names that do not start with an underscore are preferred. IPython keeps
  references to earlier cell outputs under ``_``, ``__``, ``___`` and
  ``_<n>``; those names can come before the user-chosen variable in
  ``globals()`` and would otherwise be returned instead of it. An
  underscore name is only returned when it is the sole match.

  Note that any other alias of the same object (for example a loop variable
  bound earlier) still counts as a match; the earliest bound one is returned.

  Parameters
  ----------
  df : object
      The object (typically a dataframe) whose variable name is wanted.

  Returns
  -------
  str
      The first matching global name.

  Raises
  ------
  IndexError
      If no global variable is bound to ``df``.
  """
  matches = [name for name, value in globals().items() if value is df]
  public_matches = [name for name in matches if not name.startswith('_')]
  candidates = public_matches or matches
  if not candidates:
    raise IndexError('no global variable is bound to the given object')
  return candidates[0]

In [55]:
# Prefix every column with the name of the dataframe it came from, so its
# origin stays traceable after the tables are merged. The join keys listed in
# `keep_names` keep their original names.
# `get_df_name` is defined in the previous cell and is not redefined here.
keep_names = ['year', 'Team']
for df in defense_dfs:
  df_name = get_df_name(df)
  prefix = f'{df_name}_'
  # Columns that already carry the prefix are left alone, which makes this
  # cell safe to re-run: the frames are renamed in place, so a second run
  # would otherwise produce `<name>_<name>_<column>`.
  df.columns = [
    column if column in keep_names or str(column).startswith(prefix)
    else f'{prefix}{column}'
    for column in df.columns
  ]

In [57]:
# Start from a copy of the downs table and attach every other defense table
# to it, matching rows on `year` and `Team`. A left join keeps every row of
# the table being extended.
join_keys = ['year', 'Team']
defense_dataframe = defense_downs.copy()
for df in defense_dfs:
  defense_dataframe = defense_dataframe.merge(df, how='left', on=join_keys)

defense_dataframe.head()

Unnamed: 0,Team,3rd Att,3rd Md,4th Att,4th Md,Rec 1st,Rec 1st%,Rush 1st,Rush 1st%,Scrm Plys,...,defense_rushing_Rush 1st,defense_rushing_Rush 1st%,defense_rushing_Rush FUM,defense_scoring_FR TD,defense_scoring_SFTY,defense_scoring_INT TD,defense_tackles_Sck,defense_tackles_Comb,defense_tackles_Asst,defense_tackles_Solo
0,Lions,198,77,12,5,187,56.7,98,22.1,1005,...,98,22.1,8,1,1,2,44,949,246,743
1,Falcons,201,79,17,9,183,50.3,87,23.8,957,...,87,23.8,8,1,0,2,31,873,179,686
2,Rams,221,74,12,6,191,58.2,88,21.8,1017,...,88,21.8,5,0,0,0,43,861,127,770
3,Jets,219,81,6,4,169,62.8,70,17.2,979,...,70,17.2,12,0,0,3,40,835,181,633
4,Dolphins,226,84,22,8,166,57.6,84,18.8,988,...,84,18.8,4,1,1,0,39,869,156,739


In [63]:
# List every distinct team label in alphabetical order, to inspect which
# spellings are present in the merged table.
sorted_teams = defense_dataframe['Team'].sort_values()
sorted_teams.unique()

array(['49ers', 'Bears', 'Bengals', 'Bills', 'Broncos', 'Browns',
       'Buccaneers', 'Cardinals', 'Chargers', 'Chiefs', 'Colts',
       'Commanders', 'Cowboys', 'Dolphins', 'Eagles', 'Falcons',
       'FootballTeam', 'Giants', 'Jaguars', 'Jets', 'Lions', 'Niners',
       'Packers', 'Panthers', 'Patriots', 'Raiders', 'Rams', 'Ravens',
       'Redskins', 'Saints', 'Seahawks', 'Steelers', 'Texans', 'Titans',
       'Vikings'], dtype=object)