In [1]:
import pandas as pd
import numpy as np

### Source: Kaggle 
* https://www.kaggle.com/tobycrabtree/nfl-scores-and-betting-data
* Downloaded the teams, scores, and stadiums CSV files from Kaggle.

In [2]:
# Read the three Kaggle CSV exports; the paths are relative to the working directory.
team_data, stadium_data, score_data = (
    pd.read_csv(csv_name)
    for csv_name in ('teams.csv', 'stadiums.csv', 'scores.csv')
)

In [3]:
# Discard the team_id_pfr column. Rebinding (instead of inplace=True) together with
# errors='ignore' keeps this cell safe to re-run after the column is already gone.
team_data = team_data.drop(columns=['team_id_pfr'], errors='ignore')

In [4]:
# Replace missing values with empty strings (e.g. rows with no team_division)
# so blank cells display as '' instead of NaN.
team_data = team_data.fillna('')
team_data.head()

Unnamed: 0,team_name,team_name_short,team_id,team_conference,team_division,team_conference_pre2002,team_division_pre2002
0,Arizona Cardinals,Cardinals,ARI,NFC,NFC West,NFC,NFC West
1,Phoenix Cardinals,Cardinals,ARI,NFC,,NFC,NFC East
2,St. Louis Cardinals,Cardinals,ARI,NFC,,NFC,NFC East
3,Atlanta Falcons,Falcons,ATL,NFC,NFC South,NFC,NFC West
4,Baltimore Ravens,Ravens,BAL,AFC,AFC North,AFC,AFC Central


In [5]:
# Work on an independent copy holding only the stadium name and its location string.
stadium_cols = ['stadium_name', 'stadium_location']
stadium_df = stadium_data.loc[:, stadium_cols].copy()
stadium_df.head()

Unnamed: 0,stadium_name,stadium_location
0,Alamo Dome,"San Antonio, TX"
1,Alltel Stadium,"Jacksonville, FL"
2,Alumni Stadium,"Chestnut Hill, MA"
3,Anaheim Stadium,"Anaheim, CA"
4,Arrowhead Stadium,"Kansas City, MO"


In [6]:
def get_city(stadium_location):
    """Return everything before the first comma of a location string.

    A string without a comma is returned unchanged.
    """
    city, _sep, _rest = stadium_location.partition(",")
    return city

def get_state(stadium_location):
    """Return the state part of a "City, ST" location string.

    The second comma-separated field is returned with surrounding whitespace
    removed, so "San Antonio, TX" gives "TX" rather than " TX". An empty
    string is returned when the location contains no comma.
    """
    parts = stadium_location.split(",")
    # Guard against locations with no comma instead of raising IndexError.
    return parts[1].strip() if len(parts) > 1 else ""

# Split the location string into separate city and state columns.
# The helpers already return plain strings, so they are passed to apply directly.
stadium_df['city'] = stadium_df['stadium_location'].apply(get_city)
stadium_df['state'] = stadium_df['stadium_location'].apply(get_state)
stadium_df.head()

Unnamed: 0,stadium_name,stadium_location,city,state
0,Alamo Dome,"San Antonio, TX",San Antonio,TX
1,Alltel Stadium,"Jacksonville, FL",Jacksonville,FL
2,Alumni Stadium,"Chestnut Hill, MA",Chestnut Hill,MA
3,Anaheim Stadium,"Anaheim, CA",Anaheim,CA
4,Arrowhead Stadium,"Kansas City, MO",Kansas City,MO


In [7]:
# The raw location string is redundant once city and state have been extracted.
# Rebinding with errors='ignore' keeps the cell re-runnable after the column is gone.
stadium_df = stadium_df.drop(columns=['stadium_location'], errors='ignore')
stadium_df.head()

Unnamed: 0,stadium_name,city,state
0,Alamo Dome,San Antonio,TX
1,Alltel Stadium,Jacksonville,FL
2,Alumni Stadium,Chestnut Hill,MA
3,Anaheim Stadium,Anaheim,CA
4,Arrowhead Stadium,Kansas City,MO


In [8]:
# Index the stadium table by stadium name.
# The guard keeps the cell re-runnable: set_index would raise KeyError once the
# column has already been moved into the index.
if stadium_df.index.name != "stadium_name":
    stadium_df = stadium_df.set_index("stadium_name")
stadium_df.tail()

Unnamed: 0_level_0,city,state
stadium_name,Unnamed: 1_level_1,Unnamed: 2_level_1
War Memorial Stadium,Buffalo,NY
Wembley Stadium,London,UK
Wrigley Field,Chicago,IL
Yale Bowl,New Haven,CT
Yankee Stadium,Bronx,NY


In [9]:
# Keep only the schedule, team, score and stadium columns, as an independent copy.
score_cols = [
    'schedule_season', 'schedule_week', 'schedule_playoff',
    'team_home', 'score_home', 'score_away', 'team_away', 'stadium',
]
score_df = score_data.loc[:, score_cols].copy()

score_df

Unnamed: 0,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,stadium
0,1966,1,False,Miami Dolphins,14,23,Oakland Raiders,Orange Bowl
1,1966,1,False,Houston Oilers,45,7,Denver Broncos,Rice Stadium
2,1966,1,False,San Diego Chargers,27,7,Buffalo Bills,Balboa Stadium
3,1966,2,False,Miami Dolphins,14,19,New York Jets,Orange Bowl
4,1966,1,False,Green Bay Packers,24,3,Baltimore Colts,Lambeau Field
...,...,...,...,...,...,...,...,...
12673,2019,Division,True,Green Bay Packers,28,23,Seattle Seahawks,Lambeau Field
12674,2019,Division,True,Kansas City Chiefs,51,31,Houston Texans,Arrowhead Stadium
12675,2019,Conference,True,Kansas City Chiefs,35,24,Tennessee Titans,Arrowhead Stadium
12676,2019,Conference,True,San Francisco 49ers,37,20,Green Bay Packers,Levi's Stadium


In [10]:
# Add a winner column from the final scores. Games with equal scores have no
# winner and are labelled 'Tie' rather than being credited to the away team.
home_won = score_df['score_home'] > score_df['score_away']
away_won = score_df['score_home'] < score_df['score_away']
score_df['winner'] = np.where(
    home_won,
    score_df['team_home'],
    np.where(away_won, score_df['team_away'], 'Tie'),
)
score_df.head()

Unnamed: 0,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,stadium,winner
0,1966,1,False,Miami Dolphins,14,23,Oakland Raiders,Orange Bowl,Oakland Raiders
1,1966,1,False,Houston Oilers,45,7,Denver Broncos,Rice Stadium,Houston Oilers
2,1966,1,False,San Diego Chargers,27,7,Buffalo Bills,Balboa Stadium,San Diego Chargers
3,1966,2,False,Miami Dolphins,14,19,New York Jets,Orange Bowl,New York Jets
4,1966,1,False,Green Bay Packers,24,3,Baltimore Colts,Lambeau Field,Green Bay Packers


In [11]:
# Inspect the dtype of every score_df column, including the added winner column.
score_df.dtypes

schedule_season      int64
schedule_week       object
schedule_playoff      bool
team_home           object
score_home           int64
score_away           int64
team_away           object
stadium             object
winner              object
dtype: object