In [1]:
import pandas as pd
import os

In [2]:
#Read in csv
csv_file_path = os.path.join("..","..","Resources","nfl-2023-UTC.csv")
nfl_df = pd.read_csv(csv_file_path)

In [3]:
nfl_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result
0,16,1,08/09/2023 00:20,GEHA Field at Arrowhead Stadium,Kansas City Chiefs,Detroit Lions,
1,8,1,10/09/2023 17:00,Mercedes-Benz Stadium,Atlanta Falcons,Carolina Panthers,
2,9,1,10/09/2023 17:00,M&T Bank Stadium,Baltimore Ravens,Houston Texans,
3,10,1,10/09/2023 17:00,Cleveland Browns Stadium,Cleveland Browns,Cincinnati Bengals,
4,11,1,10/09/2023 17:00,Lucas Oil Stadium,Indianapolis Colts,Jacksonville Jaguars,


In [4]:
#Split date column into date and time
nfl_df[['Date', 'Time']] = nfl_df['Date'].str.split(' ', 1, expand=True)
nfl_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result,Time
0,16,1,08/09/2023,GEHA Field at Arrowhead Stadium,Kansas City Chiefs,Detroit Lions,,00:20
1,8,1,10/09/2023,Mercedes-Benz Stadium,Atlanta Falcons,Carolina Panthers,,17:00
2,9,1,10/09/2023,M&T Bank Stadium,Baltimore Ravens,Houston Texans,,17:00
3,10,1,10/09/2023,Cleveland Browns Stadium,Cleveland Browns,Cincinnati Bengals,,17:00
4,11,1,10/09/2023,Lucas Oil Stadium,Indianapolis Colts,Jacksonville Jaguars,,17:00


In [5]:
#change date format
nfl_df['Date'] = pd.to_datetime(nfl_df['Date'], format='%d/%m/%Y')
nfl_df['Date'] = nfl_df['Date'].dt.strftime('%m/%d/%Y')
nfl_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result,Time
0,16,1,09/08/2023,GEHA Field at Arrowhead Stadium,Kansas City Chiefs,Detroit Lions,,00:20
1,8,1,09/10/2023,Mercedes-Benz Stadium,Atlanta Falcons,Carolina Panthers,,17:00
2,9,1,09/10/2023,M&T Bank Stadium,Baltimore Ravens,Houston Texans,,17:00
3,10,1,09/10/2023,Cleveland Browns Stadium,Cleveland Browns,Cincinnati Bengals,,17:00
4,11,1,09/10/2023,Lucas Oil Stadium,Indianapolis Colts,Jacksonville Jaguars,,17:00


In [6]:
# Rename column
new_column_name = 'Week Number'
nfl_df.rename(columns={'Round Number': new_column_name}, inplace=True)
nfl_df.head()

Unnamed: 0,Match Number,Week Number,Date,Location,Home Team,Away Team,Result,Time
0,16,1,09/08/2023,GEHA Field at Arrowhead Stadium,Kansas City Chiefs,Detroit Lions,,00:20
1,8,1,09/10/2023,Mercedes-Benz Stadium,Atlanta Falcons,Carolina Panthers,,17:00
2,9,1,09/10/2023,M&T Bank Stadium,Baltimore Ravens,Houston Texans,,17:00
3,10,1,09/10/2023,Cleveland Browns Stadium,Cleveland Browns,Cincinnati Bengals,,17:00
4,11,1,09/10/2023,Lucas Oil Stadium,Indianapolis Colts,Jacksonville Jaguars,,17:00


In [7]:
#changing time zone to central time zone (CST)
nfl_df['Time'] = pd.to_datetime(nfl_df['Time'], utc=True)
# Convert the time column to CST time zone
nfl_df['CST Gametime'] = nfl_df['Time'].dt.tz_convert('America/Chicago')

# Only show the time in 12 hr format
nfl_df['CST Gametime'] = nfl_df['CST Gametime'].apply(lambda x: x.strftime('%I:%M %p'))

# Print the DataFrame
nfl_df.head()


Unnamed: 0,Match Number,Week Number,Date,Location,Home Team,Away Team,Result,Time,CST Gametime
0,16,1,09/08/2023,GEHA Field at Arrowhead Stadium,Kansas City Chiefs,Detroit Lions,,2023-08-29 00:20:00+00:00,07:20 PM
1,8,1,09/10/2023,Mercedes-Benz Stadium,Atlanta Falcons,Carolina Panthers,,2023-08-29 17:00:00+00:00,12:00 PM
2,9,1,09/10/2023,M&T Bank Stadium,Baltimore Ravens,Houston Texans,,2023-08-29 17:00:00+00:00,12:00 PM
3,10,1,09/10/2023,Cleveland Browns Stadium,Cleveland Browns,Cincinnati Bengals,,2023-08-29 17:00:00+00:00,12:00 PM
4,11,1,09/10/2023,Lucas Oil Stadium,Indianapolis Colts,Jacksonville Jaguars,,2023-08-29 17:00:00+00:00,12:00 PM


In [8]:
#drop old time column
nfl_df = nfl_df.drop(columns=['Time'])
nfl_df.head()

Unnamed: 0,Match Number,Week Number,Date,Location,Home Team,Away Team,Result,CST Gametime
0,16,1,09/08/2023,GEHA Field at Arrowhead Stadium,Kansas City Chiefs,Detroit Lions,,07:20 PM
1,8,1,09/10/2023,Mercedes-Benz Stadium,Atlanta Falcons,Carolina Panthers,,12:00 PM
2,9,1,09/10/2023,M&T Bank Stadium,Baltimore Ravens,Houston Texans,,12:00 PM
3,10,1,09/10/2023,Cleveland Browns Stadium,Cleveland Browns,Cincinnati Bengals,,12:00 PM
4,11,1,09/10/2023,Lucas Oil Stadium,Indianapolis Colts,Jacksonville Jaguars,,12:00 PM


In [9]:
#drop Match Number column
nfl_df = nfl_df.drop(columns=["Match Number"])
nfl_df.head()

Unnamed: 0,Week Number,Date,Location,Home Team,Away Team,Result,CST Gametime
0,1,09/08/2023,GEHA Field at Arrowhead Stadium,Kansas City Chiefs,Detroit Lions,,07:20 PM
1,1,09/10/2023,Mercedes-Benz Stadium,Atlanta Falcons,Carolina Panthers,,12:00 PM
2,1,09/10/2023,M&T Bank Stadium,Baltimore Ravens,Houston Texans,,12:00 PM
3,1,09/10/2023,Cleveland Browns Stadium,Cleveland Browns,Cincinnati Bengals,,12:00 PM
4,1,09/10/2023,Lucas Oil Stadium,Indianapolis Colts,Jacksonville Jaguars,,12:00 PM


In [10]:
team_path = os.path.join("..","..","Resources","team.csv")
team_df = pd.read_csv(team_path)

# Take only nfl teams and reset index

nfl_teams_df = team_df[team_df['league_id'] == 2].reset_index()

nfl_teams_df.head()

Unnamed: 0,index,team_id,team,venue_id,league_id
0,30,31,Las Vegas Raiders,30,2
1,31,32,Kansas City Chiefs,31,2
2,32,33,Dallas Cowboys,32,2
3,33,34,Carolina Panthers,33,2
4,34,35,New Orleans Saints,34,2


In [11]:
# Confirm all team names match with mlb_teams_df
team_nf = []

for i in range(len(nfl_df)):
    home_team = nfl_df.loc[i, 'Home Team']
    away_team = nfl_df.loc[i, 'Away Team']
    
    if home_team not in list(nfl_teams_df['team']):
        if home_team not in team_nf:
            print(f"{home_team} not found. Adding to tracker...")
            team_nf.append(home_team)
    
    if away_team not in list(nfl_teams_df['team']):
        if away_team not in team_nf:
            print(f"{away_team} not found. Adding to tracker...")
            team_nf.append(away_team)
            
print(team_nf)

[]


In [12]:
# Assign each row a 'home_id', 'away_id', and 'venue_id'
nfl_df['home_id'] = ''
nfl_df['away_id'] = ''
nfl_df['venue_id'] = ''

for i in range(len(nfl_df)):
    home_team = nfl_teams_df[nfl_teams_df['team'] == nfl_df.loc[i, 'Home Team']]
    away_team = nfl_teams_df[nfl_teams_df['team'] == nfl_df.loc[i, 'Away Team']]
    
    nfl_df.loc[i, 'home_id'] = int(home_team['team_id'])
    nfl_df.loc[i, 'venue_id'] = int(home_team['venue_id'])
    nfl_df.loc[i, 'away_id'] = int(away_team['team_id'])

nfl_df

Unnamed: 0,Week Number,Date,Location,Home Team,Away Team,Result,CST Gametime,home_id,away_id,venue_id
0,1,09/08/2023,GEHA Field at Arrowhead Stadium,Kansas City Chiefs,Detroit Lions,,07:20 PM,32,39,31
1,1,09/10/2023,Mercedes-Benz Stadium,Atlanta Falcons,Carolina Panthers,,12:00 PM,50,34,49
2,1,09/10/2023,M&T Bank Stadium,Baltimore Ravens,Houston Texans,,12:00 PM,49,54,48
3,1,09/10/2023,Cleveland Browns Stadium,Cleveland Browns,Cincinnati Bengals,,12:00 PM,38,55,37
4,1,09/10/2023,Lucas Oil Stadium,Indianapolis Colts,Jacksonville Jaguars,,12:00 PM,47,61,46
...,...,...,...,...,...,...,...,...,...,...
267,18,01/05/2024,Caesars Superdome,New Orleans Saints,Atlanta Falcons,,01:00 PM,35,50,34
268,18,01/05/2024,MetLife Stadium,New York Giants,Philadelphia Eagles,,01:00 PM,51,46,50
269,18,01/05/2024,Levi's® Stadium,San Francisco 49ers,Los Angeles Rams,,01:00 PM,45,57,44
270,18,01/05/2024,Nissan Stadium,Tennessee Titans,Jacksonville Jaguars,,01:00 PM,53,61,51


In [13]:
nfl_formatted_df = nfl_df[["Date", "CST Gametime", "home_id", "away_id", "venue_id"]]
nfl_formatted_df = nfl_formatted_df.rename(columns={"Date":"event_date",
                                                   "CST Gametime": "event_time_cst"})
nfl_formatted_df.head()

Unnamed: 0,event_date,event_time_cst,home_id,away_id,venue_id
0,09/08/2023,07:20 PM,32,39,31
1,09/10/2023,12:00 PM,50,34,49
2,09/10/2023,12:00 PM,49,54,48
3,09/10/2023,12:00 PM,38,55,37
4,09/10/2023,12:00 PM,47,61,46


In [14]:
output_path = os.path.join("..","..","Resources","nfl_events.csv")
nfl_formatted_df.to_csv(output_path)