In [1]:
import pandas as pd
import os

In [2]:
# Read in the CSV file
csv_file_path = os.path.join("..","..","Resources","nhl-2023-UTC.csv")
nhl_df = pd.read_csv(csv_file_path)
nhl_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result
0,1,1,10/10/2023 21:30,Amalie Arena,Tampa Bay Lightning,Nashville Predators,
1,2,1,11/10/2023 00:00,PPG Paints Arena,Pittsburgh Penguins,Chicago Blackhawks,
2,3,1,11/10/2023 02:30,T-Mobile Arena,Vegas Golden Knights,Seattle Kraken,
3,4,1,11/10/2023 23:00,PNC Arena,Carolina Hurricanes,Ottawa Senators,
4,5,1,11/10/2023 23:00,Scotiabank Arena,Toronto Maple Leafs,Montréal Canadiens,


In [3]:
#Split date column into date and time
nhl_df[['Date', 'Time']] = nhl_df['Date'].str.split(pat=' ', n=1, expand=True)

nhl_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result,Time
0,1,1,10/10/2023,Amalie Arena,Tampa Bay Lightning,Nashville Predators,,21:30
1,2,1,11/10/2023,PPG Paints Arena,Pittsburgh Penguins,Chicago Blackhawks,,00:00
2,3,1,11/10/2023,T-Mobile Arena,Vegas Golden Knights,Seattle Kraken,,02:30
3,4,1,11/10/2023,PNC Arena,Carolina Hurricanes,Ottawa Senators,,23:00
4,5,1,11/10/2023,Scotiabank Arena,Toronto Maple Leafs,Montréal Canadiens,,23:00


In [4]:
#change date format
nhl_df['Date'] = pd.to_datetime(nhl_df['Date'], format='%d/%m/%Y')
nhl_df['Date'] = nhl_df['Date'].dt.strftime('%m/%d/%Y')
nhl_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result,Time
0,1,1,10/10/2023,Amalie Arena,Tampa Bay Lightning,Nashville Predators,,21:30
1,2,1,10/11/2023,PPG Paints Arena,Pittsburgh Penguins,Chicago Blackhawks,,00:00
2,3,1,10/11/2023,T-Mobile Arena,Vegas Golden Knights,Seattle Kraken,,02:30
3,4,1,10/11/2023,PNC Arena,Carolina Hurricanes,Ottawa Senators,,23:00
4,5,1,10/11/2023,Scotiabank Arena,Toronto Maple Leafs,Montréal Canadiens,,23:00


In [5]:
#changing time zone to central time zone (CST)
nhl_df['Time'] = pd.to_datetime(nhl_df['Time'], utc=True)
# Convert the time column to CST time zone
nhl_df['CST Gametime'] = nhl_df['Time'].dt.tz_convert('America/Chicago')

# Only show the time in 12 hr format
nhl_df['CST Gametime'] = nhl_df['CST Gametime'].apply(lambda x: x.strftime('%I:%M %p'))

# Print the DataFrame
nhl_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result,Time,CST Gametime
0,1,1,10/10/2023,Amalie Arena,Tampa Bay Lightning,Nashville Predators,,2023-08-29 21:30:00+00:00,04:30 PM
1,2,1,10/11/2023,PPG Paints Arena,Pittsburgh Penguins,Chicago Blackhawks,,2023-08-29 00:00:00+00:00,07:00 PM
2,3,1,10/11/2023,T-Mobile Arena,Vegas Golden Knights,Seattle Kraken,,2023-08-29 02:30:00+00:00,09:30 PM
3,4,1,10/11/2023,PNC Arena,Carolina Hurricanes,Ottawa Senators,,2023-08-29 23:00:00+00:00,06:00 PM
4,5,1,10/11/2023,Scotiabank Arena,Toronto Maple Leafs,Montréal Canadiens,,2023-08-29 23:00:00+00:00,06:00 PM


In [6]:
#Drop old time column
nhl_df = nhl_df.drop(columns=['Time'])
nhl_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result,CST Gametime
0,1,1,10/10/2023,Amalie Arena,Tampa Bay Lightning,Nashville Predators,,04:30 PM
1,2,1,10/11/2023,PPG Paints Arena,Pittsburgh Penguins,Chicago Blackhawks,,07:00 PM
2,3,1,10/11/2023,T-Mobile Arena,Vegas Golden Knights,Seattle Kraken,,09:30 PM
3,4,1,10/11/2023,PNC Arena,Carolina Hurricanes,Ottawa Senators,,06:00 PM
4,5,1,10/11/2023,Scotiabank Arena,Toronto Maple Leafs,Montréal Canadiens,,06:00 PM


In [7]:
nhl_df = nhl_df.drop(columns=["Match Number"])
nhl_df.head()

Unnamed: 0,Round Number,Date,Location,Home Team,Away Team,Result,CST Gametime
0,1,10/10/2023,Amalie Arena,Tampa Bay Lightning,Nashville Predators,,04:30 PM
1,1,10/11/2023,PPG Paints Arena,Pittsburgh Penguins,Chicago Blackhawks,,07:00 PM
2,1,10/11/2023,T-Mobile Arena,Vegas Golden Knights,Seattle Kraken,,09:30 PM
3,1,10/11/2023,PNC Arena,Carolina Hurricanes,Ottawa Senators,,06:00 PM
4,1,10/11/2023,Scotiabank Arena,Toronto Maple Leafs,Montréal Canadiens,,06:00 PM


In [8]:
# Rename column
new_column_name = 'Week Number'
nhl_df.rename(columns={'Round Number': new_column_name}, inplace=True)
nhl_df.head()

Unnamed: 0,Week Number,Date,Location,Home Team,Away Team,Result,CST Gametime
0,1,10/10/2023,Amalie Arena,Tampa Bay Lightning,Nashville Predators,,04:30 PM
1,1,10/11/2023,PPG Paints Arena,Pittsburgh Penguins,Chicago Blackhawks,,07:00 PM
2,1,10/11/2023,T-Mobile Arena,Vegas Golden Knights,Seattle Kraken,,09:30 PM
3,1,10/11/2023,PNC Arena,Carolina Hurricanes,Ottawa Senators,,06:00 PM
4,1,10/11/2023,Scotiabank Arena,Toronto Maple Leafs,Montréal Canadiens,,06:00 PM


In [9]:
team_path = os.path.join("..","..","Resources","team.csv")
team_df = pd.read_csv(team_path)

# Take only nfl teams and reset index

nhl_teams_df = team_df[team_df['league_id'] == 3].reset_index()

nhl_teams_df.head()

Unnamed: 0,index,team_id,team,venue_id,league_id
0,62,63,Tampa Bay Lightning,60,3
1,63,64,Dallas Stars,1,3
2,64,65,Colorado Avalanche,4,3
3,65,66,Montréal Canadiens,61,3
4,66,67,Nashville Predators,62,3


In [10]:
# Confirm all team names match with mlb_teams_df
team_nf = []

for i in range(len(nhl_df)):
    home_team = nhl_df.loc[i, 'Home Team']
    away_team = nhl_df.loc[i, 'Away Team']
    
    if home_team not in list(nhl_teams_df['team']):
        if home_team not in team_nf:
            print(f"{home_team} not found. Adding to tracker...")
            team_nf.append(home_team)
    
    if away_team not in list(nhl_teams_df['team']):
        if away_team not in team_nf:
            print(f"{away_team} not found. Adding to tracker...")
            team_nf.append(away_team)
            
print(team_nf)

[]


In [11]:
# Assign each row a 'home_id', 'away_id', and 'venue_id'
nhl_df['home_id'] = ''
nhl_df['away_id'] = ''
nhl_df['venue_id'] = ''

for i in range(len(nhl_df)):
    home_team = nhl_teams_df[nhl_teams_df['team'] == nhl_df.loc[i, 'Home Team']]
    away_team = nhl_teams_df[nhl_teams_df['team'] == nhl_df.loc[i, 'Away Team']]
    
    nhl_df.loc[i, 'home_id'] = int(home_team['team_id'])
    nhl_df.loc[i, 'venue_id'] = int(home_team['venue_id'])
    nhl_df.loc[i, 'away_id'] = int(away_team['team_id'])

nhl_df

Unnamed: 0,Week Number,Date,Location,Home Team,Away Team,Result,CST Gametime,home_id,away_id,venue_id
0,1,10/10/2023,Amalie Arena,Tampa Bay Lightning,Nashville Predators,,04:30 PM,63,67,60
1,1,10/11/2023,PPG Paints Arena,Pittsburgh Penguins,Chicago Blackhawks,,07:00 PM,82,92,73
2,1,10/11/2023,T-Mobile Arena,Vegas Golden Knights,Seattle Kraken,,09:30 PM,90,71,79
3,1,10/11/2023,PNC Arena,Carolina Hurricanes,Ottawa Senators,,06:00 PM,81,69,72
4,1,10/11/2023,Scotiabank Arena,Toronto Maple Leafs,Montréal Canadiens,,06:00 PM,87,66,20
...,...,...,...,...,...,...,...,...,...,...
1307,26,04/19/2024,Canada Life Centre,Winnipeg Jets,Vancouver Canucks,,07:00 PM,68,84,63
1308,26,04/19/2024,Scotiabank Saddledome,Calgary Flames,San Jose Sharks,,08:00 PM,88,86,78
1309,26,04/19/2024,Ball Arena,Colorado Avalanche,Edmonton Oilers,,08:30 PM,65,85,4
1310,26,04/19/2024,T-Mobile Arena,Vegas Golden Knights,Anaheim Ducks,,09:00 PM,90,76,79


In [12]:
nhl_formatted_df = nhl_df[["Date", "CST Gametime", "home_id", "away_id", "venue_id"]]
nhl_formatted_df = nhl_formatted_df.rename(columns={"Date":"event_date",
                                                   "CST Gametime": "event_time_cst"})
nhl_formatted_df.head()

Unnamed: 0,event_date,event_time_cst,home_id,away_id,venue_id
0,10/10/2023,04:30 PM,63,67,60
1,10/11/2023,07:00 PM,82,92,73
2,10/11/2023,09:30 PM,90,71,79
3,10/11/2023,06:00 PM,81,69,72
4,10/11/2023,06:00 PM,87,66,20


In [13]:
output_path = os.path.join("..","..","Resources","nhl_events.csv")
nhl_formatted_df.to_csv(output_path)