In [1]:
import pandas as pd 

In [2]:
#read in csv
csv_file_path = "Resources/mlb-2023-UTC.csv"
mlb_df = pd.read_csv(csv_file_path)
mlb_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result
0,1,1,30/03/2023 17:05,Nationals Park,Washington Nationals,Atlanta Braves,2 - 7
1,2,1,30/03/2023 17:05,Yankee Stadium,New York Yankees,San Francisco Giants,5 - 0
2,3,1,30/03/2023 18:10,Fenway Park,Boston Red Sox,Baltimore Orioles,9 - 10
3,4,1,30/03/2023 18:20,Wrigley Field,Chicago Cubs,Milwaukee Brewers,4 - 0
4,5,1,30/03/2023 19:10,Tropicana Field,Tampa Bay Rays,Detroit Tigers,4 - 0


In [3]:
#Split date column into date and time
mlb_df[['Date', 'Time']] = mlb_df['Date'].str.split(pat=' ', n=1, expand=True)

mlb_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result,Time
0,1,1,30/03/2023,Nationals Park,Washington Nationals,Atlanta Braves,2 - 7,17:05
1,2,1,30/03/2023,Yankee Stadium,New York Yankees,San Francisco Giants,5 - 0,17:05
2,3,1,30/03/2023,Fenway Park,Boston Red Sox,Baltimore Orioles,9 - 10,18:10
3,4,1,30/03/2023,Wrigley Field,Chicago Cubs,Milwaukee Brewers,4 - 0,18:20
4,5,1,30/03/2023,Tropicana Field,Tampa Bay Rays,Detroit Tigers,4 - 0,19:10


In [4]:
#change date format
mlb_df['Date'] = pd.to_datetime(mlb_df['Date'], format='%d/%m/%Y')
mlb_df['Date'] = mlb_df['Date'].dt.strftime('%m/%d/%Y')
mlb_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result,Time
0,1,1,03/30/2023,Nationals Park,Washington Nationals,Atlanta Braves,2 - 7,17:05
1,2,1,03/30/2023,Yankee Stadium,New York Yankees,San Francisco Giants,5 - 0,17:05
2,3,1,03/30/2023,Fenway Park,Boston Red Sox,Baltimore Orioles,9 - 10,18:10
3,4,1,03/30/2023,Wrigley Field,Chicago Cubs,Milwaukee Brewers,4 - 0,18:20
4,5,1,03/30/2023,Tropicana Field,Tampa Bay Rays,Detroit Tigers,4 - 0,19:10


In [5]:
#changing time zone to central time zone (CST)
mlb_df['Time'] = pd.to_datetime(mlb_df['Time'], utc=True)
# Convert the time column to CST time zone
mlb_df['CST Gametime'] = mlb_df['Time'].dt.tz_convert('America/Chicago')

# Only show the time in 12 hr format
mlb_df['CST Gametime'] = mlb_df['CST Gametime'].apply(lambda x: x.strftime('%I:%M %p'))

# Print the DataFrame
mlb_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result,Time,CST Gametime
0,1,1,03/30/2023,Nationals Park,Washington Nationals,Atlanta Braves,2 - 7,2023-08-07 17:05:00+00:00,12:05 PM
1,2,1,03/30/2023,Yankee Stadium,New York Yankees,San Francisco Giants,5 - 0,2023-08-07 17:05:00+00:00,12:05 PM
2,3,1,03/30/2023,Fenway Park,Boston Red Sox,Baltimore Orioles,9 - 10,2023-08-07 18:10:00+00:00,01:10 PM
3,4,1,03/30/2023,Wrigley Field,Chicago Cubs,Milwaukee Brewers,4 - 0,2023-08-07 18:20:00+00:00,01:20 PM
4,5,1,03/30/2023,Tropicana Field,Tampa Bay Rays,Detroit Tigers,4 - 0,2023-08-07 19:10:00+00:00,02:10 PM


In [6]:
#Drop old time column
mlb_df = mlb_df.drop(columns=['Time'])
mlb_df.head()

Unnamed: 0,Match Number,Round Number,Date,Location,Home Team,Away Team,Result,CST Gametime
0,1,1,03/30/2023,Nationals Park,Washington Nationals,Atlanta Braves,2 - 7,12:05 PM
1,2,1,03/30/2023,Yankee Stadium,New York Yankees,San Francisco Giants,5 - 0,12:05 PM
2,3,1,03/30/2023,Fenway Park,Boston Red Sox,Baltimore Orioles,9 - 10,01:10 PM
3,4,1,03/30/2023,Wrigley Field,Chicago Cubs,Milwaukee Brewers,4 - 0,01:20 PM
4,5,1,03/30/2023,Tropicana Field,Tampa Bay Rays,Detroit Tigers,4 - 0,02:10 PM


In [7]:
# Rename column
new_column_name = 'Week Number'
mlb_df.rename(columns={'Round Number': new_column_name}, inplace=True)
mlb_df.head()

Unnamed: 0,Match Number,Week Number,Date,Location,Home Team,Away Team,Result,CST Gametime
0,1,1,03/30/2023,Nationals Park,Washington Nationals,Atlanta Braves,2 - 7,12:05 PM
1,2,1,03/30/2023,Yankee Stadium,New York Yankees,San Francisco Giants,5 - 0,12:05 PM
2,3,1,03/30/2023,Fenway Park,Boston Red Sox,Baltimore Orioles,9 - 10,01:10 PM
3,4,1,03/30/2023,Wrigley Field,Chicago Cubs,Milwaukee Brewers,4 - 0,01:20 PM
4,5,1,03/30/2023,Tropicana Field,Tampa Bay Rays,Detroit Tigers,4 - 0,02:10 PM


In [8]:
#drop Match Number column
mlb_df = mlb_df.drop(columns=["Match Number"])
mlb_df.head()

Unnamed: 0,Week Number,Date,Location,Home Team,Away Team,Result,CST Gametime
0,1,03/30/2023,Nationals Park,Washington Nationals,Atlanta Braves,2 - 7,12:05 PM
1,1,03/30/2023,Yankee Stadium,New York Yankees,San Francisco Giants,5 - 0,12:05 PM
2,1,03/30/2023,Fenway Park,Boston Red Sox,Baltimore Orioles,9 - 10,01:10 PM
3,1,03/30/2023,Wrigley Field,Chicago Cubs,Milwaukee Brewers,4 - 0,01:20 PM
4,1,03/30/2023,Tropicana Field,Tampa Bay Rays,Detroit Tigers,4 - 0,02:10 PM
