In [150]:
import pandas as pd

In [151]:
# Load the raw game logs and parse the yyyymmdd date column into datetimes.
glogs_raw = pd.read_csv('https://raw.githubusercontent.com/tmarchok1/DS440_project/refs/heads/main/game_logs.csv')
glogs_raw['date'] = pd.to_datetime(glogs_raw['date'], format=r'%Y%m%d')
glogs_raw['year'] = glogs_raw['date'].dt.year

# Columns kept for analysis
analysis_columns = ['date', 'year', 'day_of_week', 'v_name', 'v_league', 'v_game_number', 'h_name', 'h_league',
                    'h_game_number', 'v_score', 'h_score', 'day_night', 'park_id', 'attendance']

# One-off venues (international / special-event ballparks) excluded from the analysis
one_off_parks = ['SYD01', 'TOK01', "SJU01", 'FTB01', 'LBV01']

# Row filter: 2005 onward, regular ballparks only, strictly positive attendance.
# Rows with missing attendance compare False and are therefore dropped as well.
keep_rows = (
    (glogs_raw['year'] >= 2005)
    & ~glogs_raw['park_id'].isin(one_off_parks)
    & (glogs_raw['attendance'] > 0)
)

# Take ONE explicit copy of the filtered slice. Assigning new columns onto a
# filtered view (as chained `glogs = glogs[...]` followed by `glogs[col] = ...`
# does) raises SettingWithCopyWarning and is not guaranteed to write through.
glogs = glogs_raw.loc[keep_rows, analysis_columns].copy()

# Create week number variable from the home team's game number.
# NOTE(review): with `// 7 + 1`, week 1 covers games 1-6 and game 7 already
# falls in week 2; use `(h_game_number - 1) // 7 + 1` if 7-game weeks are intended.
glogs['week'] = glogs['h_game_number']//7+1

# Change day/night column to binary variable (D -> 0, N -> 1; any other value becomes NaN)
glogs['day_night'] = glogs['day_night'].map({'D': 0, 'N': 1})

glogs

Unnamed: 0,date,year,day_of_week,v_name,v_league,v_game_number,h_name,h_league,h_game_number,v_score,h_score,day_night,park_id,attendance,week
0,2005-04-03,2005,Sun,BOS,AL,1,NYA,AL,1,2,9,1,NYC16,54818.0,1
1,2005-04-04,2005,Mon,OAK,AL,1,BAL,AL,1,0,4,0,BAL12,48271.0,1
2,2005-04-04,2005,Mon,CLE,AL,1,CHA,AL,1,0,1,0,CHI12,38141.0,1
3,2005-04-04,2005,Mon,KCA,AL,1,DET,AL,1,2,11,0,DET05,44105.0,1
4,2005-04-04,2005,Mon,MIN,AL,1,SEA,AL,1,1,5,0,SEA03,46249.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29151,2016-10-02,2016,Sun,MIL,NL,162,COL,NL,162,6,4,0,DEN02,27762.0,24
29152,2016-10-02,2016,Sun,NYN,NL,162,PHI,NL,162,2,5,0,PHI13,36935.0,24
29153,2016-10-02,2016,Sun,LAN,NL,162,SFN,NL,162,1,7,0,SFO03,41445.0,24
29154,2016-10-02,2016,Sun,PIT,NL,162,SLN,NL,162,4,10,0,STL10,44615.0,24


In [152]:
# Replace the park codes in park_id with readable stadium names.
# Franchises that appear under two codes in this data:
#   St. Louis   STL09 = Busch Stadium II     STL10 = Busch Stadium III
#   Yankees     NYC16 = Old Yankee Stadium   NYC21 = New Yankee Stadium
#   Mets        NYC17 = Shea Stadium         NYC20 = Citi Field
#   Washington  WAS10 = RFK Stadium          WAS11 = Nationals Park
#   Miami       MIA01 = Sun Life Stadium     MIA02 = Marlins Park
#   Minnesota   MIN03 = Metrodome            MIN04 = Target Field
# NOTE(review): these labels are later used as the join key for the stadium
# capacity table, so a label changed here must be changed there as well.
park_names = {
    'ANA01': 'Angel Stadium',
    'ARL02': 'Globe Life Field',
    'ATL02': 'Turner Field',
    'BAL12': 'Camden Yards',
    'BOS07': 'Fenway Park',
    'CHI11': 'Wrigley Field',
    'CHI12': 'US Cellular Field',
    'CIN09': 'Great American Ball Park',
    'CLE08': 'Progressive Field',
    'DEN02': 'Coors Field',
    'DET05': 'Comerica Park',
    'HOU03': 'Minute Maid Park',
    'KAN06': 'Kauffman Stadium',
    'LOS03': 'Dodger Stadium',
    'MIA01': 'Sun Life Stadium',
    'MIA02': 'Marlins Park',
    'MIL06': 'Miller Park',
    'MIN03': 'Metrodome',
    'MIN04': 'Target Field',
    'NYC16': 'Old Yankee Stadium',
    'NYC17': 'Shea Stadium',
    'NYC20': 'Citi Field',
    'NYC21': 'New Yankee Stadium',
    'OAK01': 'Coliseum',
    'PHI13': 'Citizens Bank Park',
    'PHO01': 'Chase Field',
    'PIT08': 'PNC Park',
    'SAN02': 'Petco Park',
    'SEA03': 'Safeco Field',
    'SFO03': 'AT&T Park',
    'STL09': 'Busch Stadium II',
    'STL10': 'Busch Stadium III',
    'STP01': 'Tropicana Field',
    'TOR02': 'Rogers Centre',
    'WAS10': 'RFK Stadium',
    'WAS11': 'Nationals Park',
}

# Codes that are not in the table are left untouched by replace().
glogs['park_id'] = glogs['park_id'].replace(to_replace=park_names)
glogs.head()

Unnamed: 0,date,year,day_of_week,v_name,v_league,v_game_number,h_name,h_league,h_game_number,v_score,h_score,day_night,park_id,attendance,week
0,2005-04-03,2005,Sun,BOS,AL,1,NYA,AL,1,2,9,1,Old Yankee Stadium,54818.0,1
1,2005-04-04,2005,Mon,OAK,AL,1,BAL,AL,1,0,4,0,Camden Yards,48271.0,1
2,2005-04-04,2005,Mon,CLE,AL,1,CHA,AL,1,0,1,0,US Cellular Field,38141.0,1
3,2005-04-04,2005,Mon,KCA,AL,1,DET,AL,1,2,11,0,Comerica Park,44105.0,1
4,2005-04-04,2005,Mon,MIN,AL,1,SEA,AL,1,1,5,0,Safeco Field,46249.0,1


In [153]:
# Rename team abbreviations for consistency with conventional abbreviations.
# A single mapping is applied to both the visiting and the home column so the
# two can never drift apart (previously the same dict was written out twice).
team_abbreviations = {
    'ANA': 'LAA',
    'CHA': 'CHW',
    'CHN': 'CHC',
    'FLO': 'MIA',
    'KCA': 'KC',
    'LAN': 'LAD',
    'NYA': 'NYY',
    'NYN': 'NYM',
    'SDN': 'SD',
    'SFN': 'SF',
    'SLN': 'STL',
    'TBA': 'TB',
    'WAS': 'WSH',
}

# Abbreviations not listed above are left unchanged by replace().
for team_col in ('v_name', 'h_name'):
    glogs[team_col] = glogs[team_col].replace(team_abbreviations)

# Sanity check: show the distinct home-team abbreviations after renaming.
glogs['h_name'].sort_values().unique()

array(['ARI', 'ATL', 'BAL', 'BOS', 'CHC', 'CHW', 'CIN', 'CLE', 'COL',
       'DET', 'HOU', 'KC', 'LAA', 'LAD', 'MIA', 'MIL', 'MIN', 'NYM',
       'NYY', 'OAK', 'PHI', 'PIT', 'SD', 'SEA', 'SF', 'STL', 'TB', 'TEX',
       'TOR', 'WSH'], dtype=object)

In [154]:
# Add column 'location': a place label derived from the home team.
# Two-team markets (Chicago, Los Angeles, New York) share one label.
# NOTE: 'Cincinatti' is intentionally left with its existing spelling because the
# weather frames are tagged with the same string and the weather merge joins on
# 'location'; correcting it here alone would leave CIN games without weather.
home_team_locations = {
    'ARI': 'Arizona',
    'ATL': 'Atlanta',
    'BAL': 'Baltimore',
    'BOS': 'Boston',
    'CHC': 'Chicago',
    'CHW': 'Chicago',
    'CIN': 'Cincinatti',
    'CLE': 'Cleveland',
    'COL': 'Colorado',
    'DET': 'Detroit',
    'HOU': 'Houston',
    'KC': 'Kansas City',
    'LAA': 'Los Angeles',
    'LAD': 'Los Angeles',
    'MIA': 'Miami',
    'MIL': 'Milwaukee',  # was misspelled 'Miluakee'
    'MIN': 'Minnesota',
    'NYM': 'New York',
    'NYY': 'New York',
    'OAK': 'Oakland',
    'PHI': 'Philadelphia',
    'PIT': 'Pittsburgh',
    'SD': 'San Diego',
    'SEA': 'Seattle',
    'SF': 'San Francisco',
    'STL': 'St. Louis',
    'TB': 'Tampa Bay',
    'TEX': 'Texas',
    'TOR': 'Toronto',
    'WSH': 'Washington, D.C.',
}

# map() yields NaN for any home team missing from the table.
glogs['location'] = glogs['h_name'].map(home_team_locations)
glogs.head()

Unnamed: 0,date,year,day_of_week,v_name,v_league,v_game_number,h_name,h_league,h_game_number,v_score,h_score,day_night,park_id,attendance,week,location
0,2005-04-03,2005,Sun,BOS,AL,1,NYY,AL,1,2,9,1,Old Yankee Stadium,54818.0,1,New York
1,2005-04-04,2005,Mon,OAK,AL,1,BAL,AL,1,0,4,0,Camden Yards,48271.0,1,Baltimore
2,2005-04-04,2005,Mon,CLE,AL,1,CHW,AL,1,0,1,0,US Cellular Field,38141.0,1,Chicago
3,2005-04-04,2005,Mon,KC,AL,1,DET,AL,1,2,11,0,Comerica Park,44105.0,1,Detroit
4,2005-04-04,2005,Mon,MIN,AL,1,SEA,AL,1,1,5,0,Safeco Field,46249.0,1,Seattle


In [155]:
# Read in weather data: one CSV per home team, each tagged with the team's
# location label and abbreviation, then stacked into a single frame.
WEATHER_URL = 'https://raw.githubusercontent.com/tmarchok1/DS440_project/refs/heads/main/weather_files/weather_{team}.csv'

# h_name -> location label. The labels must match glogs['location'] exactly
# (including the 'Cincinatti' spelling) because the weather merge joins on
# date, h_name and location.
# NOTE(review): ARI, MIL, MIN, TB and TOR have no weather file loaded here, so
# their home games get NaN temp/precip after the left merge -- confirm intended.
weather_locations = {
    'ATL': 'Atlanta',
    'BAL': 'Baltimore',
    'BOS': 'Boston',
    'CHC': 'Chicago',
    'CHW': 'Chicago',
    'CIN': 'Cincinatti',
    'CLE': 'Cleveland',
    'COL': 'Colorado',
    'DET': 'Detroit',
    'HOU': 'Houston',
    'KC': 'Kansas City',
    'LAA': 'Los Angeles',
    'LAD': 'Los Angeles',
    'MIA': 'Miami',
    'NYM': 'New York',
    'NYY': 'New York',
    'OAK': 'Oakland',
    'PHI': 'Philadelphia',
    'PIT': 'Pittsburgh',
    'SD': 'San Diego',
    'SEA': 'Seattle',
    'SF': 'San Francisco',
    'STL': 'St. Louis',
    'TEX': 'Texas',
    'WSH': 'Washington, D.C.',
}

# Per-team frames, kept in a dict so an individual team can still be inspected
# (e.g. weather_by_team['ATL']). Columns are appended in the order location, h_name.
weather_by_team = {
    team: pd.read_csv(WEATHER_URL.format(team=team)).assign(location=location, h_name=team)
    for team, location in weather_locations.items()
}

# Concatenate all teams' weather vertically (dict order == listing order above)
weather_concat = pd.concat(list(weather_by_team.values()), ignore_index=True)

# Clean column names. This is a positional relabel: it assumes every weather
# file has exactly three columns ordered date, temperature, precipitation.
weather_concat.columns = ['date', 'temp', 'precip', 'location', 'h_name']
weather_concat['date'] = pd.to_datetime(weather_concat['date'])

weather_concat

Unnamed: 0,date,temp,precip,location,h_name
0,2005-04-03,64.4,0.000,Atlanta,ATL
1,2005-04-04,73.1,0.000,Atlanta,ATL
2,2005-04-05,75.0,0.000,Atlanta,ATL
3,2005-04-06,72.4,0.197,Atlanta,ATL
4,2005-04-07,68.8,1.488,Atlanta,ATL
...,...,...,...,...,...
105045,2016-09-29,70.7,1.461,"Washington, D.C.",WSH
105046,2016-09-30,67.0,0.094,"Washington, D.C.",WSH
105047,2016-10-01,68.0,0.181,"Washington, D.C.",WSH
105048,2016-10-02,71.7,0.000,"Washington, D.C.",WSH


In [156]:
# Attach weather (temp, precip) to every game by matching on date, home team
# and location. The left join keeps all games; games with no matching weather
# row end up with NaN in the weather columns.
weather_keys = ['date', 'h_name', 'location']
glogs_weather = glogs.merge(weather_concat, how='left', on=weather_keys)

# Spot-check the result on one team's home games.
glogs_weather.loc[glogs_weather['h_name'] == "KC"].head()

Unnamed: 0,date,year,day_of_week,v_name,v_league,v_game_number,h_name,h_league,h_game_number,v_score,h_score,day_night,park_id,attendance,week,location,temp,precip
90,2005-04-11,2005,Mon,SEA,AL,7,KC,AL,7,8,2,0,Kauffman Stadium,41788.0,2,Kansas City,69.6,0.902
111,2005-04-13,2005,Wed,SEA,AL,8,KC,AL,8,2,1,0,Kauffman Stadium,10577.0,2,Kansas City,62.5,0.0
126,2005-04-14,2005,Thu,SEA,AL,9,KC,AL,9,10,2,0,Kauffman Stadium,10212.0,2,Kansas City,65.5,0.0
136,2005-04-15,2005,Fri,DET,AL,10,KC,AL,10,5,6,1,Kauffman Stadium,29720.0,2,Kansas City,70.5,0.0
150,2005-04-16,2005,Sat,DET,AL,11,KC,AL,11,7,1,0,Kauffman Stadium,22881.0,2,Kansas City,75.4,0.0


In [157]:
# Merge stadium capacity information with glogs (left join on the stadium name in park_id)
capacities = pd.read_csv('https://raw.githubusercontent.com/tmarchok1/DS440_project/refs/heads/main/stadium_data.csv')
glogs_weather_cap = pd.merge(glogs_weather, capacities, on=['park_id'], how='left')

# Convert capacity to float: the column holds text with thousands separators,
# so strip the literal commas (no regex needed) before casting.
glogs_weather_cap['capacity'] = (
    glogs_weather_cap['capacity'].str.replace(',', '', regex=False).astype(float)
)

# Edit Progressive Field capacity - renovations after 2014 season decreased capacity from 43,000 to 35,225,
# so games through 2014 get the pre-renovation figure.
PROGRESSIVE_FIELD_PRE_2015_CAPACITY = 43000
pre_renovation = (
    (glogs_weather_cap['park_id'] == 'Progressive Field')
    & (glogs_weather_cap['date'].dt.year <= 2014)
)
glogs_weather_cap.loc[pre_renovation, 'capacity'] = PROGRESSIVE_FIELD_PRE_2015_CAPACITY

# Drop games with attendance more than 5000 above capacity.
# Rows with missing capacity compare False and are therefore kept.
OVER_CAPACITY_TOLERANCE = 5000
over_capacity = (
    glogs_weather_cap['attendance'] > glogs_weather_cap['capacity'] + OVER_CAPACITY_TOLERANCE
)
# .copy() so later column assignments write to an independent frame, not a slice.
glogs_weather_cap = glogs_weather_cap.loc[~over_capacity].copy()

glogs_weather_cap.head()

Unnamed: 0,date,year,day_of_week,v_name,v_league,v_game_number,h_name,h_league,h_game_number,v_score,h_score,day_night,park_id,attendance,week,location,temp,precip,home team,capacity
0,2005-04-03,2005,Sun,BOS,AL,1,NYY,AL,1,2,9,1,Old Yankee Stadium,54818.0,1,New York,51.7,0.587,Yankees,56937.0
1,2005-04-04,2005,Mon,OAK,AL,1,BAL,AL,1,0,4,0,Camden Yards,48271.0,1,Baltimore,60.7,0.0,Orioles,45971.0
2,2005-04-04,2005,Mon,CLE,AL,1,CHW,AL,1,0,1,0,US Cellular Field,38141.0,1,Chicago,56.2,0.0,White Sox,40615.0
3,2005-04-04,2005,Mon,KC,AL,1,DET,AL,1,2,11,0,Comerica Park,44105.0,1,Detroit,59.0,0.0,Tigers,41083.0
4,2005-04-04,2005,Mon,MIN,AL,1,SEA,AL,1,1,5,0,Safeco Field,46249.0,1,Seattle,50.1,0.13,Mariners,47943.0


In [158]:
# prev_year = the season immediately before the game's season (year - 1).
glogs_weather_cap['prev_year'] = glogs_weather_cap['year'].sub(1)

In [159]:
# Load per-team season results (columns: Team, Year, Wins, MadePlayoffs, DivisionWinner).
wins_url = 'https://raw.githubusercontent.com/tmarchok1/DS440_project/refs/heads/main/MLB_Wins/winsbyYear.csv'
wins_data = pd.read_csv(wins_url)
wins_data.head()

Unnamed: 0,Team,Year,Wins,MadePlayoffs,DivisionWinner
0,Yankees,2005,95,1,1
1,Red Sox,2005,95,1,0
2,Blue Jays,2005,80,0,0
3,Orioles,2005,74,0,0
4,Rays,2005,67,0,0


In [160]:
# Attach the home team's PREVIOUS-season record to every game:
#   1. left-join wins_data where (home team, prev_year) == (Team, Year)
#   2. drop the two helper year columns, leaving the frame keyed on the current year
#   3. rename the result columns to make clear they describe the prior season
glogs_wcp = (
    glogs_weather_cap
    .merge(wins_data, how='left',
           left_on=['home team', 'prev_year'],
           right_on=['Team', 'Year'])
    .drop(columns=['prev_year', 'Year'])
    .rename(columns={
        'Wins': 'prev_year_wins',
        'MadePlayoffs': 'made_playoffs',
        'DivisionWinner': 'won_division',
    })
)
glogs_wcp

Unnamed: 0,date,year,day_of_week,v_name,v_league,v_game_number,h_name,h_league,h_game_number,v_score,...,week,location,temp,precip,home team,capacity,Team,prev_year_wins,made_playoffs,won_division
0,2005-04-03,2005,Sun,BOS,AL,1,NYY,AL,1,2,...,1,New York,51.7,0.587,Yankees,56937.0,Yankees,101,1,1
1,2005-04-04,2005,Mon,OAK,AL,1,BAL,AL,1,0,...,1,Baltimore,60.7,0.000,Orioles,45971.0,Orioles,78,0,0
2,2005-04-04,2005,Mon,CLE,AL,1,CHW,AL,1,0,...,1,Chicago,56.2,0.000,White Sox,40615.0,White Sox,83,0,0
3,2005-04-04,2005,Mon,KC,AL,1,DET,AL,1,2,...,1,Detroit,59.0,0.000,Tigers,41083.0,Tigers,72,0,0
4,2005-04-04,2005,Mon,MIN,AL,1,SEA,AL,1,1,...,1,Seattle,50.1,0.130,Mariners,47943.0,Mariners,63,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29030,2016-10-02,2016,Sun,MIL,NL,162,COL,NL,162,6,...,24,Colorado,78.8,0.000,Rockies,50144.0,Rockies,68,0,0
29031,2016-10-02,2016,Sun,NYM,NL,162,PHI,NL,162,2,...,24,Philadelphia,70.5,0.039,Phillies,43651.0,Phillies,63,0,0
29032,2016-10-02,2016,Sun,LAD,NL,162,SF,NL,162,1,...,24,San Francisco,66.0,0.000,Giants,41915.0,Giants,84,0,0
29033,2016-10-02,2016,Sun,PIT,NL,162,STL,NL,162,4,...,24,St. Louis,69.3,0.028,Cardinals,45494.0,Cardinals,100,1,1


In [161]:
# Remove columns that are not needed past this point.
# NOTE: this reassigns glogs_wcp, so re-running the cell without re-running the
# previous one fails because the columns are already gone.
unneeded_columns = ['v_league', 'v_game_number', 'h_game_number', 'h_league', 'v_score', 'h_score', 'Team']
glogs_wcp = glogs_wcp.drop(unneeded_columns, axis=1)

# Bring in fan engagement data, matched on the home team's name (left join keeps every game).
fanEngagement = pd.read_csv('https://raw.githubusercontent.com/tmarchok1/DS440_project/refs/heads/main/fanEngagement.csv')
glogs_wcpf = glogs_wcp.merge(fanEngagement, how='left', left_on='home team', right_on='Team')

# Relabel ALL columns by position. The first 17 names repeat the existing ones;
# the last three label the columns contributed by fanEngagement.
# NOTE(review): assumes fanEngagement has exactly three columns ordered
# Team, Instagram followers, city population -- confirm against the CSV.
relabelled_columns = [
    'date', 'year', 'day_of_week', 'v_name', 'h_name', 'day_night', 'park_id',
    'attendance', 'week', 'location', 'temp', 'precip', 'home team', 'capacity',
    'prev_year_wins', 'made_playoffs', 'won_division',
    'Team', 'InstagramFollowers', 'CityPopulation',
]
glogs_wcpf.columns = relabelled_columns

# Final column order; 'location', 'home team' and 'Team' are left out on purpose.
final_columns = [
    'date', 'year', 'week', 'day_of_week', 'v_name',
    'h_name', 'day_night', 'park_id', 'temp', 'precip', 'capacity', 'prev_year_wins',
    'made_playoffs', 'won_division', 'InstagramFollowers', 'CityPopulation', 'attendance',
]
glogs_wcpf = glogs_wcpf.loc[:, final_columns]
glogs_wcpf

Unnamed: 0,date,year,week,day_of_week,v_name,h_name,day_night,park_id,temp,precip,capacity,prev_year_wins,made_playoffs,won_division,InstagramFollowers,CityPopulation,attendance
0,2005-04-03,2005,1,Sun,BOS,NYY,1,Old Yankee Stadium,51.7,0.587,56937.0,101,1,1,3900000,19940274,54818.0
1,2005-04-04,2005,1,Mon,OAK,BAL,0,Camden Yards,60.7,0.000,45971.0,78,0,0,746000,2859024,48271.0
2,2005-04-04,2005,1,Mon,CLE,CHW,0,US Cellular Field,56.2,0.000,40615.0,83,0,0,664000,9408576,38141.0
3,2005-04-04,2005,1,Mon,KC,DET,0,Comerica Park,59.0,0.000,41083.0,72,0,0,1000000,4400587,44105.0
4,2005-04-04,2005,1,Mon,MIN,SEA,0,Safeco Field,50.1,0.130,47943.0,63,0,0,927000,4145494,46249.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29030,2016-10-02,2016,24,Sun,MIL,COL,0,Coors Field,78.8,0.000,50144.0,68,0,0,552000,3052498,27762.0
29031,2016-10-02,2016,24,Sun,NYM,PHI,0,Citizens Bank Park,70.5,0.039,43651.0,63,0,0,1300000,6330422,36935.0
29032,2016-10-02,2016,24,Sun,LAD,SF,0,AT&T Park,66.0,0.000,41915.0,84,0,0,1600000,4648486,41445.0
29033,2016-10-02,2016,24,Sun,PIT,STL,0,Busch Stadium III,69.3,0.028,45494.0,100,1,1,1100000,2811927,44615.0


In [162]:
# Export the fully merged table. glogs_final is another name for glogs_wcpf
# (same object, not a copy); the row index is not written to the file.
glogs_final = glogs_wcpf
glogs_final.to_csv(path_or_buf="glogs_final.csv", index=False)