In [1]:
# Import needed dependencies
import requests
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import re
import time
from datetime import date
from bs4 import BeautifulSoup, Comment

In [2]:
team_codes_df = pd.read_csv('team_codes.csv')
games_attended_df = pd.read_csv('sports_attendance.csv')

games_attended_df["URL_Date"] = " "
games_attended_df["Year"] = " "

games_attended_df['Year'] = pd.to_datetime(games_attended_df.Date)
games_attended_df['Year'] = games_attended_df['Year'].dt.strftime('%Y')
games_attended_df['URL_Date'] = pd.to_datetime(games_attended_df.Date)
games_attended_df['URL_Date'] = games_attended_df['URL_Date'].dt.strftime('%Y%m%d0')

nba_attendance_df = games_attended_df[games_attended_df.Sport == ('NBA')].copy()
mlb_attendance_df = games_attended_df[games_attended_df.Sport == ('MLB')].copy()
nfl_attendance_df = games_attended_df[games_attended_df.Sport == ('NFL')].copy()

mlb_df = mlb_attendance_df.merge(team_codes_df, how='left', left_on='Home', right_on='MLB_Teams')
mlb_df = mlb_df.drop(columns=['NBA_Teams', 'NBA_Codes', 'NBA_Teams', 'NFL_Teams', 'NFL_Codes', 'NHL_Teams', 'NHL_Codes', 'MLS_Codes'])

mlb_df['URL_Variable'] = mlb_df['MLB_Codes'] + mlb_df['URL_Date']

away_code_df = mlb_attendance_df.merge(team_codes_df, how='left', left_on='Visitor', right_on='MLB_Teams')
away_code_df = away_code_df.drop(columns=['NBA_Teams', 'NBA_Codes', 'NBA_Teams', 'NFL_Teams', 'NFL_Codes', 'NHL_Teams', 'NHL_Codes', 'MLS_Codes'])

mlb_df['Away_Code'] = away_code_df['MLB_Codes']
mlb_df.rename(columns = {'MLB_Codes':'Home_Code'}, inplace = True)
mlb_df


Unnamed: 0,Date,Home,Visitor,Sport,URL_Date,Year,MLB_Teams,Home_Code,URL_Variable,Away_Code
0,7/27/2007,San Francisco Giants,Florida Marlins,MLB,200707270,2007,San Francisco Giants,SFN,SFN200707270,FLO
1,7/28/2007,San Francisco Giants,Florida Marlins,MLB,200707280,2007,San Francisco Giants,SFN,SFN200707280,FLO
2,9/5/2008,St. Louis Cardinals,Florida Marlins,MLB,200809050,2008,St. Louis Cardinals,SLN,SLN200809050,FLO
3,9/6/2008,St. Louis Cardinals,Florida Marlins,MLB,200809060,2008,St. Louis Cardinals,SLN,SLN200809060,FLO
4,7/25/2009,Los Angeles Angels of Anaheim,Minnesota Twins,MLB,200907250,2009,Los Angeles Angels of Anaheim,ANA,ANA200907250,MIN
...,...,...,...,...,...,...,...,...,...,...
57,5/7/2022,San Francisco Giants,St. Louis Cardinals,MLB,202205070,2022,San Francisco Giants,SFN,SFN202205070,SLN
58,3/30/2023,Oakland Athletics,Los Angeles Angels,MLB,202303300,2023,Oakland Athletics,OAK,OAK202303300,LAA
59,6/17/2023,Seattle Mariners,Chicago White Sox,MLB,202306170,2023,Seattle Mariners,SEA,SEA202306170,CHA
60,7/15/2023,Chicago Cubs,Boston Red Sox,MLB,202307150,2023,Chicago Cubs,CHN,CHN202307150,BOS


In [3]:
URL_list = mlb_df["URL_Variable"].values.tolist()
Home_code_list = mlb_df["Home_Code"].values.tolist()
Away_code_list = mlb_df["Away_Code"].values.tolist()
Home_team_list = mlb_df["Home"].values.tolist()
Away_team_list = mlb_df["Visitor"].values.tolist()
Year_list = mlb_df["Year"].values.tolist()
Date_list = mlb_df["URL_Date"].values.tolist()
Actual_date_list = mlb_df["Date"].values.tolist()

In [4]:
coded_home_teams = []
coded_away_teams = []

for item in Home_team_list: 
    home_code = re.sub(r'[^a-zA-Z0-9]', '', item)
    coded_home_teams.append(home_code)

for item in Away_team_list: 
    away_code = re.sub(r'[^a-zA-Z0-9]', '', item)
    coded_away_teams.append(away_code)

#  LosAngelesAngelsofAnaheim   
coded_home_teams

['SanFranciscoGiants',
 'SanFranciscoGiants',
 'StLouisCardinals',
 'StLouisCardinals',
 'LosAngelesAngelsofAnaheim',
 'LosAngelesDodgers',
 'SanDiegoPadres',
 'SanFranciscoGiants',
 'ArizonaDiamondbacks',
 'SanFranciscoGiants',
 'SanFranciscoGiants',
 'SanDiegoPadres',
 'SanFranciscoGiants',
 'LosAngelesDodgers',
 'SanFranciscoGiants',
 'LosAngelesDodgers',
 'LosAngelesDodgers',
 'OaklandAthletics',
 'PhiladelphiaPhillies',
 'NewYorkYankees',
 'LosAngelesAngelsofAnaheim',
 'LosAngelesAngelsofAnaheim',
 'LosAngelesAngelsofAnaheim',
 'LosAngelesDodgers',
 'LosAngelesDodgers',
 'LosAngelesDodgers',
 'SanDiegoPadres',
 'SanFranciscoGiants',
 'ColoradoRockies',
 'MilwaukeeBrewers',
 'ChicagoWhiteSox',
 'ChicagoWhiteSox',
 'MinnesotaTwins',
 'ChicagoCubs',
 'StLouisCardinals',
 'CincinnatiReds',
 'MilwaukeeBrewers',
 'DetroitTigers',
 'StLouisCardinals',
 'StLouisCardinals',
 'ChicagoCubs',
 'PittsburghPirates',
 'KansasCityRoyals',
 'ClevelandIndians',
 'TorontoBlueJays',
 'StLouisCardinal

In [5]:
# getting length of list
length = len(URL_list)

dataframe_dictionary = {}

box_score_df_list = []
home_stats_df_list = []
away_stats_df_list = []
game_logistics_df_list = []
pitching_line = []

for i in range(length):
    time.sleep(2)
    
    # Create empty lists to hold table data to be scraped
    away_box_score = []
    home_box_score = []
    away_pitching_box_score = []
    home_pitching_box_score = []
    box_score = []

    # input URL and use BeautifulSoup to parse through the page
    url = f'https://www.baseball-reference.com/boxes/{Home_code_list[i]}/{URL_list[i]}.shtml'
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')

    # Grab the table element that has away team statistics
    away_table_placeholder = soup.select_one(f'#all_{coded_away_teams[i]}batting') 
    away_pitching_table_placeholder = soup.select_one(f'#all_{coded_away_teams[i]}pitching') 

    
### ONLY CODE FOUND TO GET PITCHING DATA GETS BOTH TEAMS AT ONCE AND HAS RANDOM NUMBERS ASSOCIATED WITH IT
### WILL NEED TO FIGURE OUT WHAT THE NUMBERS MEAN AND ADJUST CODE TO SCAPE BOTH AT ONCE.
### EXAMPLE FOR FIRST GAME    soup.select_one('#all_2420024094')
    
    # Game box score is commented out in html, so this will grab it out of the comments
    for comment in away_table_placeholder.find_all(text=lambda text: isinstance(text, Comment)):
        if comment.find("<table ") > 0:
            comment_soup = BeautifulSoup(comment, 'html.parser')
            away_table = comment_soup.find("table")

    # Grab data from table and put it into the list created above
    for tr in away_table.select('tr:has(td)'):
        tds = [td.get_text(strip=True) for td in tr.select('td')]
        away_box_score.append(tds)

    # Game pitching box score is commented out in html, so this will grab it out of the comments
    for comment in away_pitching_table_placeholder.find_all(text=lambda text: isinstance(text, Comment)):
        if comment.find("<table ") > 0:
            comment_soup = BeautifulSoup(comment, 'html.parser')
            away_pitching_table = comment_soup.find("table")

    # Grab pitching data from table and put it into the list created above
    for tr in away_pitching_table.select('tr:has(td)'):
        tdsP = [td.get_text(strip=True) for td in tr.select('td')]
        away_pitching_box_score.append(tdsP)

    # Grab the table element that has home team statistics
    home_table_placeholder = soup.select_one(f'#all_{coded_home_teams[i]}batting') 
    home_pitching_table_placeholder = soup.select_one(f'#all_{coded_home_teams[i]}pitching') 

    # Game box score is commented out in html, so this will grab it out of the comments
    for comment in home_table_placeholder.find_all(text=lambda text: isinstance(text, Comment)):
        if comment.find("<table ") > 0:
            comment_soup = BeautifulSoup(comment, 'html.parser')
            home_table = comment_soup.find("table")

    # Grab data from table and put it into the list created above
    for tr in home_table.select('tr:has(td)'):
        tds2 = [td.get_text(strip=True) for td in tr.select('td')]
        home_box_score.append(tds2)

    # Game pitching box score is commented out in html, so this will grab it out of the comments
    for comment in home_pitching_table_placeholder.find_all(text=lambda text: isinstance(text, Comment)):
        if comment.find("<table ") > 0:
            comment_soup = BeautifulSoup(comment, 'html.parser')
            home_pitching_table = comment_soup.find("table")

    # Grab pitching data from table and put it into the list created above
    for tr in home_pitching_table.select('tr:has(td)'):
        tdsP2 = [td.get_text(strip=True) for td in tr.select('td')]
        home_pitching_box_score.append(tdsP2)

    # Grab the table element that has game box score
    box_score_table = soup.select('[class*="linescore_wrap"]')

    # Grab data from table and put it into the list created above
    for tr in box_score_table[0].select('tr:has(td)'):
        tds3 = [td.get_text(strip=True) for td in tr.select('td')]
        box_score.append(tds3)

##########################################################################################
    pitching_line.append(box_score[2][0])
##########################################################################################
    
#     # Grab data from table and put it into the list created above
#     for tr in table.select('tr:has(td)'):
#         tds3 = [td.get_text(strip=True) for td in tr.select('td')]
#         box_score.append(tds3)
       
    # Create dataframe for away batting statistics
    dataframe_dictionary[Away_code_list[i] + Date_list[i]] = pd.DataFrame(away_box_score)

    # Create dataframe for away pitching statistics
    dataframe_dictionary[Away_code_list[i] + Date_list[i]] = pd.DataFrame(away_pitching_box_score)

    # Create an empty list to store away team statistics header information
    away_header_list = []
    away_pitching_header_list = []

    # Grab the table header information to use as column headers in our away batting statistics dataframe
    for tr in away_table.select('tr:has(th)'):
        ths = [th.get_text(strip=True) for th in tr.select('th')]
        away_header_list.append(ths)

    # Grab the table header information to use as column headers in our away pitching statistics dataframe
    for tr in away_pitching_table.select('tr:has(th)'):
        thsP = [th.get_text(strip=True) for th in tr.select('th')]
        away_pitching_header_list.append(thsP)

    away_header_list[0].remove("Batting")    
        
    # Create dataframe for home batting statistics
    dataframe_dictionary[Home_code_list[i] + Date_list[i]] = pd.DataFrame(home_box_score)

    # Create dataframe for home pitching statistics
    dataframe_dictionary[Home_code_list[i] + Date_list[i]] = pd.DataFrame(home_pitching_box_score)

    # Create an empty list to store home team statistics header information
    home_header_list = []
    home_pitching_header_list = []

    # Grab the table header information to use as column headers in our home team statistics dataframe
    for tr in home_table.select('tr:has(th)'):
        ths2 = [th.get_text(strip=True) for th in tr.select('th')]
        home_header_list.append(ths2)    
    
    # Grab the table header information to use as column headers in our home team statistics dataframe
    for tr in home_pitching_table.select('tr:has(th)'):
        thsP2 = [th.get_text(strip=True) for th in tr.select('th')]
        home_pitching_header_list.append(thsP2)    

    home_header_list[0].remove("Batting")
    
    # Create dataframe for game box score
    dataframe_dictionary['Box' + Date_list[i]] = pd.DataFrame(box_score)

    # Create an empty list to store game box score dataframe header information
    box_score_header_list = []

    # Grab the table header information to use as column headers in our game box score dataframe
    for tr in box_score_table[0].select('tr:has(th)'):
        ths3 = [th.get_text(strip=True) for th in tr.select('th')]
        box_score_header_list.append(ths3)

    box_score_header_list[0][1] = 'Team'
    dataframe_dictionary['Box' + Date_list[i]].columns = box_score_header_list

    # box_score_df.columns = box_score_list
#     final_box_score_df = box_score_df.iloc[: , 1:]
#     final_box_score_df.drop(final_box_score_df.tail(1).index,inplace=True) 

#     home_away = ['Away','Home']
#     final_box_score_df.rename(index={0:'Away'},inplace=True)
#     final_box_score_df.rename(index={1:'Home'},inplace=True)


    # If a game goes into extra innings, we need to add to the box score list for each overtime period
    Extra_inning_counter = 10
    while len(box_score[0]) > len(box_score_header_list[0]):
        box_score_header_list.insert(-1, f'{Extra_inning_counter}')
        Extra_inning_counter = Extra_inning_counter + 1
        
    # Update the column and row labels
    dataframe_dictionary['Box' + Date_list[i]] = dataframe_dictionary['Box' + Date_list[i]].set_axis(box_score_header_list, axis=1, inplace=False)
#     dataframe_dictionary['Box' + Date_list[i]] = dataframe_dictionary['Box' + Date_list[i]].set_axis([f'{Away_team_list[i]}', f'{Home_team_list[i]}'])
    
    dataframe_dictionary['Box' + Date_list[i]]['Date'] = Actual_date_list[i]
    box_score_df_list.append(dataframe_dictionary['Box' + Date_list[i]])
    
    # Pull the away team player data and combine the lists into one list
    away_starters = away_header_list[1:-1]
#     away_bench = away_header_list[8:-1]
#     away_players = [away_starters,away_bench]

    # Pull the home team player data and combine the lists into one list
    home_starters = home_header_list[1:-1]
#     home_bench = home_header_list[8:-1]
#     home_players = [home_starters,home_bench]

    # Consolodate the away team list of lists into one list
    away_starters_list = [item for sublist in away_starters for item in sublist]
#     away_bench_list = [item for sublist in away_bench for item in sublist]
#     away_roster = away_starters_list + away_bench_list

    # Consolodate the home team list of lists into one list
    home_starters_list = [item for sublist in home_starters for item in sublist]
#     home_bench_list = [item for sublist in home_bench for item in sublist]
#     home_roster = home_starters_list + home_bench_list

    # Add Team Totals to the end of the away team list
    away_starters.append('Team Totals')
    dataframe_dictionary[Away_code_list[i] + Date_list[i]].index = away_starters

    # Add Team Totals to the end of the home team list
    home_starters.append('Team Totals')
    dataframe_dictionary[Home_code_list[i] + Date_list[i]].index = home_starters

    # Set the home and away dataframe column labels
    column_headers = away_header_list[0]
#     column_headers.remove("Starters")
    dataframe_dictionary[Away_code_list[i] + Date_list[i]].columns = column_headers
    dataframe_dictionary[Home_code_list[i] + Date_list[i]].columns = column_headers

    dataframe_dictionary[Away_code_list[i] + Date_list[i]]['Date'] = Actual_date_list[i]
    dataframe_dictionary[Home_code_list[i] + Date_list[i]]['Date'] = Actual_date_list[i]
    
    dataframe_dictionary[Away_code_list[i] + Date_list[i]]['Team'] = Away_team_list[i]
    dataframe_dictionary[Home_code_list[i] + Date_list[i]]['Team'] = Home_team_list[i]
    
    home_stats_df_list.append(dataframe_dictionary[Home_code_list[i] + Date_list[i]])
    away_stats_df_list.append(dataframe_dictionary[Away_code_list[i] + Date_list[i]])
    
    # Pull logistical information about the game: attendance, length of game, and venue
    text = soup.get_text()
    attendance_string = re.findall(r"Attendance:\s+\d*,\d*", text)
    
    # A failsafe, as one of the games has no attendance listed causing the loop to break
    if(len(attendance_string) != 0):
        
        # Clean up the attendance string that is pulled 
        new_attendance_string = attendance_string[0].strip('([^a-z]xa)')
        attendance_list = list(new_attendance_string)
        attendance_list.pop(11)
    else:
        attendance_string = "No Attendance Available"
    
    # Clean up the game length string that is pulled 
    game_time_string = re.findall(r"Game Duration:\s+\d:\d*", text)    
    new_game_time_string = game_time_string[0].strip('([^a-z]xa)')
    game_time_list = list(new_game_time_string)
    game_time_list.pop(13)

    # join the lists to create final string
    final_attendance_string = "".join(attendance_list)
    final_game_time_sting = "".join(game_time_list)

    # Create throwaway variables to hold split strings so we can pick out what we want later
    x = final_attendance_string.split(":")
    y = final_game_time_sting.split(" ")
    y[0:2] = [' '.join(y[0:2])]

    # Pick out the strings before the : that we split above
    dataframe_dictionary['Attendance' + Date_list[i]] = pd.DataFrame(x, columns = [x[0]])
    dataframe_dictionary['Attendance' + Date_list[i]] = dataframe_dictionary['Attendance' + Date_list[i]].drop([0])
    dataframe_dictionary['Game Duration' + Date_list[i]] = pd.DataFrame(y, columns = [y[0]])
    dataframe_dictionary['Game Duration' + Date_list[i]] = dataframe_dictionary['Game Duration' + Date_list[i]].drop([0])

    # Join the attendance and game time dataframes into one dataframe
    dataframe_dictionary['Game_Logistics' + Date_list[i]] = pd.concat([dataframe_dictionary['Attendance' + Date_list[i]], dataframe_dictionary['Game Duration' + Date_list[i]]], ignore_index=True, sort=False)
    dataframe_dictionary['Game_Logistics' + Date_list[i]]['Game Duration'] = dataframe_dictionary['Game_Logistics' + Date_list[i]]['Game Duration'].shift(-1)
    dataframe_dictionary['Game_Logistics' + Date_list[i]] = dataframe_dictionary['Game_Logistics' + Date_list[i]].dropna()

    # Scrape the name of the vanue and location and add that to the logistics dataframe
    divparent = soup.find('div', attrs={'class':'scorebox_meta'})
    text = divparent.text
    result = re.split(r'\n', text)
    almost_done = result[1]
    final_result = almost_done.partition("Venue: ")[2]
    real_final_result = final_result.split("Game Duration:")[0]
    dataframe_dictionary['Game_Logistics' + Date_list[i]] = dataframe_dictionary['Game_Logistics' + Date_list[i]].assign(Venue=[real_final_result])

    dataframe_dictionary['Game_Logistics' + Date_list[i]]['Date'] = Actual_date_list[i]
    game_logistics_df_list.append(dataframe_dictionary['Game_Logistics' + Date_list[i]])
    

AttributeError: 'NoneType' object has no attribute 'find_all'

In [46]:
############################################################################################################
###########################    Working on Scraping one game of pitching stats    ###########################
############################################################################################################


away_pitching_table_placeholder = soup.select('#all_2420024094')[0]

# soup.find(id="div_FloridaMarlinspitching")
# soup.select('table#FloridaMarlinspitching')
away_pitching_box_score = []
away_pitching_header_list = []


# Game pitching box score is commented out in html, so this will grab it out of the comments
for comment in away_pitching_table_placeholder.find_all(text=lambda text: isinstance(text, Comment)):
    if comment.find("<table ") > 0:
        comment_soup = BeautifulSoup(comment, 'html.parser')
        away_pitching_table = comment_soup.find("table")

# Grab pitching data from table and put it into the list created above
for tr in away_pitching_table.select('tr:has(td)'):
    tdsP = [td.get_text(strip=True) for td in tr.select('td')]
    away_pitching_box_score.append(tdsP)

dataframe_dictionary[Away_code_list[i] + Date_list[i]] = pd.DataFrame(away_pitching_box_score)
    
# Grab the table header information to use as column headers in our away pitching statistics dataframe
for tr in away_pitching_table.select('tr:has(th)'):
    thsP = [th.get_text(strip=True) for th in tr.select('th')]
    away_pitching_header_list.append(thsP)

away_pitching_header_list[0].remove("Pitching")   
pitchers = away_pitching_header_list[1:-1]
pitchers_list = [item for sublist in pitchers for item in sublist]
pitchers_list.append('Team Totals')
dataframe_dictionary[Away_code_list[i] + Date_list[i]].index = pitchers_list


pitcher_column_headers = away_pitching_header_list[0]
dataframe_dictionary[Away_code_list[i] + Date_list[i]].columns = pitcher_column_headers

    

dataframe_dictionary['FLO200707270'] 

Unnamed: 0,IP,H,R,ER,BB,SO,HR,ERA,BF,Pit,...,LD,Unk,GSc,IR,IS,WPA,aLI,cWPA,acLI,RE24
Rick van den Hurk,5.0,8,6,6,5,1,3,7.0,26,101,...,2,0,23.0,,,-0.386,0.93,-0.06%,0.24,-3.3
"Renyel Pinto, BS (5), L (2-4)",0.2,3,4,4,1,1,1,4.24,6,21,...,1,0,,0.0,0.0,-0.425,1.46,-0.07%,0.37,-2.7
Matt Lindstrom,0.1,3,2,1,0,1,0,3.98,4,15,...,2,0,,1.0,1.0,-0.141,0.42,-0.02%,0.11,-2.8
Lee Gardner,1.0,0,0,0,0,0,0,2.85,3,8,...,0,0,,0.0,0.0,0.007,0.09,0.00%,0.02,0.5
Taylor Tankersley,1.0,0,0,0,1,1,0,5.46,3,13,...,0,0,,0.0,0.0,0.003,0.09,0.00%,0.02,0.5
Team Totals,8.0,14,12,11,7,4,4,12.38,42,158,...,5,0,23.0,1.0,1.0,-0.942,0.83,-0.14%,0.21,-7.7


In [38]:
away_pitching_header_list

[['Pitching',
  'IP',
  'H',
  'R',
  'ER',
  'BB',
  'SO',
  'HR',
  'ERA',
  'BF',
  'Pit',
  'Str',
  'Ctct',
  'StS',
  'StL',
  'GB',
  'FB',
  'LD',
  'Unk',
  'GSc',
  'IR',
  'IS',
  'WPA',
  'aLI',
  'cWPA',
  'acLI',
  'RE24'],
 ['Rick van den Hurk'],
 ['Renyel Pinto, BS (5), L (2-4)'],
 ['Matt Lindstrom'],
 ['Lee Gardner'],
 ['Taylor Tankersley'],
 ['Team Totals']]

In [None]:
########## GUIDE TO ACCESSING DATAFRAMES ##########

## Pull Home Stats --> dataframe_dictionary['(Home Code)(Date)']

## Pull Away Stats --> dataframe_dictionary['(Away Code)(Date)']

## Pull Box Score --> dataframe_dictionary['Box(Date)']

## Pull Game Logistics --> dataframe_dictionary['Game_Logistics(Date)']

In [None]:
#Test
dataframe_dictionary['Game_Logistics202205070']

In [None]:
#Test
dataframe_dictionary['SFN202205070']

In [None]:
#Test
dataframe_dictionary['SFN202205070']

In [None]:
#Test
dataframe_dictionary['Box202205070']

In [None]:
total_box_score_df = pd.concat(box_score_df_list)

total_box_score_df

In [None]:
total_home_stats_df = pd.concat(home_stats_df_list)

team_col = total_home_stats_df.pop("Team")
total_home_stats_df.insert(0, "Team", team_col)

total_home_stats_df

In [None]:
total_away_stats_df = pd.concat(away_stats_df_list)

team_col = total_away_stats_df.pop("Team")
total_away_stats_df.insert(0, "Team", team_col)

total_away_stats_df

In [None]:
total_game_logistics_df = pd.concat(game_logistics_df_list)
total_game_logistics_df

In [None]:
index_list = total_box_score_df.index.tolist()
home_away_list = []


for i in range(len(index_list)):
    if (i % 2) == 0:
        home_away_list.append('Away')
    else:
        home_away_list.append('Home')


total_box_score_df = total_box_score_df.set_index('Date')
# total_box_score_df.insert(0, 'Team', index_list)
total_box_score_df.insert(0, 'Home_Away', home_away_list)
total_box_score_df

In [None]:
pitching_line_df = pd.DataFrame(pitching_line)
pitching_line_df

In [None]:
total_game_logistics_df.to_csv('total_game_logistics.csv')
total_away_stats_df.to_csv('total_away_stats.csv')
total_home_stats_df.to_csv('total_home_stats.csv')
total_box_score_df.to_csv('total_box_score_df.csv')
pitching_line_df.to_csv('pitching_line_df.csv')
# hof_df.to_csv('mlb_hof.csv')
# mvp_df.to_csv('mlb_mvp.csv')
# all_league_df.to_csv('all_league_players.csv')
# allstar_df.to_csv('allstars.csv')
# agg_dpoy_seen_df.to_csv('dpoy.csv')

In [None]:
box_score_header_list