In [1]:
import glob
import pandas as pd
import bs4
from bs4 import BeautifulSoup
import datetime
import requests
import urllib2
import re
import sys
import os

In [2]:
def get_soup(filepath):
    """
    Read the XML file at ``filepath`` and return it parsed by BeautifulSoup
    with the ``lxml-xml`` parser.
    """
    with open(filepath, 'r') as xml_file:
        return BeautifulSoup(xml_file.read(), 'lxml-xml')

In [3]:
# Base URL that download_data() extends with year_/month_/day_ path segments.
server_url = 'http://gdx.mlb.com/components/game/mlb/'

In [4]:
def extract_field(document, tag, attr=None):
    """
    Collect the text, or one attribute, of every ``tag`` element in ``document``.

    ``document`` is any object exposing ``find_all(tag)`` (for example a
    BeautifulSoup object). When ``attr`` is None the text of each element is
    used; otherwise the value of attribute ``attr`` is used.

    Every value is encoded to ASCII with unencodable characters dropped.
    An element that does not carry ``attr`` contributes None.

    Returns None when no element matches, the single value when exactly one
    element matches, and a list of values (in document order) otherwise.
    """
    elements = document.find_all(tag)
    if not elements:
        return None

    def _to_ascii(value):
        # ``get`` returns None for a missing attribute; pass that through
        # instead of failing on ``None.encode``.
        if value is None:
            return None
        return value.encode('ASCII', 'ignore')

    if attr is not None:
        values = [_to_ascii(element.get(attr)) for element in elements]
    else:
        values = [_to_ascii(element.text) for element in elements]

    return values[0] if len(values) == 1 else values

In [5]:
def extract_game_info(game_soup):
    """
    Build a dict of game-level metadata from a parsed boxscore document.

    The returned dict holds:
      * 'game_type', 'venue_name', 'venue_id', 'game_id', 'date' -- copied
        from the attributes of the first <boxscore> element;
      * 'umpire' -- {'id': ..., 'name': ...} of the first direct child of
        <umpires> whose 'position' attribute is 'HP'; the key is absent when
        there is no <umpires> element or no such child;
      * 'teams' -- a list with one attribute dict per <team> element.

    Raises ValueError when the document has no <boxscore> element, and
    KeyError when a required attribute is missing.
    """
    boxscore = game_soup.find('boxscore')
    if boxscore is None:
        raise ValueError("no <boxscore> element found in game document")

    game_dict = {}
    for field in ('game_type', 'venue_name', 'venue_id', 'game_id', 'date'):
        game_dict[field] = boxscore[field]

    umpires = game_soup.find('umpires')
    # Iterating a tag directly also yields the text nodes between its child
    # elements, and those cannot be subscripted by attribute name. Ask for
    # direct child *elements* only, and tolerate a missing <umpires> block.
    if umpires is not None:
        umpire_tags = umpires.find_all(True, recursive=False)
    else:
        umpire_tags = []

    for umpire in umpire_tags:
        if umpire.get('position') == 'HP':
            game_dict['umpire'] = {'id': umpire['id'], 'name': umpire['name']}
            break

    # Copy each attribute mapping so the result does not alias the parse tree.
    game_dict['teams'] = [dict(team.attrs) for team in game_soup.find_all('team')]

    return game_dict

In [6]:
def extract_pitch_info(inning_soup):
    """
    Return one dict per <atbat> element found in ``inning_soup``.

    Each dict holds:
      * 'isHome' -- 0 when the at-bat's parent element is named 'top',
        otherwise 1;
      * 'inning_count' -- the 'num' attribute of the at-bat's grandparent;
      * 'atbat_count', 'batter_height', 'batter_id', 'pitcher_id',
        'handedness', 'p_throws' -- copied from the at-bat's 'num',
        'b_height', 'batter', 'pitcher', 'stand' and 'p_throws' attributes;
      * 'pitches' -- a list of dicts with keys 'des', 'type', 'px', 'pz',
        'sz_top' and 'sz_bottom' (the latter read from 'sz_bot').

    A pitch that lacks any of those attributes is left out, because it is
    no longer useful for plotting pitchfx data. A missing at-bat attribute
    raises KeyError.
    """
    # (output key, source attribute) pairs every kept pitch must provide.
    required_pitch_fields = (
        ('des', 'des'),
        ('type', 'type'),
        ('px', 'px'),
        ('pz', 'pz'),
        ('sz_top', 'sz_top'),
        ('sz_bottom', 'sz_bot'),
    )

    atbat_list = []

    for atbat in inning_soup.find_all('atbat'):
        half_inning = atbat.parent

        atbat_dict = {}
        atbat_dict['isHome'] = 0 if half_inning.name == 'top' else 1
        atbat_dict['inning_count'] = half_inning.parent['num']

        atbat_dict['atbat_count'] = atbat['num']
        atbat_dict['batter_height'] = atbat['b_height']
        atbat_dict['batter_id'] = atbat['batter']
        atbat_dict['pitcher_id'] = atbat['pitcher']
        atbat_dict['handedness'] = atbat['stand']
        atbat_dict['p_throws'] = atbat['p_throws']

        pitch_list = []
        for pitch in atbat.find_all('pitch'):
            try:
                pitch_dict = dict(
                    (key, pitch[source]) for key, source in required_pitch_fields
                )
            except KeyError:
                # Only a missing attribute means "skip this pitch"; any other
                # error is a real problem and must not be hidden.
                continue
            pitch_list.append(pitch_dict)

        atbat_dict['pitches'] = pitch_list
        atbat_list.append(atbat_dict)

    return atbat_list

In [7]:
def parse_game(gid_dir):
    """
    Parse one downloaded game directory into a single game dict.

    Reads ``rawboxscore.xml`` and ``inning_all.xml`` from ``gid_dir`` and
    returns the dict built by extract_game_info() with the at-bat list from
    extract_pitch_info() stored under the 'atbat' key.
    """
    # Make sure the directory ends with exactly one slash before appending names.
    base = gid_dir.rstrip('/') + "/"

    boxscore_soup = get_soup(base + "rawboxscore.xml")
    innings_soup = get_soup(base + "inning_all.xml")

    record = extract_game_info(boxscore_soup)
    record['atbat'] = extract_pitch_info(innings_soup)
    return record

In [60]:
# Local root directory; grab_page_with_pattern() creates one sub-directory per
# matched game under it and saves the downloaded files there.
# NOTE(review): data_dir is assigned again further down as "./data/" (with a
# trailing slash) -- confirm which form is intended.
data_dir = './data'

In [14]:
def make_dir(designated_dir):
    """
    Create ``designated_dir``, including missing parent directories, unless
    something already exists at that path.
    """
    if os.path.exists(designated_dir):
        return
    os.makedirs(designated_dir)

In [15]:
def download_page(url, download_dir):
    """
    Fetch ``url`` and save its body in ``download_dir``, unless already saved.

    The file name is the last '/'-separated segment of ``url``. If that file
    already exists in ``download_dir`` nothing is fetched. Fetch and write
    errors are reported on stderr and not raised, so one failing file does
    not stop a larger download. Returns None.
    """
    filename = url.split('/')[-1]
    target_path = os.path.join(download_dir, filename)

    # Check first: an already-downloaded file should cost no network request.
    if os.path.exists(target_path):
        return

    try:
        response = urllib2.urlopen(url)
        try:
            html = response.read()
        finally:
            response.close()
    except Exception as err:
        sys.stderr.write("url: {0}\n\t{1}\n".format(url, str(err)))
        # Nothing was fetched. Return before opening the target file, because
        # an empty file left behind would make every later run skip this url.
        return

    # Label the message with the directory's own name (a prefix cannot be
    # removed with str.strip, which strips a set of characters).
    label = os.path.basename(os.path.normpath(download_dir))
    print("Downloading %s" % (label + "/" + filename))

    try:
        # Binary mode: the payload is written exactly as received.
        with open(target_path, 'wb') as handle:
            handle.write(html)
    except (IOError, OSError) as err:
        sys.stderr.write("file: {0}\n\t{1}\n".format(target_path, str(err)))
        # Remove a partially written file so the download is retried next time.
        if os.path.exists(target_path):
            try:
                os.remove(target_path)
            except OSError:
                pass

In [16]:
def grab_page_with_pattern(url, pattern):
    """
    Walk a tree of link-listing pages and download the game files at its leaves.

    ``pattern`` is a list of regular expressions, one per directory level
    still to descend. On the page at ``url``, every piece of link content
    that matches ``pattern[0]`` (``re.match``, so anchored at the start) is
    followed, with ``pattern[1:]`` as the remaining levels.

    When ``pattern`` is empty, ``url`` itself is treated as a game
    directory: ``rawboxscore.xml`` and ``inning/inning_all.xml`` are saved
    via download_page() into ``<data_dir>/<last segment of url>/``, which is
    created if needed.

    Errors raised while fetching a listing page propagate to the caller.
    """
    # Normalise once, so joining child names never depends on whether the
    # caller (or the previous level) left a trailing slash on the url.
    base_url = url.rstrip('/') + '/'

    if len(pattern) == 0:
        game_name = base_url.split('/')[-2]
        # os.path.join copes with data_dir with or without a trailing slash;
        # the empty last component keeps the trailing separator.
        directory = os.path.join(data_dir, game_name, '')
        make_dir(directory)
        download_page(base_url + 'rawboxscore.xml', directory)
        download_page(base_url + 'inning/inning_all.xml', directory)
        return

    response = urllib2.urlopen(base_url)
    try:
        listing = response.read()
    finally:
        response.close()

    # Name the parser explicitly; lxml is already required for 'lxml-xml'.
    soup = BeautifulSoup(listing, 'lxml')

    wanted_pattern = pattern[0]
    matched_names = []
    for anchor in soup.find_all('a'):
        for node in anchor.contents:
            text = str(node).strip()
            if re.match(wanted_pattern, text):
                matched_names.append(text.strip('/'))

    remaining = pattern[1:]
    for name in matched_names:
        grab_page_with_pattern(base_url + name + '/', remaining)

In [17]:
def download_data(year = 2017, month = "", day = ""):
    """
    Download the raw game files for a whole season, one month, or one day.

    ``year``, ``month`` and ``day`` may be strings or integers of the form
    "YYYY", "MM", "DD". A one-character month or day is zero-padded to two
    characters. Leave ``month`` and/or ``day`` empty ("" or None) to download
    everything at that level:

      * month empty        -> the whole season (``day`` is ignored);
      * day empty          -> every day of the given month;
      * both given         -> the games of that single day.

    By default the entire 2017 season is downloaded.
    """
    def _two_digits(value):
        # None and "" both mean "not specified"; anything else is padded to
        # the two-digit form the function documents ("MM" / "DD").
        text = "" if value is None else str(value).strip()
        return text.zfill(2) if text else text

    year = str(year)
    month = _two_digits(month)
    day = _two_digits(day)

    year_pattern = "year_" + year
    month_pattern = "month_" + month
    day_pattern = "day_" + day
    game_pattern = "gid_"

    # Compare by value: identity ("is") on strings only works by accident.
    if month == "":
        url = server_url + year_pattern + "/"
        pattern = [month_pattern, day_pattern, game_pattern]

    elif day == "":
        # Trailing slash is required: the day directory name found on the
        # page is appended directly to this url.
        url = server_url + year_pattern + "/" + month_pattern + "/"
        pattern = [day_pattern, game_pattern]

    else:
        # No trailing slash here: at the game level a "/" is put in front of
        # each game directory name before it is appended.
        url = server_url + year_pattern + "/" + month_pattern + "/" + day_pattern
        pattern = [game_pattern]

    print("Downloading data for %s %s %s" % (year, month, day))

    grab_page_with_pattern(url, pattern)

In [18]:
import pymongo
from pymongo import MongoClient

In [19]:
# MongoDB client for the server at localhost, port 27017.
client = MongoClient('mongodb://localhost:27017/')

In [20]:
# Database and collection that receive the parsed game documents.
pitchfx_db = client['test_database']
pitchfx_collection = pitchfx_db['test_collection']

In [22]:
# Root of the downloaded data and the list of game directories found under it.
data_dir = "./data/"
gid_dirs = glob.glob('{0}gid_*/'.format(data_dir))

In [23]:
# Parse every downloaded game directory and insert it into MongoDB.
start_time = datetime.datetime.now()

failed_dirs = []

for gid_dir in gid_dirs:
    print(gid_dir)
    try:
        pitchfx_collection.insert_one(parse_game(gid_dir))
    except Exception as err:
        # Keep going so one unreadable game does not stop the import, but
        # record the failure instead of dropping the game silently.
        failed_dirs.append(gid_dir)
        sys.stderr.write("skipped {0}\n\t{1}\n".format(gid_dir, str(err)))

end_time = datetime.datetime.now()
print('Elapsed: %s' % (end_time - start_time))

if failed_dirs:
    print('%d of %d game directories could not be imported' % (len(failed_dirs), len(gid_dirs)))

./data/gid_2017_04_23_tormlb_anamlb_1/
./data/gid_2017_09_09_sdnmlb_arimlb_1/
./data/gid_2017_08_26_houmlb_anamlb_1/
./data/gid_2017_06_02_lanmlb_milmlb_1/
./data/gid_2017_04_24_sdnmlb_arimlb_1/
./data/gid_2017_08_13_sdnmlb_lanmlb_1/
./data/gid_2017_10_06_awcmlb_adtmlb_1/
./data/gid_2017_06_14_atlmlb_wasmlb_1/
./data/gid_2017_08_18_arimlb_minmlb_1/
./data/gid_2017_08_04_detmlb_balmlb_1/
./data/gid_2017_08_02_tbamlb_houmlb_1/
./data/gid_2017_05_03_milmlb_slnmlb_1/
./data/gid_2017_07_08_oakmlb_seamlb_1/
./data/gid_2017_07_01_sfnmlb_pitmlb_1/
./data/gid_2017_08_25_sdnmlb_miamlb_1/
./data/gid_2017_05_01_chamlb_kcamlb_1/
./data/gid_2017_05_17_wasmlb_pitmlb_1/
./data/gid_2017_04_25_houmlb_clemlb_1/
./data/gid_2017_09_19_texmlb_seamlb_1/
./data/gid_2017_08_29_miamlb_wasmlb_1/
./data/gid_2017_09_27_cinmlb_milmlb_1/
./data/gid_2017_06_28_colmlb_sfnmlb_1/
./data/gid_2017_09_10_tbamlb_bosmlb_1/
./data/gid_2017_05_03_texmlb_houmlb_1/
./data/gid_2017_06_11_colmlb_chnmlb_1/
./data/gid_2017_07_21_mil

./data/gid_2017_09_12_chamlb_kcamlb_1/
./data/gid_2017_04_15_chamlb_minmlb_1/
./data/gid_2017_09_08_colmlb_lanmlb_1/
./data/gid_2017_05_29_lanmlb_slnmlb_1/
./data/gid_2017_07_14_slnmlb_pitmlb_1/
./data/gid_2017_07_21_texmlb_tbamlb_1/
./data/gid_2017_07_25_houmlb_phimlb_1/
./data/gid_2017_10_19_lanmlb_chnmlb_1/
./data/gid_2017_07_14_nyamlb_bosmlb_1/
./data/gid_2017_06_19_houmlb_oakmlb_1/
./data/gid_2017_04_18_wasmlb_atlmlb_1/
./data/gid_2017_08_06_texmlb_minmlb_1/
./data/gid_2017_08_23_arimlb_nynmlb_1/
./data/gid_2017_06_11_miamlb_pitmlb_1/
./data/gid_2017_08_06_slnmlb_cinmlb_1/
./data/gid_2017_06_20_nynmlb_lanmlb_1/
./data/gid_2017_04_19_texmlb_oakmlb_1/
./data/gid_2017_06_06_chamlb_tbamlb_1/
./data/gid_2017_04_04_sdnmlb_lanmlb_1/
./data/gid_2017_08_15_atlmlb_colmlb_1/
./data/gid_2017_04_30_colmlb_arimlb_1/
./data/gid_2017_06_07_wasmlb_lanmlb_1/
./data/gid_2017_09_07_nyamlb_balmlb_1/
./data/gid_2017_07_05_miamlb_slnmlb_1/
./data/gid_2017_07_24_kcamlb_detmlb_1/
./data/gid_2017_04_07_nya

./data/gid_2017_04_25_tbamlb_balmlb_1/
./data/gid_2017_09_02_cinmlb_pitmlb_1/
./data/gid_2017_06_28_kcamlb_detmlb_1/
./data/gid_2017_06_01_milmlb_nynmlb_1/
./data/gid_2017_06_26_nyamlb_chamlb_1/
./data/gid_2017_06_17_chamlb_tormlb_1/
./data/gid_2017_09_23_slnmlb_pitmlb_1/
./data/gid_2017_07_20_sdnmlb_sfnmlb_1/
./data/gid_2017_06_08_sfnmlb_milmlb_1/
./data/gid_2017_06_03_sfnmlb_phimlb_1/
./data/gid_2017_08_22_lanmlb_pitmlb_1/
./data/gid_2017_06_27_chnmlb_wasmlb_1/
./data/gid_2017_04_01_cifmin_cinmlb_1/
./data/gid_2017_06_12_nyamlb_anamlb_1/
./data/gid_2017_06_17_lanmlb_cinmlb_1/
./data/gid_2017_09_21_colmlb_sdnmlb_1/
./data/gid_2017_07_06_sdnmlb_clemlb_1/
./data/gid_2017_05_19_texmlb_detmlb_1/
./data/gid_2017_09_04_slnmlb_sdnmlb_1/
./data/gid_2017_07_09_sdnmlb_phimlb_1/
./data/gid_2017_07_09_miamlb_sfnmlb_1/
./data/gid_2017_04_06_miamlb_wasmlb_1/
./data/gid_2017_06_04_minmlb_anamlb_1/
./data/gid_2017_07_18_seamlb_houmlb_1/
./data/gid_2017_07_26_cinmlb_nyamlb_1/
./data/gid_2017_07_09_bos

./data/gid_2017_07_24_chamlb_chnmlb_1/
./data/gid_2017_10_17_lanmlb_nlsmlb_1/
./data/gid_2017_08_19_lanmlb_detmlb_1/
./data/gid_2017_09_04_nyamlb_balmlb_1/
./data/gid_2017_08_19_clemlb_kcamlb_1/
./data/gid_2017_09_03_wasmlb_milmlb_1/
./data/gid_2017_10_06_awcmlb_adomlb_1/
./data/gid_2017_06_29_slnmlb_arimlb_1/
./data/gid_2017_04_05_anamlb_oakmlb_1/
./data/gid_2017_09_03_tormlb_balmlb_1/
./data/gid_2017_04_27_lanmlb_sfnmlb_1/
./data/gid_2017_05_21_chamlb_seamlb_1/
./data/gid_2017_08_12_clemlb_tbamlb_1/
./data/gid_2017_05_28_arimlb_milmlb_1/
./data/gid_2017_06_25_pitmlb_slnmlb_1/
./data/gid_2017_05_15_tbamlb_clemlb_1/
./data/gid_2017_05_10_slnmlb_miamlb_1/
./data/gid_2017_09_27_miamlb_colmlb_1/
./data/gid_2017_06_17_sfnmlb_colmlb_1/
./data/gid_2017_06_25_colmlb_lanmlb_1/
./data/gid_2017_07_14_texmlb_kcamlb_1/
./data/gid_2017_10_10_lanmlb_nwcmlb_1/
./data/gid_2017_04_18_bosmlb_tormlb_1/
./data/gid_2017_06_07_clemlb_colmlb_1/
./data/gid_2017_06_16_chnmlb_pitmlb_1/
./data/gid_2017_07_19_sln

./data/gid_2017_09_11_pitmlb_milmlb_1/
./data/gid_2017_07_30_colmlb_wasmlb_2/
./data/gid_2017_09_20_kcamlb_tormlb_1/
./data/gid_2017_05_07_chamlb_balmlb_1/
./data/gid_2017_06_20_clemlb_balmlb_1/
./data/gid_2017_10_10_wasmlb_ndhmlb_1/
./data/gid_2017_08_05_detmlb_balmlb_1/
./data/gid_2017_04_30_cinmlb_slnmlb_1/
./data/gid_2017_05_23_anamlb_tbamlb_1/
./data/gid_2017_08_30_detmlb_colmlb_1/
./data/gid_2017_05_19_nyamlb_tbamlb_1/
./data/gid_2017_05_13_tbamlb_bosmlb_1/
./data/gid_2017_09_01_slnmlb_sfnmlb_1/
./data/gid_2017_06_18_arimlb_phimlb_1/
./data/gid_2017_08_13_pitmlb_tormlb_1/
./data/gid_2017_05_03_balmlb_bosmlb_1/
./data/gid_2017_06_17_bosmlb_houmlb_1/
./data/gid_2017_09_10_nyamlb_texmlb_1/
./data/gid_2017_06_07_nynmlb_texmlb_1/
./data/gid_2017_04_30_anamlb_texmlb_1/
./data/gid_2017_04_21_sfnmlb_colmlb_1/
./data/gid_2017_10_06_awcmlb_clemlb_1/
./data/gid_2017_09_14_cinmlb_slnmlb_1/
./data/gid_2017_06_11_kcamlb_sdnmlb_1/
./data/gid_2017_04_30_balmlb_nyamlb_1/
./data/gid_2017_08_15_nyn

./data/gid_2017_06_23_chnmlb_miamlb_1/
./data/gid_2017_04_09_sfnmlb_sdnmlb_1/
./data/gid_2017_04_13_balmlb_tormlb_1/
./data/gid_2017_06_17_wasmlb_nynmlb_1/
./data/gid_2017_04_10_cinmlb_pitmlb_1/
./data/gid_2017_09_12_colmlb_arimlb_1/
./data/gid_2017_06_17_clemlb_minmlb_1/
./data/gid_2017_07_03_bosmlb_texmlb_1/
./data/gid_2017_10_10_lanmlb_arimlb_1/
./data/gid_2017_04_28_sdnmlb_sfnmlb_1/
./data/gid_2017_06_10_milmlb_arimlb_1/
./data/gid_2017_09_17_balmlb_nyamlb_1/
./data/gid_2017_04_17_pitmlb_slnmlb_1/
./data/gid_2017_07_26_anamlb_clemlb_1/
./data/gid_2017_04_06_phimlb_cinmlb_1/
./data/gid_2017_09_25_houmlb_texmlb_1/
./data/gid_2017_10_24_lg mlb_lg1mlb_1/
./data/gid_2017_08_31_wasmlb_milmlb_1/
./data/gid_2017_07_25_kcamlb_detmlb_1/
./data/gid_2017_09_07_slnmlb_sdnmlb_1/
./data/gid_2017_08_01_seamlb_texmlb_1/
./data/gid_2017_09_19_wasmlb_atlmlb_1/
./data/gid_2017_05_20_sfnmlb_slnmlb_1/
./data/gid_2017_09_13_detmlb_clemlb_1/
./data/gid_2017_09_29_tormlb_nyamlb_1/
./data/gid_2017_08_25_min

./data/gid_2017_09_03_bosmlb_nyamlb_1/
./data/gid_2017_06_21_arimlb_colmlb_1/
./data/gid_2017_09_18_oakmlb_detmlb_1/
./data/gid_2017_07_31_kcamlb_balmlb_1/
./data/gid_2017_09_20_chamlb_houmlb_1/
./data/gid_2017_08_28_pitmlb_chnmlb_1/
./data/gid_2017_06_04_bosmlb_balmlb_1/
./data/gid_2017_08_06_wasmlb_chnmlb_1/
./data/gid_2017_04_16_milmlb_cinmlb_1/
./data/gid_2017_04_14_balmlb_tormlb_1/
./data/gid_2017_04_13_nynmlb_miamlb_1/
./data/gid_2017_06_02_minmlb_anamlb_1/
./data/gid_2017_06_21_anamlb_nyamlb_1/
./data/gid_2017_06_18_bosmlb_houmlb_1/
./data/gid_2017_04_03_clemlb_texmlb_1/
./data/gid_2017_09_17_lanmlb_wasmlb_1/
./data/gid_2017_06_06_sdnmlb_arimlb_1/
./data/gid_2017_07_16_seamlb_chamlb_1/
./data/gid_2017_08_02_kcamlb_balmlb_1/
./data/gid_2017_04_01_chamlb_milmlb_1/
./data/gid_2017_07_18_sdnmlb_colmlb_1/
./data/gid_2017_08_23_milmlb_sfnmlb_1/
./data/gid_2017_04_25_wasmlb_colmlb_1/
./data/gid_2017_07_30_tbamlb_nyamlb_1/
./data/gid_2017_04_10_lanmlb_chnmlb_1/
./data/gid_2017_08_02_ari

./data/gid_2017_06_24_pitmlb_slnmlb_1/
./data/gid_2017_05_08_slnmlb_miamlb_1/
./data/gid_2017_04_21_wasmlb_nynmlb_1/
./data/gid_2017_06_07_phimlb_atlmlb_1/
./data/gid_2017_09_03_cinmlb_pitmlb_1/
./data/gid_2017_05_16_tbamlb_clemlb_1/
./data/gid_2017_05_15_nynmlb_arimlb_1/
./data/gid_2017_05_27_balmlb_houmlb_1/
./data/gid_2017_05_20_bosmlb_oakmlb_1/
./data/gid_2017_04_24_chnmlb_pitmlb_1/
./data/gid_2017_07_22_detmlb_minmlb_1/
./data/gid_2017_10_06_bosmlb_houmlb_1/
./data/gid_2017_06_30_wasmlb_slnmlb_1/
./data/gid_2017_07_16_lanmlb_miamlb_1/
./data/gid_2017_10_01_detmlb_minmlb_1/
./data/gid_2017_06_19_sdnmlb_chnmlb_1/
./data/gid_2017_06_28_chnmlb_wasmlb_1/
./data/gid_2017_10_07_chnmlb_wasmlb_1/
./data/gid_2017_04_24_minmlb_texmlb_1/
./data/gid_2017_08_08_lanmlb_arimlb_1/
./data/gid_2017_05_24_kcamlb_nyamlb_1/
./data/gid_2017_05_20_chamlb_seamlb_1/
./data/gid_2017_06_02_atlmlb_cinmlb_1/
./data/gid_2017_04_11_milmlb_tormlb_1/
./data/gid_2017_06_02_sfnmlb_phimlb_1/
./data/gid_2017_09_30_sdn

./data/gid_2017_08_01_arimlb_chnmlb_1/
./data/gid_2017_09_13_oakmlb_bosmlb_1/
./data/gid_2017_08_08_balmlb_anamlb_1/
./data/gid_2017_05_01_milmlb_slnmlb_1/
./data/gid_2017_07_15_minmlb_houmlb_1/
./data/gid_2017_05_28_slnmlb_colmlb_1/
./data/gid_2017_04_28_tbamlb_tormlb_1/
./data/gid_2017_09_26_sfnmlb_arimlb_1/
./data/gid_2017_08_07_milmlb_minmlb_1/
./data/gid_2017_09_13_miamlb_phimlb_1/
./data/gid_2017_04_13_lanmlb_chnmlb_1/
./data/gid_2017_09_15_chamlb_detmlb_1/
./data/gid_2017_04_07_seamlb_anamlb_1/
./data/gid_2017_09_13_chamlb_kcamlb_1/
./data/gid_2017_07_01_minmlb_kcamlb_2/
./data/gid_2017_06_10_cinmlb_lanmlb_1/
./data/gid_2017_08_26_milmlb_lanmlb_1/
./data/gid_2017_05_12_cinmlb_sfnmlb_1/
./data/gid_2017_08_10_anamlb_seamlb_1/
./data/gid_2017_08_27_colmlb_atlmlb_1/
./data/gid_2017_04_29_chamlb_detmlb_1/
./data/gid_2017_06_13_atlmlb_wasmlb_1/
./data/gid_2017_07_07_kcamlb_lanmlb_1/
./data/gid_2017_05_10_sdnmlb_texmlb_1/
./data/gid_2017_07_28_chnmlb_milmlb_1/
./data/gid_2017_04_25_nya

./data/gid_2017_04_12_chamlb_clemlb_1/
./data/gid_2017_08_14_kcamlb_oakmlb_1/
./data/gid_2017_05_13_cinmlb_sfnmlb_1/
./data/gid_2017_04_28_chnmlb_bosmlb_1/
./data/gid_2017_06_25_anamlb_bosmlb_1/
./data/gid_2017_05_29_seamlb_colmlb_1/
./data/gid_2017_05_18_milmlb_sdnmlb_1/
./data/gid_2017_05_04_balmlb_bosmlb_1/
./data/gid_2017_08_13_atlmlb_slnmlb_1/
./data/gid_2017_07_05_tormlb_nyamlb_1/
./data/gid_2017_07_08_houmlb_tormlb_1/
./data/gid_2017_07_09_anamlb_texmlb_1/
./data/gid_2017_09_12_lanmlb_sfnmlb_1/
./data/gid_2017_10_09_adomlb_awcmlb_1/
./data/gid_2017_04_06_seamlb_houmlb_1/
./data/gid_2017_05_21_wasmlb_atlmlb_1/
./data/gid_2017_05_31_detmlb_kcamlb_1/
./data/gid_2017_08_03_oakmlb_sfnmlb_1/
./data/gid_2017_07_05_houmlb_atlmlb_1/
./data/gid_2017_08_09_kcamlb_slnmlb_1/
./data/gid_2017_06_12_chnmlb_nynmlb_1/
./data/gid_2017_07_15_clemlb_oakmlb_1/
./data/gid_2017_07_19_wasmlb_anamlb_1/
./data/gid_2017_07_28_cinmlb_miamlb_1/
./data/gid_2017_06_20_houmlb_oakmlb_1/
./data/gid_2017_07_03_nyn

./data/gid_2017_10_06_adhmlb_adtmlb_1/
./data/gid_2017_08_29_lanmlb_arimlb_1/
./data/gid_2017_05_26_detmlb_chamlb_1/
./data/gid_2017_05_11_kcamlb_tbamlb_1/
./data/gid_2017_05_13_lanmlb_colmlb_1/
./data/gid_2017_09_01_oakmlb_seamlb_1/
./data/gid_2017_07_18_lanmlb_chamlb_1/
./data/gid_2017_08_03_texmlb_minmlb_1/
./data/gid_2017_10_01_cinmlb_chnmlb_1/
./data/gid_2017_06_03_chamlb_detmlb_1/
./data/gid_2017_04_22_wasmlb_nynmlb_1/
./data/gid_2017_09_10_sfnmlb_chamlb_1/
./data/gid_2017_06_16_nyamlb_oakmlb_1/
./data/gid_2017_06_26_minmlb_bosmlb_1/
./data/gid_2017_06_16_sdnmlb_milmlb_1/
./data/gid_2017_04_26_kcamlb_chamlb_1/
./data/gid_2017_05_22_pitmlb_atlmlb_1/
./data/gid_2017_05_26_seamlb_bosmlb_1/
./data/gid_2017_04_03_kcamlb_minmlb_1/
./data/gid_2017_08_12_atlmlb_slnmlb_1/
./data/gid_2017_09_16_texmlb_anamlb_1/
./data/gid_2017_08_28_clemlb_nyamlb_1/
./data/gid_2017_05_09_atlmlb_houmlb_1/
./data/gid_2017_08_09_balmlb_anamlb_1/
./data/gid_2017_09_21_tbamlb_balmlb_1/
./data/gid_2017_07_02_was

./data/gid_2017_04_23_miamlb_sdnmlb_1/
./data/gid_2017_05_27_cinmlb_phimlb_1/
./data/gid_2017_07_09_balmlb_minmlb_1/
./data/gid_2017_08_08_sdnmlb_cinmlb_1/
./data/gid_2017_05_20_phimlb_pitmlb_1/
./data/gid_2017_04_08_chnmlb_milmlb_1/
./data/gid_2017_04_06_tormlb_tbamlb_1/
./data/gid_2017_05_16_bosmlb_slnmlb_1/
./data/gid_2017_04_07_sfnmlb_sdnmlb_1/
./data/gid_2017_09_22_wasmlb_nynmlb_1/
./data/gid_2017_10_04_ndtmlb_arimlb_1/
./data/gid_2017_07_04_arimlb_lanmlb_1/
./data/gid_2017_06_02_arimlb_miamlb_1/
./data/gid_2017_05_12_nynmlb_milmlb_1/
./data/gid_2017_08_21_minmlb_chamlb_1/
./data/gid_2017_07_25_pitmlb_sfnmlb_1/
./data/gid_2017_08_27_pitmlb_cinmlb_1/
./data/gid_2017_06_06_houmlb_kcamlb_1/
./data/gid_2017_09_22_miamlb_arimlb_1/
./data/gid_2017_05_30_tbamlb_texmlb_1/
./data/gid_2017_08_14_cinmlb_chnmlb_1/
./data/gid_2017_08_30_texmlb_houmlb_1/
./data/gid_2017_06_20_sfnmlb_atlmlb_1/
./data/gid_2017_05_01_texmlb_houmlb_1/
./data/gid_2017_09_23_texmlb_oakmlb_1/
./data/gid_2017_04_05_col

./data/gid_2017_09_23_tbamlb_balmlb_1/
./data/gid_2017_07_31_atlmlb_phimlb_1/
./data/gid_2017_09_03_anamlb_texmlb_1/
./data/gid_2017_10_05_bosmlb_houmlb_1/
./data/gid_2017_04_28_minmlb_kcamlb_1/
./data/gid_2017_10_13_nyamlb_houmlb_1/
./data/gid_2017_08_19_wasmlb_sdnmlb_1/
./data/gid_2017_07_05_tbamlb_chnmlb_1/
./data/gid_2017_06_24_balmlb_tbamlb_1/
./data/gid_2017_04_12_nynmlb_phimlb_1/
./data/gid_2017_04_17_clemlb_minmlb_1/
./data/gid_2017_05_03_anamlb_seamlb_1/
./data/gid_2017_07_18_tormlb_bosmlb_1/
./data/gid_2017_06_23_texmlb_nyamlb_1/
./data/gid_2017_08_13_balmlb_oakmlb_1/
./data/gid_2017_09_29_oakmlb_texmlb_1/
./data/gid_2017_04_28_seamlb_clemlb_1/
./data/gid_2017_06_09_detmlb_bosmlb_1/
./data/gid_2017_09_24_clemlb_seamlb_1/
./data/gid_2017_05_07_tormlb_tbamlb_1/
./data/gid_2017_06_18_seamlb_texmlb_1/
./data/gid_2017_06_02_wasmlb_oakmlb_1/
./data/gid_2017_07_29_cinmlb_miamlb_1/
./data/gid_2017_05_23_detmlb_houmlb_1/
./data/gid_2017_04_29_tbamlb_tormlb_1/
./data/gid_2017_05_21_nya