In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsapi
import json
import re
import datetime

from tqdm import tqdm
from collections import OrderedDict

%matplotlib inline

In [3]:
def generate_teams_dict():
    team_params = {'activeStatus':'Y', 'season':2019, 'sportIds':1, 'fields':'teams,id,name,teamCode,fileCode,teamName,locationName,shortName,venue'}
    teams_info = statsapi.get('teams', team_params)

    teams_dict = {}
    for team in teams_info['teams']:
        teams_dict[team['fileCode']] = team['id']
    return teams_dict

In [4]:
def get_yesterdays_games():
    yesterday = datetime.datetime.today() - datetime.timedelta(days=1)
    return yesterday.strftime("%m/%d/%Y")

In [5]:
def get_todays_games():
    return statsapi.schedule(datetime.datetime.today().strftime("%m/%d/%Y"))

def matchups_and_prob_pitchers_dicts(day='today'):
    games_today = get_todays_games()
    if day == 'yesterday':
        games_today = get_yesterdays_games()
        
    home_away_dict = {}
    prob_pitchers_dict = {}
    for game in games_today:
        prob_pitchers_dict[game['away_id']] = game['away_probable_pitcher']
        prob_pitchers_dict[game['home_id']] = game['home_probable_pitcher']
        
        home_away_dict[game['home_id']] = game['away_id']
        
    away_home_dict = {v:k for k, v in home_away_dict.items()}
    return home_away_dict, away_home_dict, prob_pitchers_dict

In [6]:
def get_player_list(team_id):
    player_names = []
    roster = statsapi.roster(team_id)
    roster_list = roster.split("\n")[:-1]
    for player in roster_list:
        player_names.append(" ".join(player.split()[2:]))
    return player_names

get_player_list(137)

['Abiatal Avelino',
 'Alex Dickerson',
 'Andrew Suarez',
 'Austin Slater',
 'Brandon Belt',
 'Brandon Crawford',
 'Buster Posey',
 'Dereck Rodriguez',
 'Donovan Solano',
 'Evan Longoria',
 'Fernando Abad',
 'Jandel Gustave',
 'Jeff Samardzija',
 'Kevin Pillar',
 'Logan Webb',
 'Madison Bumgarner',
 'Mike Yastrzemski',
 'Reyes Moronta',
 'Sam Coonrod',
 'Scooter Gennett',
 'Stephen Vogt',
 'Tony Watson',
 'Trevor Gott',
 'Tyler Beede',
 'Will Smith']

In [7]:
def get_player_id_from_name(player_name):
    try:
        return statsapi.lookup_player(player_name)[0]['id']
    except IndexError:
        return False

def check_pos_player(player_name):
    try:
        return statsapi.lookup_player(player_name)[0]['primaryPosition']['abbreviation'] != "P"
    except IndexError:
        return False

def get_current_season_stats(player_name):

    if not check_pos_player(player_name):
        raise ValueError("Player name entered is not a position player")
        
    player_id = get_player_id_from_name(player_name)
    stats_dict = OrderedDict({"Name": player_name, "ID": player_id, 
                  "Team": statsapi.lookup_player(player_id)[0]['currentTeam']['id']})
    
    # Look up the player's current season hitting stats
    get_player_stats = statsapi.player_stats(player_id, 'hitting') 
    
    # Get the stats for the most recent season
    curr_season_stats = get_player_stats.split("Season Hitting")[-1]
    
    #Break up the stats into a list
    stats_list = curr_season_stats.split("\n")[1:-2]
    for stat in stats_list:
        stat_name = re.search("[A-Za-z]+", stat).group()
        stat_val = re.search("[^:A-Za-z]+", stat).group()
        try:
            stats_dict[stat_name] = float(stat_val)
        except ValueError:
            stats_dict[stat_name] = 0.0
    return stats_dict

In [8]:
# These functions were defined with the help of toddrob99 on github, who developed the
# MLB-StatsAPI module. I made a post on reddit.com/r/mlbdata, which he mantains to 
# answer questions about making API calls for specific purposes. I asked how to get stats
# over the past x days and how to get head-to-head batting stats. The post is linked
# here: https://www.reddit.com/r/mlbdata/comments/cewwfo/getting_headtohead_batting_stats_and_last_x_games/?

def batting_past_N_days(N, player_id, end_date=datetime.datetime.today()):
    
    start_date = (end_date - datetime.timedelta(days=N)).strftime("%m/%d/%Y")
    end_date = end_date.strftime("%m/%d/%Y")
    hydrate = ('stats(group=[hitting],type=[byDateRange],startDate={},endDate={}),currentTeam'.
               format(start_date, end_date))
    
    params = {'personId': player_id, 'hydrate':hydrate}
    r = statsapi.get('person',params)
    batting_stats = r['people'][0]['stats'][0]['splits'][0]['stat']

    # Only get rate stats for past N days
    filtered = {k + "_p{}d".format(N):(float(v) if v != '.---' or v != '-.--' else 0.0)
                for k, v in batting_stats.items() 
                if type(v) == str 
                and k != 'stolenBasePercentage'
                or k == 'hits'} 
    filtered = OrderedDict(sorted(filtered.items()))
    
    return filtered

def get_h2h_vs_pitcher(batter_id, opponent_id):
    
    hydrate = 'stats(group=[hitting],type=[vsPlayer],opposingPlayerId={},season=2019,sportId=1)'.format(opponent_id)
    params = {'personId': batter_id, 'hydrate':hydrate, 'sportId':1}
    r = statsapi.get('person',params)
    
    try: 
        batting_stats = r['people'][0]['stats'][1]['splits'][0]['stat']
    except KeyError:
        return OrderedDict({'atBats_h2h': 0.0, 'avg_h2h': 0.0, 'hits_h2h': 0.0, 
                            'obp_h2h': 0.0, 'ops_h2h': 0.0, 'slg_h2h': 0.0})
    
    # Only get rate stats vs pitcher
    filtered = {(k + "_h2h"):(float(v) if v != "-.--" and v != ".---" else 0.0)
                for k, v in batting_stats.items() 
                if type(v) == str 
                and k != 'stolenBasePercentage'
                and k != 'atBatsPerHomeRun'
                or k == 'hits'
                or k == 'atBats'} 
    
    filtered = OrderedDict(sorted(filtered.items()))
    
    return filtered

In [9]:
def pitching_past_N_days(N, player_id, end_date=datetime.datetime.today()):
    
    start_date = (end_date - datetime.timedelta(days=N)).strftime("%m/%d/%Y")
    end_date = end_date.strftime("%m/%d/%Y")
    hydrate = 'stats(group=[pitching],type=[byDateRange],startDate={},endDate={}),currentTeam'.format(start_date, end_date)
    
    params = {'personId': player_id, 'hydrate':hydrate}
    r = statsapi.get('person',params)
    pitching_stats = r['people'][0]['stats'][0]['splits'][0]['stat']
    
    # Only get rate stats for past N days
    filtered = {k + "_p{}d".format(N):float(v) for k, v in pitching_stats.items() 
                if type(v) == str and v != ".---" and v != "-.--"} 
    
    return filtered

In [10]:
def check_pitcher_right_handed(pitcher_id):
    try:
        params = {'personId': pitcher_id}
        r = statsapi.get('person',params)
        return r['people'][0]['pitchHand']['code'] == 'R'
    except IndexError:
        return False

In [11]:
def check_batter_right_handed(batter_id):
    try:
        params = {'personId': batter_id}
        r = statsapi.get('person',params)
        return r['people'][0]['batSide']['code'] == 'R'
    except IndexError:
        return False

In [12]:
def check_pitcher_batter_opposite_hand(batter_id, pitcher_id):
    return check_pitcher_right_handed(pitcher_id) != check_batter_right_handed(batter_id)

In [13]:
def player_got_hit_in_game(player_id, game_id, home_or_away):
    
    params = {'gamePk':game_id,
      'fields': 'gameData,teams,teamName,shortName,teamStats,batting,atBats,runs,hits,rbi,strikeOuts,baseOnBalls,leftOnBase,players,boxscoreName,liveData,boxscore,teams,players,id,fullName,batting,avg,ops,era,battingOrder,info,title,fieldList,note,label,value'}
    r = statsapi.get('game', params)
    player_stats = r['liveData']['boxscore']['teams'][home_or_away]['players'].get('ID' + str(player_id), False)
    if not player_stats: 
        return False 
    else:
        return player_stats['stats']['batting'].get('hits', 0) > 0

In [14]:
def convert_to_FL_format(name):
    last_first = name.split(",")
    last_first.reverse()
    last_first[0] = last_first[0].strip()
    return " ".join(last_first)

In [15]:
def get_opposing_pitcher(player_id, game_id):
    teams = statsapi.get('schedule', {'sportId': '1', 'gamePk': game_id, 
                                      'hydrate':'probablePitcher'})['dates'][0]['games'][0]['teams']
    home_team_id = teams['home']['team']['id']
    away_team_id = teams['away']['team']['id']
    
    home_prob_pitcher = teams['home']['probablePitcher']['fullName']
    away_prob_pitcher = teams['away']['probablePitcher']['fullName']
    
    if statsapi.lookup_player(player_id)[0]['currentTeam']['id'] == home_team_id:
        return away_prob_pitcher
    else: 
        return home_prob_pitcher

In [16]:
def batting_past_N_games(N, player_id):
    hydrate = 'stats(group=[hitting],type=[lastXGames],limit={}),currentTeam'.format(N)
    
    params = {'personId': player_id, 'hydrate':hydrate}
    
    try:
        r = statsapi.get('person',params)
        batting_stats = r['people'][0]['stats'][0]['splits'][0]['stat']
    except (ValueError, KeyError):
        return {k:v for k, v in (zip(np.arange(5), [0.0]*5))}
    
    # Only get rate stats for past N days
    filtered = {k + "_p{}G".format(N):(float(v) if v != '.---' and v != '-.--' else 0.0)
                for k, v in batting_stats.items() 
                if type(v) == str 
                and k != 'stolenBasePercentage'
                or k == 'hits'} 
    
    filtered = OrderedDict(sorted(filtered.items()))
    
    return filtered

def pitching_past_N_games(N, player_id):
    if player_id == 547989:
        return {k:v for k, v in (zip(np.arange(15), [0.0]*15))}
    
    hydrate = 'stats(group=[pitching],type=[lastXGames],limit={}),currentTeam'.format(N)
    
    params = {'personId': player_id, 'hydrate':hydrate}
    try:
        r = statsapi.get('person',params)
    except ValueError:              # The request fails if a pitcher is making their debut
        return {k:v for k, v in (zip(np.arange(15), [0.0]*15))}
    
    pitching_stats = r['people'][0]['stats'][0]['splits'][0]['stat']
    
    # Only get rate stats for past N days
    filtered = {(k + "_p{}G".format(N)):(float(v) if v != '.---' and v != '-.--' else 0.0)
                for k, v in pitching_stats.items() 
                if type(v) == str} 
    
    filtered = OrderedDict(sorted(filtered.items()))
    
    return filtered

In [28]:
# This cell generates rows for a DataFrame of batting stats--one row per player

yesterday = (datetime.datetime.today() - datetime.timedelta(days = 1)).strftime("%m/%d/%Y")
today = datetime.datetime.today().strftime("%m/%d/%Y")

###############################################################
# 
# Change GENERATE_TRAIN_DATA to False to generate 
# data for today's games instead, which won't have 
# labels included for whether or not the player
# got a hit
#
GENERATE_TRAIN_DATA = False
#
################################################################

gameday = yesterday
if not GENERATE_TRAIN_DATA:
    gameday = today
    
rows_list = []
for game in tqdm(statsapi.schedule(gameday)):
    
#     if game['status'] not in ['In Progress', 'Final']:
#         continue
    
    game_id = game['game_id']
    away_id = game['away_id']
    home_id = game['home_id']
    home_player_list = get_player_list(home_id)
    away_player_list = get_player_list(away_id)
    
    away_prob_Pname = convert_to_FL_format(game['away_probable_pitcher'])
    home_prob_Pname = convert_to_FL_format(game['home_probable_pitcher'])
    
    away_probable_pitcher = get_player_id_from_name(away_prob_Pname)
    home_probable_pitcher = get_player_id_from_name(home_prob_Pname)
    
    away_pitcher_p5G = pitching_past_N_games(5, away_probable_pitcher)
    home_pitcher_p5G = pitching_past_N_games(5, home_probable_pitcher)
    
    for player in home_player_list:
#         print(player)
        player_id = get_player_id_from_name(player)
        try:
            new_row = list(get_current_season_stats(player).values())
            new_row += list(batting_past_N_games(7, player_id).values())
            new_row += list(batting_past_N_games(15, player_id).values())
            new_row += list(away_pitcher_p5G.values())
            new_row += list(get_h2h_vs_pitcher(player_id, away_probable_pitcher).values())
            new_row.append(float(check_pitcher_batter_opposite_hand(batter_id=player_id, 
                                                                  pitcher_id=away_probable_pitcher)))
            if GENERATE_TRAIN_DATA:
                new_row.append(player_got_hit_in_game(player_id, game_id, 'home'))
                
            rows_list.append(new_row)
        except (ValueError, IndexError):
            continue

    for player in away_player_list:
#         print(player)
        player_id = get_player_id_from_name(player)
        try:
            new_row = list(get_current_season_stats(player).values())
            new_row += list(batting_past_N_games(7, player_id).values())
            new_row += list(batting_past_N_games(15, player_id).values())
            new_row += list(home_pitcher_p5G.values())
            new_row += list(get_h2h_vs_pitcher(player_id, home_probable_pitcher).values())
            new_row.append(float(check_pitcher_batter_opposite_hand(batter_id=player_id, 
                                                                  pitcher_id=away_probable_pitcher)))
            if GENERATE_TRAIN_DATA:
                new_row.append(player_got_hit_in_game(player_id, game_id, 'away'))
                
            rows_list.append(new_row)
        except (ValueError, IndexError):
            continue


100%|██████████| 15/15 [15:10<00:00, 60.11s/it]


In [29]:
sample_hitter = get_player_id_from_name("Kevin Pillar")
sample_pitcher = get_player_id_from_name("Jacob DeGrom")
player_stats_columns = list(get_current_season_stats("Kevin Pillar").keys())
player_stats_columns += list(batting_past_N_games(7, sample_hitter).keys())
player_stats_columns += list(batting_past_N_games(15, sample_hitter).keys())
player_stats_columns += list(pitching_past_N_games(5, sample_pitcher).keys())
player_stats_columns += list(get_h2h_vs_pitcher(sample_hitter, sample_pitcher).keys())

if GENERATE_TRAIN_DATA:
    player_stats_columns += ['pitcher_hitter_opposite_hand', 'player_got_hit']
else:
    player_stats_columns += ['pitcher_hitter_opposite_hand']

In [30]:
player_stats_table = pd.DataFrame(data=rows_list, columns=player_stats_columns)
player_stats_table#[player_stats_table['Name'] == 'Mike Ford'].iloc[0]

Unnamed: 0,Name,ID,Team,gamesPlayed,groundOuts,runs,doubles,triples,homeRuns,strikeOuts,...,walksPer9Inn_p5G,whip_p5G,winPercentage_p5G,atBats_h2h,avg_h2h,hits_h2h,obp_h2h,ops_h2h,slg_h2h,pitcher_hitter_opposite_hand
0,Adam Engel,641553,145,44.0,19.0,11.0,4.0,2.0,2.0,38.0,...,3.24,1.26,1.00,5.0,0.400,2.0,0.400,0.800,0.400,0.0
1,Eloy Jimenez,650391,145,78.0,80.0,40.0,8.0,0.0,18.0,89.0,...,3.24,1.26,1.00,3.0,0.333,1.0,0.333,0.666,0.333,0.0
2,James McCann,543510,145,84.0,69.0,45.0,19.0,0.0,12.0,97.0,...,3.24,1.26,1.00,8.0,0.250,2.0,0.400,0.650,0.250,0.0
3,Jon Jay,445055,145,31.0,40.0,11.0,8.0,0.0,0.0,22.0,...,3.24,1.26,1.00,23.0,0.261,6.0,0.261,0.522,0.261,1.0
4,Jose Abreu,547989,145,112.0,122.0,55.0,24.0,0.0,24.0,109.0,...,3.24,1.26,1.00,18.0,0.167,3.0,0.211,0.489,0.278,0.0
5,Leury Garcia,544725,145,105.0,131.0,72.0,21.0,1.0,7.0,106.0,...,3.24,1.26,1.00,6.0,0.167,1.0,0.167,0.334,0.167,1.0
6,Matt Skole,605474,145,5.0,1.0,2.0,1.0,0.0,0.0,6.0,...,3.24,1.26,1.00,0.0,0.000,0.0,0.000,0.000,0.000,1.0
7,Ryan Cordell,641477,145,74.0,35.0,18.0,6.0,0.0,6.0,60.0,...,3.24,1.26,1.00,3.0,0.333,1.0,0.333,0.666,0.333,0.0
8,Ryan Goins,572365,145,21.0,14.0,8.0,5.0,1.0,2.0,21.0,...,3.24,1.26,1.00,3.0,0.000,0.0,0.000,0.000,0.000,1.0
9,Tim Anderson,641313,145,79.0,80.0,46.0,17.0,0.0,12.0,68.0,...,3.24,1.26,1.00,10.0,0.400,4.0,0.455,1.155,0.700,0.0


In [31]:
player_stats_table.to_csv("data/player_stats/player_stats_{}.csv".format(gameday.replace("/", "_")), index=False)

In [24]:
len(batting_past_N_games(15, get_player_id_from_name("Buster Posey"))) == len(batting_past_N_games(15, get_player_id_from_name("Adam Engel")))

True

In [29]:
(len(get_h2h_vs_pitcher(get_player_id_from_name("Buster Posey"), get_player_id_from_name("Erick Fedde")))
    == len(get_h2h_vs_pitcher(get_player_id_from_name("Adam Engel"), get_player_id_from_name("Daniel Norris"))))

False

In [30]:
get_h2h_vs_pitcher(get_player_id_from_name("Buster Posey"), get_player_id_from_name("Erick Fedde"))

OrderedDict([('atBats_h2h', 0.0),
             ('avg_h2h', 0.0),
             ('hits_h2h', 0.0),
             ('obp_h2h', 0.0),
             ('ops_h2h', 0.0),
             ('slg_h2h', 0.0)])

In [27]:
get_h2h_vs_pitcher(get_player_id_from_name("Adam Engel"), get_player_id_from_name("Daniel Norris"))

OrderedDict([('atBatsPerHomeRun_h2h', 0.0),
             ('atBats_h2h', 4.0),
             ('avg_h2h', 0.0),
             ('hits_h2h', 0.0),
             ('obp_h2h', 0.0),
             ('ops_h2h', 0.0),
             ('slg_h2h', 0.0)])

In [47]:
get_current_season_stats(get_player_id_from_name("Yasiel Puig"))

OrderedDict([('Name', 624577),
             ('ID', 624577),
             ('Team', 114),
             ('gamesPlayed', 104.0),
             ('groundOuts', 80.0),
             ('runs', 52.0),
             ('doubles', 17.0),
             ('triples', 1.0),
             ('homeRuns', 22.0),
             ('strikeOuts', 90.0),
             ('baseOnBalls', 25.0),
             ('intentionalWalks', 1.0),
             ('hits', 98.0),
             ('hitByPitch', 5.0),
             ('avg', 0.253),
             ('atBats', 387.0),
             ('obp', 0.305),
             ('slg', 0.473),
             ('ops', 0.778),
             ('caughtStealing', 5.0),
             ('stolenBases', 16.0),
             ('stolenBasePercentage', 0.762),
             ('groundIntoDoublePlay', 9.0),
             ('numberOfPitches', 1503.0),
             ('plateAppearances', 420.0),
             ('totalBases', 183.0),
             ('rbi', 63.0),
             ('leftOnBase', 173.0),
             ('sacBunts', 0.0),
             

In [48]:
get_current_season_stats(get_player_id_from_name("Buster Posey"))

OrderedDict([('Name', 457763),
             ('ID', 457763),
             ('Team', 137),
             ('gamesPlayed', 81.0),
             ('groundOuts', 84.0),
             ('runs', 33.0),
             ('doubles', 21.0),
             ('triples', 0.0),
             ('homeRuns', 6.0),
             ('strikeOuts', 50.0),
             ('baseOnBalls', 24.0),
             ('intentionalWalks', 0.0),
             ('hits', 73.0),
             ('hitByPitch', 4.0),
             ('avg', 0.262),
             ('atBats', 279.0),
             ('obp', 0.328),
             ('slg', 0.401),
             ('ops', 0.729),
             ('caughtStealing', 0.0),
             ('stolenBases', 0.0),
             ('stolenBasePercentage', 0.0),
             ('groundIntoDoublePlay', 10.0),
             ('numberOfPitches', 1194.0),
             ('plateAppearances', 308.0),
             ('totalBases', 112.0),
             ('rbi', 33.0),
             ('leftOnBase', 111.0),
             ('sacBunts', 0.0),
             ('sa

In [37]:
len(pitching_past_N_games(5, get_player_id_from_name("Jacob DeGrom")))

15

In [35]:
statsapi.schedule()

[{'away_id': 145,
  'away_name': 'Chicago White Sox',
  'away_pitcher_note': 'Lopez didn’t have his best stuff in his start Tuesday against the Mets, but he still managed to hold them to two runs over 5 1/3 innings. Lopez has a 2.05 ERA and a .194 batting average against since the All-Star break.',
  'away_probable_pitcher': 'Lopez, Reynaldo',
  'away_score': 10,
  'current_inning': 9,
  'doubleheader': 'N',
  'game_date': '2019-08-04',
  'game_datetime': '2019-08-04T17:05:00Z',
  'game_id': 567118,
  'game_num': 1,
  'game_type': 'R',
  'home_id': 143,
  'home_name': 'Philadelphia Phillies',
  'home_pitcher_note': 'Smyly has been a godsend through two starts with the Phillies. He has allowed just one earned run in 13 innings, after struggling earlier this season with the Rangers. He is throwing his cutter more, which has been a factor to his success.',
  'home_probable_pitcher': 'Smyly, Drew',
  'home_score': 3,
  'inning_state': 'Bottom',
  'status': 'In Progress',
  'summary': '2019

In [36]:
pitching_past_N_games(5, get_player_id_from_name("J.A. Happ"))

OrderedDict([('avg_p5G', 0.303),
             ('era_p5G', 5.04),
             ('groundOutsToAirouts_p5G', 0.9),
             ('hitsPer9Inn_p5G', 10.8),
             ('homeRunsPer9_p5G', 1.44),
             ('inningsPitched_p5G', 25.0),
             ('pitchesPerInning_p5G', 17.56),
             ('runsScoredPer9_p5G', 5.04),
             ('stolenBasePercentage_p5G', 0.0),
             ('strikePercentage_p5G', 0.63),
             ('strikeoutWalkRatio_p5G', 2.56),
             ('strikeoutsPer9Inn_p5G', 8.28),
             ('walksPer9Inn_p5G', 3.24),
             ('whip_p5G', 1.56),
             ('winPercentage_p5G', 0.33)])

In [23]:
batting_past_N_games(5, get_player_id_from_name("Buster Posey"))

OrderedDict([('atBatsPerHomeRun_p5G', 0.0),
             ('avg_p5G', 0.308),
             ('hits_p5G', 4.0),
             ('obp_p5G', 0.4),
             ('ops_p5G', 0.785),
             ('slg_p5G', 0.385)])

In [32]:
batting_past_N_games(7, get_player_id_from_name("Carlos Correa"))

OrderedDict([('atBatsPerHomeRun_p7G', 8.0),
             ('avg_p7G', 0.417),
             ('hits_p7G', 10.0),
             ('obp_p7G', 0.548),
             ('ops_p7G', 1.465),
             ('slg_p7G', 0.917)])

In [22]:
batting_past_N_games(7, get_player_id_from_name("Trevor Story"))

OrderedDict([('atBatsPerHomeRun_p7G', 6.25),
             ('avg_p7G', 0.32),
             ('hits_p7G', 8.0),
             ('obp_p7G', 0.414),
             ('ops_p7G', 1.334),
             ('slg_p7G', 0.92)])

In [17]:
yesterday = (datetime.datetime.today() - datetime.timedelta(days = 1)).strftime("%m/%d/%Y")
today = datetime.datetime.today().strftime("%m/%d/%Y")

In [37]:
pred_yest = pd.read_csv("data/predictions/predictions_{}.csv".format(yesterday.replace("/", "_")), index_col=0)
stats_yest = pd.read_csv("data/player_stats/player_stats_{}.csv".format(yesterday.replace("/", "_")))

Unnamed: 0,Name,player_got_hit
33,Rafael Devers,True
62,Carlos Santana,True
158,Cody Bellinger,True
162,Justin Turner,True
166,Max Muncy,False
182,Christian Yelich,True
241,Charlie Blackmon,True
279,George Springer,True
354,Javier Baez,True
359,Nicholas Castellanos,True


In [46]:
past_results1 = stats_yest[stats_yest['Name'].isin(pred_yest['name'])].loc[:, ['Name', 'player_got_hit']]
past_results1.loc[10] = ['Overall Accuracy', str(sum(past_results1['player_got_hit']) * 10) + "%"]

In [62]:
past_results1.to_csv("data/past_results/past_results_{}.csv".format(yesterday.replace("/", "_")), index=False)