In [None]:
# MLB API docs and tester
# http://statsapi-default-elb-prod-876255662.us-east-1.elb.amazonaws.com/docs/#!
#
# JSON viewer
# http://jsonviewer.stack.hu/
#
# https://github.com/benjamincrom/baseball#analyze-a-players-season-ra-dickey---2017
# https://baseballwithr.wordpress.com/2015/02/17/conceptualizing-the-mlb-strike-zone-using-pitchfx-data/


import requests
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys
%matplotlib inline

# MLB Stats API endpoint templates; each "{}" is filled in later via str.format.
API_BASE_URL = "http://statsapi.mlb.com/api"
API_PLAYBYPLAY_URL = API_BASE_URL + "/v1/game/{}/playByPlay?fields={}"  # game id, field list
API_SCHEDULE_URL = API_BASE_URL + "/v1/schedule?sportId=1&date={}&fields={}"  # date, field list
API_TEAMS_URL = API_BASE_URL + "/v1/teams?sportId=1&activeStatus=ACTIVE&fields={}"  # field list

# Maps team abbreviation -> team name; starts empty and is filled by load_teams().
TEAM_NAMES_BY_ABBREV = {}

# Corners of the strike zone as [x, y] pairs (a best guess, not a measured value).
# Both plot axes are drawn inverted, so larger numbers sit further left / lower.
STRIKE_ZONE = [
    [100, 90],    # top, R
    [150, 90],    # top, L
    [100, 160],   # bottom, R
    [150, 160],   # bottom, L
]

# Subtracted from every pitch's y coordinate before it is plotted or compared
# against STRIKE_ZONE.
Y_CORRECTION = 40

In [None]:
# User input for this run: the team's abbreviation and the date of the game.
# `team` must be a key of TEAM_NAMES_BY_ABBREV; `game_date` is inserted as-is
# into the schedule request URL.
# ====================================
team = "WSH"
game_date = "10/30/2019"
# ====================================

In [None]:
def get_data(url):
    """Fetch ``url`` and return its decoded JSON body.

    Args:
        url: Fully formatted request URL.

    Returns:
        The decoded JSON payload. When the payload is a dict carrying a
        ``messageNumber`` key, its number and message are printed as an error
        and the payload is still returned. When the request or the JSON
        decoding fails, a notice is printed and an empty dict is returned
        (previously this path raised UnboundLocalError at the ``return``).
    """
    try:
        # The timeout keeps an unresponsive server from hanging the notebook.
        results = requests.get(url, timeout=30).json()
    except (requests.exceptions.RequestException, ValueError) as err:
        # ValueError covers JSON decoding failures; RequestException covers
        # connection, timeout and HTTP-layer errors raised by requests.
        print('An unhandled exception occurred retrieving data from MLB.\n')
        print('  {}: {}'.format(type(err).__name__, err))
        return {}

    if isinstance(results, dict) and 'messageNumber' in results:
        print('ERROR:  {} - {}'.format(results['messageNumber'], results['message']))
    return results


def load_teams():
    """Fill TEAM_NAMES_BY_ABBREV with one abbreviation -> name entry per team.

    The team list is requested through get_data() using API_TEAMS_URL, and each
    returned team is written into the module-level dictionary in place.
    Nothing is returned.
    """
    requested_fields = 'teams,0,id,name,abbreviation'
    payload = get_data(API_TEAMS_URL.format(requested_fields))

    for entry in payload['teams']:
        abbrev = entry['abbreviation']
        full_name = entry['name']
        TEAM_NAMES_BY_ABBREV[abbrev] = full_name


def find_gamepk(team, game_date):
    """Find the game a team played on a given date.

    Args:
        team: Team abbreviation; must be a key of TEAM_NAMES_BY_ABBREV.
        game_date: Date string substituted into API_SCHEDULE_URL.

    Returns:
        ``(game_pk, away_team, home_team)`` for the first game on the schedule
        in which the team is either the away or the home side. Only the first
        entry of the schedule's ``dates`` list is searched, and the search
        stops at the first match. Returns ``(0, '', '')`` when the abbreviation
        is unknown, no game matches, or the lookup fails.
    """
    not_found = (0, '', '')  # game_pk == 0 is the caller's "no game" signal

    try:
        # validate the abbreviation before spending a request on it
        if team not in TEAM_NAMES_BY_ABBREV:
            return not_found
        team_name = TEAM_NAMES_BY_ABBREV[team]

        fields = 'dates,games,gamePk,teams,team,name,gameNumber'
        schedule = get_data(API_SCHEDULE_URL.format(game_date, fields))

        for game in schedule['dates'][0]['games']:
            away_team = game['teams']['away']['team']['name']
            home_team = game['teams']['home']['team']['name']
            if team_name in (away_team, home_team):
                return game['gamePk'], away_team, home_team
    except Exception:
        # Best-effort lookup: an empty 'dates' list or a payload missing the
        # expected keys lands here and is reported as "no game found".
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        return not_found

    return not_found
    

def _pitch_coords(inning_half, pitch_data_tuple, *keywords):
    """Return ``(xs, ys)`` for one half-inning's pitches whose result contains any keyword.

    Matching is a case-insensitive substring test on the result text, and
    Y_CORRECTION is subtracted from every y value.
    """
    xs, ys = [], []
    for pitch in pitch_data_tuple:
        if pitch[0] != inning_half:
            continue
        result = pitch[3].upper()
        if any(keyword in result for keyword in keywords):
            xs.append(pitch[1])
            ys.append(pitch[2] - Y_CORRECTION)
    return xs, ys


def _draw_zone_and_plate(ax):
    """Draw the strike-zone rectangle and the home-plate outline beneath it on ``ax``."""
    # STRIKE_ZONE holds the two top corners followed by the two bottom corners.
    top_a, top_b, bottom_a, bottom_b = STRIKE_ZONE
    half_width = (top_b[0] - top_a[0]) / 2
    segments = [
        # strike zone: top edge, bottom edge, then the two sides
        ([top_a[0], top_b[0]], [top_a[1], top_b[1]]),
        ([bottom_a[0], bottom_b[0]], [bottom_a[1], bottom_b[1]]),
        ([top_a[0], bottom_a[0]], [top_a[1], bottom_a[1]]),
        ([top_b[0], bottom_b[0]], [top_b[1], bottom_b[1]]),
        # home plate: flat edge, two short sides, two slopes meeting at the point
        ([bottom_a[0], bottom_b[0]], [bottom_a[1] + 20, bottom_b[1] + 20]),
        ([bottom_b[0], bottom_b[0]], [bottom_a[1] + 20, bottom_b[1] + 40]),
        ([bottom_a[0], bottom_a[0]], [bottom_a[1] + 20, bottom_b[1] + 40]),
        ([bottom_b[0], bottom_b[0] - half_width], [bottom_b[1] + 40, bottom_b[1] + 60]),
        ([bottom_a[0], bottom_a[0] + half_width], [bottom_a[1] + 40, bottom_a[1] + 60]),
    ]
    for seg_x, seg_y in segments:
        ax.plot(seg_x, seg_y, color='black', linestyle='-', linewidth=1)


def plot_pitches(inning_half, team, pitch_data_tuple):
    """Show two side-by-side pitch charts for one half-inning side of a game.

    The left chart plots balls, called strikes, swinging strikes and fouls;
    the right chart plots pitches that ended in outs, singles, doubles,
    triples and home runs. Both charts use inverted x and y axes (250 -> 0)
    and include the strike zone and home plate outlines.

    Args:
        inning_half: Value compared against element 0 of each pitch tuple
            ('TOP' / 'BOTTOM' as called from the main cell).
        team: Team name used in the figure title.
        pitch_data_tuple: Sequence of ``(inning_half, x, y, result)`` entries,
            as built by get_pitch_data().

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Note:
        "Total Pitches" counts only the pitches of ``inning_half``, so it is
        consistent with the other per-half counts in the label (it previously
        counted every pitch of the game).
    """
    # NOTE(review): categories are matched by substring, so any result text
    # containing e.g. 'DOUBLE' is counted as a double - confirm against the
    # result strings the API actually returns.
    balls = _pitch_coords(inning_half, pitch_data_tuple, 'BALL')
    called_strikes = _pitch_coords(inning_half, pitch_data_tuple, 'CALLED')
    swinging_strikes = _pitch_coords(inning_half, pitch_data_tuple, 'SWINGING')
    fouls = _pitch_coords(inning_half, pitch_data_tuple, 'FOUL')
    outs = _pitch_coords(inning_half, pitch_data_tuple, 'OUT', 'SAC')
    singles = _pitch_coords(inning_half, pitch_data_tuple, 'SINGLE')
    doubles = _pitch_coords(inning_half, pitch_data_tuple, 'DOUBLE')
    triples = _pitch_coords(inning_half, pitch_data_tuple, 'TRIPLE')
    homers = _pitch_coords(inning_half, pitch_data_tuple, 'HOME')

    # calculate missed balls and strikes calls
    missed_s, missed_b = missed_bs_calls(inning_half, team, pitch_data_tuple)
    total_pitches = sum(1 for pitch in pitch_data_tuple if pitch[0] == inning_half)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8))
    fig.suptitle('{} Pitching Results (catcher\'s perspective)'.format(team), fontsize=20)

    # chart 1: pitch calls
    ax1.set_ylim(250, 0)
    ax1.set_xlim(250, 0)
    ax1.set_xlabel(
        'Balls={}, Called Strikes={}, Swinging Strikes={}, Fouls={}\nStrikes called Balls={},Balls called Strikes={}\nTotal Pitches={}'
        .format(len(balls[0]), len(called_strikes[0]), len(swinging_strikes[0]),
                len(fouls[0]), missed_s, missed_b, total_pitches))
    ax1.scatter(*balls, color='blue', label='Balls')
    ax1.scatter(*called_strikes, marker='D', color='red', label='Called Strikes')
    ax1.scatter(*swinging_strikes, marker='x', color='red', label='Swinging Strikes')
    ax1.scatter(*fouls, color='green', label='Fouls')
    ax1.legend()
    ax1.set_xticks(())
    ax1.set_yticks(())
    _draw_zone_and_plate(ax1)

    # chart 2: balls put in play
    ax2.set_ylim(250, 0)
    ax2.set_xlim(250, 0)
    ax2.set_xlabel(
        'Outs={}, Singles={}, Doubles={}, Triples={}, Home Runs={}'
        .format(len(outs[0]), len(singles[0]), len(doubles[0]), len(triples[0]), len(homers[0])))
    ax2.scatter(*outs, marker='D', color='red', label='Outs')
    ax2.scatter(*singles, marker='s', color='green', label='Singles')
    ax2.scatter(*doubles, marker='s', color='blue', label='Doubles')
    ax2.scatter(*triples, marker='s', color='purple', label='Triples')
    ax2.scatter(*homers, marker='s', color='black', label='Home Runs')
    ax2.legend()
    ax2.set_xticks(())
    ax2.set_yticks(())
    _draw_zone_and_plate(ax2)

    # display
    plt.show()
    
    
def get_pitch_data(game_pk):
    """Download a game's play-by-play and flatten it into per-pitch tuples.

    Args:
        game_pk: Game identifier substituted into API_PLAYBYPLAY_URL.

    Returns:
        A list of ``(inning_half, x, y, pitch_result)`` tuples, one for every
        play event that has ``pitchData`` coordinates containing an ``x``.
        ``inning_half`` is upper-cased, ``x`` and ``y`` are truncated to int,
        and ``pitch_result`` is the event description, except that a
        description containing 'In play' is replaced by the play's result.
    """
    fields = "allPlays,result,event,about,halfInning,playEvents,pitchData,coordinates,details,description"
    data = get_data(API_PLAYBYPLAY_URL.format(game_pk, fields))

    pitches = []
    for play in data['allPlays']:
        half = play['about']['halfInning']
        outcome = play['result']['event']

        for event in play['playEvents']:
            label = event['details']['description']
            if 'In play' in label:
                # the pitch was put in play: record how the play ended instead
                label = outcome

            # skip events that are not pitches or carry no location
            if 'pitchData' not in event:
                continue
            coords = event['pitchData']['coordinates']
            if 'x' not in coords:
                continue

            px = int(coords['x'])
            py = int(coords['y'])
            pitches.append((half.upper(), px, py, label))

    return pitches


def missed_bs_calls(inning_half, team, pitch_data_tuple):
    """Count calls in one half-inning that disagree with the STRIKE_ZONE box.

    Args:
        inning_half: Value compared against element 0 of each pitch tuple.
        team: Not used by this function.
        pitch_data_tuple: Sequence of ``(inning_half, x, y, result)`` entries.

    Returns:
        ``(strikes_called_balls, balls_called_strikes)``: the number of
        pitches whose result contains 'BALL' but that lie inside the zone
        (edges included), and the number whose result contains 'CALLED' but
        that lie outside it. Y_CORRECTION is subtracted from y before the
        comparison.
    """
    zone_x_lo = STRIKE_ZONE[0][0]
    zone_x_hi = STRIKE_ZONE[1][0]
    zone_y_lo = STRIKE_ZONE[0][1]
    zone_y_hi = STRIKE_ZONE[2][1]

    strikes_called_balls = 0
    balls_called_strikes = 0

    for pitch in pitch_data_tuple:
        label = pitch[3].upper()
        if inning_half != pitch[0]:
            continue

        is_ball = 'BALL' in label
        is_called = 'CALLED' in label
        if not (is_ball or is_called):
            continue

        px = pitch[1]
        py = pitch[2] - Y_CORRECTION

        if is_ball and px <= zone_x_hi and px >= zone_x_lo and py <= zone_y_hi and py >= zone_y_lo:
            strikes_called_balls += 1
        if is_called and (px > zone_x_hi or px < zone_x_lo or py > zone_y_hi or py < zone_y_lo):
            balls_called_strikes += 1

    return strikes_called_balls, balls_called_strikes


In [None]:
# ===== MAIN LOOP =====

# Fill TEAM_NAMES_BY_ABBREV so the abbreviation in `team` can be resolved.
load_teams()

# Resolve the requested team and date to a game id plus both team names.
game_pk, away_team, home_team = find_gamepk(team, game_date)

if game_pk <= 0:
    # find_gamepk signals "nothing found" with game_pk == 0; print the known
    # abbreviations so a mistyped one is easy to spot.
    print("No game found for {} on {}".format(team, game_date))
    print(TEAM_NAMES_BY_ABBREV)
else:
    # Retrieve every pitch of the game once, then chart each half separately.
    pitch_data = get_pitch_data(game_pk)

    # 'TOP' pitches are titled with the home team, 'BOTTOM' with the away team.
    plot_pitches('TOP', home_team, pitch_data)
    plot_pitches('BOTTOM', away_team, pitch_data)


