In [1]:
import config
import sqlalchemy
import functions
import statsapi as mlb
import sql_alch_schema
from datetime import datetime

In [2]:
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String,DateTime,Date,Time,Boolean
from sqlalchemy import ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.schema import Table
from sqlalchemy import distinct

In [3]:
# Connection URL for the MySQL database, assembled from the values in config.py
_sql_alchemy_connection = 'mysql+mysqlconnector://{}:{}@{}:{}/{}'.format(
    config.user,
    config.password,
    config.host,
    config.port,
    config.schema,
)

# Engine used by every query below: SQL echo is off and the connector is
# asked to disable SSL.
db = sqlalchemy.create_engine(
    _sql_alchemy_connection,
    echo=False,
    connect_args={'ssl_disabled': True},
)

In [4]:
# Short aliases for the declarative base and the mapped classes that live in
# sql_alch_schema, so the cells below can use the bare class names.
Base = sql_alch_schema.Base
Game, Play, Team, GameTeamLink, Person = (
    sql_alch_schema.Game,
    sql_alch_schema.Play,
    sql_alch_schema.Team,
    sql_alch_schema.GameTeamLink,
    sql_alch_schema.Person,
)

# Session factory bound to the engine, and the single session the notebook uses.
from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=db)
session = Session()

In [5]:
# Emit CREATE TABLE for the models registered on Base, using the engine above
# (SQLAlchemy's create_all checks for existing tables first by default).
Base.metadata.create_all(db)

In [6]:
import sqlalchemy_schemadisplay
from sqlalchemy import MetaData

# Build a schema graph from metadata bound to the engine and save it as dbschema.png.
# NOTE(review): MetaData(db) passes the engine as a bind, which newer SQLAlchemy
# releases no longer accept — confirm against the installed version.
schema_viz = sqlalchemy_schemadisplay.create_schema_graph(metadata=MetaData(db))
schema_viz.write_png('dbschema.png')

A Game record queried from MySQL has a `.teams` attribute that returns two GameTeamLink records, as shown below.

In [7]:
# Fetch one known game by its id string; .one() raises unless exactly one row matches.
test_gameId = '2019/03/28/anamlb-oakmlb-1'
game_test = (
    session.query(Game)
    .filter(Game.id == test_gameId)
    .one()
)

print(game_test)

<Game(pk='566086',id='2019/03/28/anamlb-oakmlb-1')>


In [8]:
game_test.teams

[<GameTeam(game_id='2019/03/28/anamlb-oakmlb-1',team_id='108')>,
 <GameTeam(game_id='2019/03/28/anamlb-oakmlb-1',team_id='133')>]

In [9]:
play_test = session.query(Play).first()

In [10]:
play_test.batter_id

645277

In [11]:
play_test.game

<Game(pk='533881',id='2018/03/01/atlmlb-detmlb-1')>

A game_team object can access the associated game record and team record as well as plays for that game

In [12]:
# Take the first GameTeamLink of the sample game and print what can be reached from it:
# the link itself, its team, and the player_1_id column.
game_team_test = game_test.teams[0]

for reachable in (game_team_test, game_team_test.team, game_team_test.player_1_id):
    print(reachable, '\n')

# Cell output: the plays of the game this link belongs to.
game_team_test.game.plays

<GameTeam(game_id='2019/03/28/anamlb-oakmlb-1',team_id='108')> 

<Team(name='Los Angeles Angels')> 

405395 



[<Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='0')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='1')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='10')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='11')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='12')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='13')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='14')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='15')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='16')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='17')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='18')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='19')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='2')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='20')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='21')>,
 <Play(game_id='2019/03/28/anamlb-oakmlb-1',atBatIndex='22

In [156]:
team_test = game_team_test.team
team_test

<Team(name='Los Angeles Angels')>

In [162]:
# Display the sample Game record itself (the saved output below is its repr);
# the previous `.g` suffix was a half-typed attribute access.
game_record_test

<Game(pk='565249',id='2019/06/02/wasmlb-cinmlb-1')>

In [157]:
team_test.games

[<GameTeam(game_id='2019/02/23/sfnmlb-anamlb-1',team_id='108')>,
 <GameTeam(game_id='2019/02/24/anamlb-lanmlb-1',team_id='108')>,
 <GameTeam(game_id='2019/02/24/cinmlb-anamlb-2',team_id='108')>,
 <GameTeam(game_id='2019/02/25/milmlb-anamlb-1',team_id='108')>,
 <GameTeam(game_id='2019/02/26/anamlb-oakmlb-1',team_id='108')>,
 <GameTeam(game_id='2019/02/27/anamlb-colmlb-1',team_id='108')>,
 <GameTeam(game_id='2019/02/28/texmlb-anamlb-1',team_id='108')>,
 <GameTeam(game_id='2019/03/01/kcamlb-anamlb-1',team_id='108')>,
 <GameTeam(game_id='2019/03/02/anamlb-cinmlb-1',team_id='108')>,
 <GameTeam(game_id='2019/03/03/oakmlb-anamlb-1',team_id='108')>,
 <GameTeam(game_id='2019/03/04/anamlb-chamlb-1',team_id='108')>,
 <GameTeam(game_id='2019/03/05/chnmlb-anamlb-1',team_id='108')>,
 <GameTeam(game_id='2019/03/06/anamlb-colmlb-1',team_id='108')>,
 <GameTeam(game_id='2019/03/07/anamlb-kcamlb-1',team_id='108')>,
 <GameTeam(game_id='2019/03/07/lanmlb-anamlb-2',team_id='108')>,
 <GameTeam(game_id='2019/

We can also access players through a game_team object.

In [13]:
# Sanity check: two separate passes over players() produce the same sequence.
list(game_team_test.players()) == list(game_team_test.players())[0:None]

True

In [14]:
game_test.teams[0].players()

<generator object players at 0x107f39cf0>

## Player's Table 

In [15]:
mike_trout_id = 545361

In [29]:
mike_trout_person = session.query(Person).filter(Person.id==mike_trout_id).one()

In [30]:
mike_trout_person.primaryPosition_type

'Outfielder'

In [36]:
session.query(distinct(Person.primaryPosition_name)).all()

[('Pitcher'),
 ('Outfielder'),
 ('Designated Hitter'),
 ('First Base'),
 ('Catcher'),
 ('Second Base'),
 ('Third Base'),
 ('Shortstop'),
 ('Outfield'),
 ('Infield')]

## Let's find some player stats
Eventually, we want team stats, but let's start with a single player.

In [48]:
# Every distinct value of Play.event and Play.eventType in the database.
# Each result row holds a single column, so take it directly.
play_events = [row[0] for row in session.query(distinct(Play.event)).all()]
play_eventTypes = [row[0] for row in session.query(distinct(Play.eventType)).all()]

In [56]:
play_events

['Double',
 'Hit By Pitch',
 'Strikeout',
 'Lineout',
 'Home Run',
 'Flyout',
 'Groundout',
 'Single',
 'Pop Out',
 'Forceout',
 'Field Error',
 'Double Play',
 'Sac Fly',
 'Walk',
 'Bunt Groundout',
 'Triple',
 'Grounded Into DP',
 'Caught Stealing 2B',
 'Sac Bunt',
 'Strikeout Double Play',
 'Catcher Interference',
 'Fielders Choice',
 'Fielders Choice Out',
 'Runner Out',
 'Batter Interference',
 'Fan Interference',
 'Intent Walk',
 'Sac Fly Double Play',
 'Pickoff 1B',
 'Bunt Pop Out',
 'Pickoff Caught Stealing 2',
 'Caught Stealing Home',
 'Bunt Lineout',
 'Game Advisory',
 'Batter Out',
 'Pickoff Caught Stealing 3',
 'Caught Stealing 3B',
 'Pickoff Caught Stealing H',
 'Runner Double Play',
 'Pickoff 2B',
 'Wild Pitch',
 'Pickoff 3B',
 'Triple Play',
 'Sac Bunt Double Play',
 'Passed Ball',
 'Stolen Base 2B',
 'Pickoff Error 1B']

In [85]:
# Events that are excluded when counting at-bats in hitter_stats.
# Every event whose name contains 'Sac' is included; the guard skips a
# NULL event instead of raising on `'Sac' in None`.
sacs = [x for x in play_events if x and 'Sac' in x]
no_ab = [
    'Hit By Pitch',
    'Walk',
    'Intent Walk',
    'Catcher Interference',
    'Fan Interference',
] + sacs

# Show the final list as the cell output.
no_ab


['Hit By Pitch',
 'Walk',
 'Intent Walk',
 'Catcher Interference',
 'Fan Interference',
 'Sac Fly',
 'Sac Bunt',
 'Sac Fly Double Play',
 'Sac Bunt Double Play']

In [86]:
hits = [
    'Single',
    'Double',
    'Triple',
    'Home Run',
]

In [109]:
def hitter_stats(session,player_id,game_record=None,date=None):
    """
    Summarise a batter's plays in games of type 'R' that started before a cutoff.

    Parameters
    ----------
    session : SQLAlchemy session used to query ``Game`` and ``Play``.
    player_id : value matched against ``Play.batter_id``.
    game_record : optional ``Game`` instance; its ``dateTime`` is the cutoff.
    date : optional cutoff, compared directly with ``Game.dateTime``.

    Exactly one of ``game_record`` and ``date`` must be supplied. Only games
    with ``Game.dateTime`` strictly earlier than the cutoff are counted.

    Returns
    -------
    dict with the keys ``player_id``, ``plate_appearances``, ``at_bats``,
    ``hits``, ``singles``, ``doubles``, ``triples``, ``home_runs`` and
    ``walks``. ``plate_appearances`` is the number of matching Play rows;
    ``at_bats`` excludes events listed in the notebook-level ``no_ab``;
    ``walks`` counts the event 'Walk' only (not 'Intent Walk').

    Raises
    ------
    ValueError
        If neither or both of ``game_record`` and ``date`` are given.
        (Previously this only printed a message and returned ``None``.)
    """
    if game_record is None and date is None:
        raise ValueError('must specify a game or date')
    if game_record is not None and date is not None:
        raise ValueError('must specify either a date or a game. not both')

    cutoff = game_record.dateTime if game_record is not None else date

    # Several criteria passed to one filter() call are AND-ed together, so the
    # query does not depend on `and_` having been imported somewhere.
    rows = (
        session.query(Game, Play)
        .filter(Game.id == Play.game_id)
        .filter(Game.type == 'R', Game.dateTime < cutoff)
        .filter(Play.batter_id == player_id)
        .all()
    )
    events = [play.event for _game, play in rows]

    singles = events.count('Single')
    doubles = events.count('Double')
    triples = events.count('Triple')
    home_runs = events.count('Home Run')

    return {
        'player_id': player_id,
        'plate_appearances': len(events),
        # `no_ab` is the notebook-level list of events excluded from at-bats
        'at_bats': sum(1 for event in events if event not in no_ab),
        'hits': singles + doubles + triples + home_runs,
        'singles': singles,
        'doubles': doubles,
        'triples': triples,
        'home_runs': home_runs,
        'walks': events.count('Walk'),
    }

In [110]:
# Sample game: the latest one that started before 3 June 2019. Ordering in SQL
# makes the pick deterministic and avoids loading every earlier Game row just
# to keep the last element. first() gives None if nothing matches.
game_record_test = (
    session.query(Game)
    .filter(Game.dateTime < datetime(2019, 6, 3))
    .order_by(Game.dateTime.desc())
    .first()
)

# Stats for Mike Trout up to "now"; the numbers move as games are added.
trout_stats = hitter_stats(session, mike_trout_id, date=datetime.today())

In [111]:
trout_stats

{'player_id': 545361,
 'plate_appearances': 600,
 'at_bats': 469,
 'hits': 136,
 'singles': 63,
 'doubles': 26,
 'triples': 2,
 'home_runs': 45,
 'walks': 96}

In [152]:
import pandas as pd
def game_players_stats(session,game_record):
    """
    Build a DataFrame of hitter_stats rows for the players of both teams in a game.

    Parameters
    ----------
    session : SQLAlchemy session, passed through to hitter_stats.
    game_record : Game instance; its ``teams``, ``homeTeam_id`` and (through
        hitter_stats) ``dateTime`` are read.

    Returns
    -------
    pandas.DataFrame with one row per player: the hitter_stats columns plus
    ``home_away`` ('home' or 'away'). Home rows come first. As before, the
    per-team row position is kept in an ``index`` column by reset_index().
    """
    home_player_stats = []
    away_player_stats = []
    for team in game_record.teams:
        # Compare with the game passed in, not with a notebook-level variable.
        is_home = team.team_id == game_record.homeTeam_id
        team_stats = [hitter_stats(session, player_id, game_record) for player_id in team.players()]
        for player in team_stats:
            player['home_away'] = 'home' if is_home else 'away'
        if is_home:
            home_player_stats = team_stats
        else:
            away_player_stats = team_stats

    # DataFrame.append no longer exists in pandas 2; concat the per-side frames.
    # Empty sides are left out so concat never sees an empty frame.
    frames = [
        pd.DataFrame(records)
        for records in (home_player_stats, away_player_stats)
        if records
    ]
    if not frames:
        return pd.DataFrame().reset_index()
    return pd.concat(frames).reset_index()

In [153]:
players_df_test = game_players_stats(session,game_record_test)

In [154]:
players_df_test

Unnamed: 0,index,player_id,plate_appearances,at_bats,hits,singles,doubles,triples,home_runs,walks,home_away
0,0,547179,9,8,1,0,1,0,0,1,home
1,1,608385,199,180,43,25,8,0,10,14,home
2,2,624577,217,201,43,29,4,0,10,12,home
3,3,607237,0,0,0,0,0,0,0,0,home
4,4,669222,131,117,32,20,6,2,4,12,home
5,5,458015,225,195,48,34,9,1,4,24,home
6,6,435043,0,0,0,0,0,0,0,0,home
7,7,543101,19,17,3,3,0,0,0,0,home
8,8,628452,0,0,0,0,0,0,0,0,home
9,9,578428,199,189,57,42,9,2,4,7,home


In [106]:
gameTeam_record_test = game_record_test.teams[0]

In [108]:
gameTeam_record_test.players()

<generator object players at 0x10baf79a8>

Plot cumulative hits for a player

In [None]:
import matplotlib.pyplot as plt

In [None]:
ordered_games = session.query(Game).filter(Game.type=='R').order_by(Game.dateTime).all()

In [None]:
trout_PAs = session.query(Play).filter(Play.batter_id==mike_trout_id).all()

In [None]:
# One {'time', 'event'} record per play whose event is in the notebook-level `hits` list.
trout_hit_dict = [
    {'time': play.startTime, 'event': play.event}
    for play in trout_PAs
    if play.event in hits
]

In [None]:
# Hit timestamps in ascending order with the earliest one removed.
# NOTE(review): why the first timestamp is discarded is not visible here — confirm.
trout_hit_dates = sorted(record['time'] for record in trout_hit_dict if record)[1:]

In [None]:
# Running total: the k-th timestamp (counting from 1) is paired with k.
trout_cum_hits = [
    {'date': hit_date, 'cum_hits': running_total}
    for running_total, hit_date in enumerate(trout_hit_dates, start=1)
]

In [None]:
# Split the records into the x (dates) and y (cumulative hits) series for plotting.
dates, cum_hits = [], []
for point in trout_cum_hits:
    dates.append(point['date'])
    cum_hits.append(point['cum_hits'])

In [None]:
# plt.plot_date is deprecated in current Matplotlib; plot() accepts datetime
# values directly, and the 'o' format reproduces plot_date's marker-only default.
fig, ax = plt.subplots()
ax.plot(dates, cum_hits, 'o')
ax.set(title='Mike Trout cumulative hits', xlabel='Date', ylabel='Cumulative hits')
fig.autofmt_xdate()  # tilt the date tick labels so they do not overlap
plt.show()