In [1]:
from datetime import datetime

In [2]:
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey,DateTime,Boolean,Date,Time,Float,DATE,DATETIME,TIME
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship

import statsapi as mlb
from datetime import datetime
import time

In [3]:
class MyDatabase:
    # http://docs.sqlalchemy.org/en/latest/core/engines.html
    DB_ENGINE = {
       'sqlite': 'sqlite:///{DB}'
    }

    # Main DB Connection Ref Obj
    db_engine = None
    def __init__(self, dbtype, username='', password='', dbname=''):
        dbtype = dbtype.lower()
        if dbtype in self.DB_ENGINE.keys():
            engine_url = self.DB_ENGINE[dbtype].format(DB=dbname)
            self.db_engine = create_engine(engine_url)
            print(self.db_engine)
        else:
            print("DBType is not found in DB_ENGINE")

In [4]:
db = MyDatabase('sqlite')

Engine(sqlite:///)


In [5]:
from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=db.db_engine)
session = Session()
Base = declarative_base()

In [6]:
class Person(Base):
    __tablename__ = 'people'
    __table_args__ = {'extend_existing': True}
    
    def __init__(self,personId,session,commit=False):
        try:
            current_players=[item for sublist in 
                             session.execute("""select id from people""").fetchall() for item in sublist]
        except:
            current_players=[]
            print('no table found')
            session.rollback()
        if personId not in current_players:
            api_call = mlb.get('person',{'personId':personId})

            person = api_call['people'][0]
            person['birthDate'] = person.get('birthDate','1900-01-01')


            self.id=person.get('id','null')
            self.fullName=person.get('fullName','null')
            self.firstName=person.get('firstName','null')
            self.lastName=person.get('lastName','null')
            self.primaryNumber=person.get('primaryNumber','null')
            
            self.birthDate=datetime.date(datetime.strptime(person.get('birthDate','1900-01-01'),'%Y-%m-%d'))
            
            self.currentAge=person.get('currentAge','null')
            self.birthCity=person.get('birthCity','null')
            self.birthCountry=person.get('birthCountry','null')
            self.height=person.get('height','null')
            self.weight=person.get('weight','null')
            self.active=person.get('active','null')
            self.primaryPosition_code=person.get('primaryPosition',{'code':'null'})['code']
            self.primaryPosition_name=person.get('primaryPosition',{'name':'null'})['name']
            self.primaryPosition_type=person.get('primaryPosition',{'type':'null'})['type']
            self.primaryPosition_abbreviation=person.get('primaryPosition',{'abbreviation':'null'})['abbreviation']
            self.gender=person.get('gender','null')
            self.isPlayer=person.get('isPlayer','null')
            self.isVerified=person.get('isVerified','null')
            self.draftYear=person.get('draftYear','null')
            
            self.mlbDebutDate=datetime.date(datetime.strptime(person.get('mlbDebutDate','1900-01-01'),'%Y-%m-%d'))
            
            self.batSide=person.get('batSide',{'description':'null'})['description']
            self.pitchHand=person.get('pitchHand',{'description':'null'})['description']
            self.nameSlug=person.get('nameSlug','null')
            self.fullFMLName=person.get('fullFMLName','null')
            self.strikeZoneTop=person.get('strikeZoneTop','null')
            self.strikeZoneBottom=person.get('strikeZoneBottom','null')
            
            if commit==True:
                session.add(self)
                session.commit()
        else:
            pass
    
    id = Column(Integer, primary_key=True)
    fullName = Column(String(25))
    firstName = Column(String(25))
    lastName = Column(String(25))
    primaryNumber = Column(Integer)
    birthDate = Column(Date)
    currentAge = Column(Integer)
    birthCity = Column(String(25))
    birthCountry = Column(String(25))
    height = Column(String(25))
    weight = Column(Integer)
    active = Column(Boolean)
    primaryPosition_code = Column(String(10))
    primaryPosition_name = Column(String(25))
    primaryPosition_type = Column(String(25))
    primaryPosition_abbreviation = Column(String(25))
    gender = Column(String(5))
    isPlayer = Column(Boolean)
    isVerified = Column(Boolean)
    draftYear = Column(Integer)
    mlbDebutDate = Column(Date)
    batSide = Column(String(10))
    pitchHand = Column(String(10))
    nameSlug = Column(String(30))
    fullFMLName = Column(String(50))
    strikeZoneTop = Column(Float)
    strikeZoneBottom = Column(Float)
    
    def __repr__(self):
        return "<Person(nameSlug='%s')>" % self.nameSlug

In [7]:
class Pitch(Base):
    __tablename__= 'pitches'
    __table_args__ = {'extend_existing': True}
    
    def __init__(self,pitch_dict,play_id,session):
        details = pitch_dict['details']
        count = pitch_dict['count']
        pitchData = pitch_dict.get('pitchData',{'null':'null'})
        coordinates = pitchData['coordinates']
        breaks=pitchData['breaks']

        self.call_code=details['code']
        self.call_description=details['description']
        self.ballColor=details['ballColor']
        self.trailColor=details['trailColor']
        self.isInPlay=details['isInPlay']
        self.isStrike=details['isStrike']
        self.isBall=details['isBall']
        self.type_code=details['type']['code']
        self.type_description=details['type']['description']

        self.count_balls=count['balls']
        self.count_strikes=count['strikes']

        self.startSpeed=pitchData['startSpeed']
        self.endSpeed=pitchData['endSpeed']
        self.strikeZoneTop=pitchData['strikeZoneTop']
        self.strikeZoneBottom=pitchData['strikeZoneBottom']

        self.aY = coordinates.get('aY',99.9)
        self.aZ = coordinates.get('aZ',99.9)
        self.pfxX = coordinates.get('pfxX',99.9)
        self.pfxZ = coordinates.get('pfxZ',99.9)
        self.pX = coordinates.get('pX',99.9)
        self.pZ = coordinates.get('pZ',99.9)
        self.vX0 = coordinates.get('vX0',99.9)
        self.vY0 = coordinates.get('vY0',99.9)
        self.vZ0 = coordinates.get('vZ0',99.9)
        self.x = coordinates.get('x',99.9)
        self.y = coordinates.get('y',99.9)
        self.x0 = coordinates.get('x0',99.9)
        self.y0 = coordinates.get('y0',99.9)
        self.z0 = coordinates.get('zy',99.9)
        self.aX = coordinates.get('aX',99.9)

        self.breakAngle = breaks.get('breakAngle',99.9)
        self.breakLength = breaks.get('breakLength',99.9)
        self.breakY = breaks.get('breakY',99.9)
        self.spinRate = breaks.get('spinRate',99.9)
        self.spinDirection = breaks.get('spinDirection',99.9)

        self.zone = pitchData.get('zone',0)
        self.typeConfidence = pitchData.get('typeConfidence',99.9)
        self.plateTime = pitchData.get('plateTime',99.9)
        self.extension = pitchData.get('extension',99.9)

        self.pfxId = pitch_dict['pfxId']
        self.pitchId = pitch_dict['playId']
        self.pitchNumber = pitch_dict['pitchNumber']
        self.startTime = datetime.strptime(pitch_dict['startTime'],'%Y-%m-%dT%H:%M:%S.%fZ')
        self.endTime = datetime.strptime(pitch_dict['endTime'],'%Y-%m-%dT%H:%M:%S.%fZ')
        self.play_id = play_id
        

    call_code = Column(String(3))
    call_description = Column(String(25))
    ballColor = Column(String(50))
    trailColor = Column(String(50))
    isInPlay = Column(Boolean)
    isStrike = Column(Boolean)
    isBall = Column(Boolean)
    type_code = Column(String(10))
    type_description = Column(String(30))
    hasReview = Column(Boolean)
    count_balls = Column(Integer)
    count_strikes = Column(Integer)
    startSpeed = Column(Float)
    endSpeed = Column(Float)
    strikeZoneTop=Column(Float)
    strikeZoneBottom=Column(Float)
    aY = Column(Float)
    aZ = Column(Float)
    pfxX = Column(Float)
    pfxZ = Column(Float)
    pX = Column(Float)
    pZ = Column(Float)
    vX0 = Column(Float)
    vY0 = Column(Float)
    vZ0 = Column(Float)
    x = Column(Float)
    y = Column(Float)
    x0 = Column(Float)
    y0 = Column(Float)
    z0 = Column(Float)
    aX = Column(Float)
    breakAngle = Column(Float)
    breakLength = Column(Float)
    breakY = Column(Float)
    spinRate = Column(Float)
    spinDirection = Column(Float)
    zone = Column(Integer)
    typeConfidence = Column(Float)
    plateTime = Column(Float)
    extension = Column(Float)
    index = Column(Integer)
    pfxId = Column(String(200))
    pitchId = Column(String(200),primary_key=True)
    startTime = Column(DateTime)
    endTime = Column(DateTime)
    
    play_id = Column(String(200),ForeignKey("plays.id"))
    play = relationship('Play',back_populates='pitches')
    
    def __repr__(self):
        return "<Pitch(id=%s)>" % self.pitchId

# Declare the mapping for the Plays table 
class Play(Base):
    __tablename__= 'plays'
    __table_args__ = {'extend_existing': True} 
    
    def __init__(self,play,game_id,session,commit=False):
            result = play['result']
            about = play['about']
            batter = play.get('matchup',{'batter':'null'})['batter']
            pitcher = play.get('matchup',{'pitcher':'null'})['pitcher']
            count = play['count']
            Person(batter['id'],session,commit=commit)
            Person(pitcher['id'],session,commit=commit)
            
            play_id = game_id+'AB_'+str(about['atBatIndex'])

            self.id=play_id
            self.type=result['type']
            self.event=result.get('event','null')
            self.eventType=result.get('eventType','null')
            self.description=result.get('description','null')
            self.rbi=result.get('rbi','null')
            self.awayScore=result.get('awayScore','null')
            self.homeScore=result.get('homeScore','null')

            self.atBatIndex=about.get('atBatIndex','null')
            self.halfInning=about['halfInning']
            self.inning=about['inning']
            
            self.startTime=datetime.time(datetime.strptime(about.get('startTime','1900-01-01T01:01:1.0Z'),'%Y-%m-%dT%H:%M:%S.%fZ'))
            
            self.endTime=datetime.time(datetime.strptime(about.get('endTime','1900-01-01T01:01:01.0Z'),'%Y-%m-%dT%H:%M:%S.%fZ'))
            
            self.isComplete=about.get('isComplete','null')
            self.isScoringPlay=about.get('isScoringPlay','null')
            self.hasReview=about.get('hasReview','null')
            self.hasOut=about.get('hasOut','null')
            self.captivatingIndex=about.get('captivatingIndex','null')

            self.batter_id=batter.get('id','null')
            self.pitcher_id=pitcher.get('id','null')

            self.count_balls=count.get('balls','null')
            self.count_strikes=count.get('striks','null')
            self.count_outs=count.get('outs','null')

            self.num_pitches=len(play['pitchIndex'])
            self.num_actions=len(play['actionIndex'])
            self.num_runners=len(play['runners'])

            self.game_id=game_id
            
            # for every play, there are pitches, runners, and actions
            pitch_dicts = [play['playEvents'][x] for x in play['pitchIndex'] if play['playEvents'][x]['isPitch']==True]
            pitch_records = []
            for pitch_dict in pitch_dicts:
                pitch_records.append(Pitch(pitch_dict,play_id,session))
                session.add_all(pitch_records)
                session.commit()


    id = Column(String(200),primary_key=True,unique=True)
    type = Column(String(10))
    event = Column(String(25))
    eventType = Column(String(25))
    description = Column(String(250))
    rbi = Column(Integer)
    awayScore = Column(Integer)
    homeScore = Column(Integer)
    
    atBatIndex = Column(Integer)
    halfInning = Column(String(10))
    inning = Column(Integer)
    startTime = Column(Time)
    endTime = Column(Time)
    isComplete = Column(Boolean)
    isScoringPlay = Column(Boolean)
    hasReview = Column(Boolean)
    hasOut = Column(Boolean)
    captivatingIndex = Column(Integer)
    
    batter_id = Column(Integer,ForeignKey("people.id"))
    pitcher_id = Column(Integer,ForeignKey("people.id"))
    
    count_balls = Column(Integer)
    count_strikes = Column(Integer)
    count_outs = Column(Integer)
    
    num_pitches = Column(Integer)
    num_actions = Column(Integer)
    num_runners = Column(Integer)
    
    def __repr__(self):
        return "<Play(game_id='%s',atBatIndex='%s')>" % (
                     self.game_id,self.atBatIndex)
    
# Declare mapping for the game table 
class Game(Base):
    __tablename__ = 'games'
    __table_args__ = {'extend_existing': True}
    
    def __init__(self,pk,session,commit=False,verbose=False):
        """
        Takes in a gamepk and a sql alchemy session, calls the 'game' api endpoint, and turns the information into a
        mapped class instance for that game as well as all the plays in that game. When commit=True, the 
        function will also add these instances to a sqlalchemy session and commit them to the database. 
        """
        try:
            already_added_pk = [item for sublist in session.execute('select pk from games').fetchall() for item in sublist]
        except:
            print("query for existing records didn't work")
            session.rollback()
            already_added_pk=[]
        if int(pk) not in already_added_pk:
            api_call = mlb.get('game',{'gamePk':pk})

            gameData = api_call['gameData']
            game = gameData['game']
            _datetime = gameData['datetime']
            status = gameData['status']
            weather = gameData['weather']
            probablePitchers = gameData['probablePitchers']

            liveData = api_call['liveData']
            all_plays = liveData['plays']['allPlays']
            

            self.pk=game['pk']
            self.type=game['type']
            self.doubleHeader=game['doubleHeader']
            self.id=game['id']
            self.gamedayType=game['gamedayType']
            self.tiebreaker=game['tiebreaker']
            self.gameNumber=game['gameNumber']
            self.calenderEventId=game['calendarEventID']
            self.season=game['season']

            self.dateTime=datetime.strptime(_datetime['dateTime'],'%Y-%m-%dT%H:%M:%SZ')
            self.originalDate=datetime.date(datetime.strptime(_datetime['originalDate'],"%Y-%m-%d"))
            self.dayNight=_datetime['dayNight']
            self.time=datetime.time(datetime.strptime(_datetime['time']+_datetime['ampm'],"%H:%M%p"))

            self.abstractGameState=status['abstractGameState']
            self.codedGameState=status['codedGameState']
            self.detailedState=status['detailedState']
            self.statusCode=status['statusCode']
            self.abstractGameCode=status['abstractGameCode']

            self.homeTeam_id=gameData['teams']['home']['id']
            self.awayTeam_id=gameData['teams']['away']['id']

            self.condition=weather.get('condition','null')
            self.temp=weather.get('temp','null')
            self.wind=weather.get('wind','null')

            self.venue_id=gameData['venue']['id']

            self.home_probablePitcher=probablePitchers.get('home',{'null':'null'}).get('id','null')
            self.away_probablePitcher=probablePitchers.get('away',{'null':'null'}).get('id','null') 
            
            play_records = []
            for play in all_plays:
                play_records.append(Play(play,game['id'],session,commit=commit))
            
            if commit:
                if verbose:
                    print('adding game record')
                session.add(self)
                if verbose:
                    print('adding play records')
                session.add_all(play_records)
                if verbose:
                    print('commit...')
                session.commit()
        
    
    pk = Column(Integer)
    type = Column(String(1))
    doubleHeader = Column(String(1))
    id = Column(String(150), primary_key=True,unique=True)
    gamedayType = Column(String(1))
    tiebreaker = Column(String(1))
    gameNumber = Column(Integer)
    calenderEventId = Column(String(50))
    season = Column(Integer)
    
    dateTime = Column(String(200))
    originalDate = Column(Date)
    dayNight = Column(String(12))
    time = Column(Time)
    
    abstractGameState = Column(String(12))
    codedGameState = Column(String(3))
    detailedState = Column(String(12))
    statusCode = Column(String(3))
    abstractGameCode = Column(String(3))
    
    homeTeam_id = Column(Integer)
    awayTeam_id = Column(Integer)
    
    condition = Column(String(25))
    temp = Column(Integer)
    wind = Column(String(50))
    
    venue_id = Column(Integer)
    
    home_probablePitcher = Column(Integer)
    away_probablePitcher = Column(Integer)
    
    def __repr__(self): 
        return "<Game(pk='%s',id='%s')>" % (
                        self.pk, self.id)

Play.game_id = Column(String(150),ForeignKey('games.id'))
Play.pitches = relationship('Pitch',order_by=Pitch.startTime,back_populates='play')    
Play.game = relationship("Game",back_populates="plays")

Game.plays = relationship(
    "Play",order_by=Play.id,back_populates='game')

Play.batter = relationship('Person',back_populates='hitter_at_bats',foreign_keys=Play.batter_id)
Play.pitcher = relationship('Person',back_populates='pitcher_at_bats',foreign_keys=Play.pitcher_id)

Person.hitter_at_bats = relationship('Play',order_by=Play.startTime,
                                     back_populates='batter',foreign_keys=Play.batter_id)

Person.pitcher_at_bats = relationship('Play',order_by=Play.startTime,
                                     back_populates='pitcher',foreign_keys=Play.pitcher_id)



In [8]:
Base.metadata.create_all(db.db_engine)

In [9]:
Game(567491,session,commit=True,verbose=True)

adding game record
adding play records
commit...


<Game(pk='567491',id='2019/06/11/nynmlb-nyamlb-1')>

In [10]:
session.execute("""select * from games""").fetchall()

[(567491, 'R', 'S', '2019/06/11/nynmlb-nyamlb-1', 'P', 'N', 1, '14-567491-2019-06-11', 2019, '2019-06-11 17:05:00', '2019-06-11', 'day', '01:05:00.000000', 'Final', 'F', 'Final', 'F', 'F', 147, 121, 'Partly Cloudy', 72, '16 mph, In From LF', 3313, 547888, 554430)]

In [11]:
session.execute("""select * from pitches""").fetchall()

[('C', 'Called Strike', 'rgba(170, 21, 11, 1.0)', 'rgba(0, 0, 254, 1.0)', 0, 1, 0, 'SL', 'Slider', None, 0, 1, 81.0, 75.5, 3.39, 1.63, 19.29, -31.29, 3.39, 0.62, 0.03, 2.17, 3.42, -117.89, -0.9, 115.75, 180.07, -1.87, 50.0, 99.9, 4.84, 9.6, 9.6, 24.0, 2238.0, 100.0, 8, 2.0, 0.47, 5.44, None, '190611_170920', '3439a723-6a36-4621-96c2-d49185e077ba', '2019-06-11 17:09:15.000000', '2019-06-11 17:09:32.000000', '2019/06/11/nynmlb-nyamlb-1AB_0'),
 ('B', 'Ball', 'rgba(39, 161, 39, 1.0)', 'rgba(188, 0, 33, 1.0)', 0, 0, 1, 'FF', 'Four-Seam Fastball', None, 1, 1, 90.8, 83.8, 3.4, 1.61, 24.54, -17.91, -7.37, 7.97, -0.07, 4.17, 6.54, -132.03, 0.02, 119.64, 126.19, -1.61, 50.0, 99.9, -13.18, 33.6, 4.8, 24.0, 2072.0, 219.0, 11, 2.0, 0.42, 5.63, None, '190611_170937', '09ed14e1-d887-48e8-a462-d294a952c961', '2019-06-11 17:09:32.000000', '2019-06-11 17:09:50.000000', '2019/06/11/nynmlb-nyamlb-1AB_0'),
 ('B', 'Ball', 'rgba(39, 161, 39, 1.0)', 'rgba(119, 0, 152, 1.0)', 0, 0, 1, 'FS', 'Splitter', None, 2

In [12]:
import pandas as pd

In [13]:
pd.read_sql_query("""select pitch.type_description,pitch.play_id,play.pitcher_id from pitches pitch inner join plays play on pitch.play_id=play.id""",db.db_engine)

Unnamed: 0,type_description,play_id,pitcher_id
0,Slider,2019/06/11/nynmlb-nyamlb-1AB_0,547888
1,Four-Seam Fastball,2019/06/11/nynmlb-nyamlb-1AB_0,547888
2,Splitter,2019/06/11/nynmlb-nyamlb-1AB_0,547888
3,Splitter,2019/06/11/nynmlb-nyamlb-1AB_0,547888
4,Four-Seam Fastball,2019/06/11/nynmlb-nyamlb-1AB_0,547888
...,...,...,...
302,Four-Seam Fastball,2019/06/11/nynmlb-nyamlb-1AB_79,571735
303,Changeup,2019/06/11/nynmlb-nyamlb-1AB_80,571735
304,Changeup,2019/06/11/nynmlb-nyamlb-1AB_80,571735
305,Changeup,2019/06/11/nynmlb-nyamlb-1AB_80,571735


In [14]:
db.db_engine.table_names()

['games', 'people', 'pitches', 'plays']

In [15]:
person=session.query(Person).first()

In [16]:
play=session.query(Play).first()

In [17]:
play.pitches

[<Pitch(id=3439a723-6a36-4621-96c2-d49185e077ba)>,
 <Pitch(id=09ed14e1-d887-48e8-a462-d294a952c961)>,
 <Pitch(id=ba7446d8-3c24-4e85-8190-9fa36cad9661)>,
 <Pitch(id=38dccd69-da5d-45db-934c-54087c04f8a1)>,
 <Pitch(id=0bdba930-2980-4c47-8d04-1409140836c1)>]

In [None]:
class Team(Base):
    __tablename__ = 'teams'
    __table_args__ = {'extend_existing': True}
    
    id = Column(Integer,primary_key=True)
    name = Column(String(50))
    venue_id = Column(Integer)
    teamCode = Column(String(10))
    abbreviation = Column(String(10))
    teamName = Column(String(25))
    locationName = Column(String(25))
    league_id = Column(Integer)
    division_id = Column(Integer)
    
    def __repr__(self):
        return "<Team(name='%s')>" % self.name

In [None]:
class GameTeamLink(Base):
    __tablename__ = 'game_team_link'
    __table_args__ = {'extend_existing': True}
    
    game_id = Column(String(150),ForeignKey('games.id'),primary_key=True)
    team_id = Column(Integer,ForeignKey('teams.id'),primary_key=True)
    
    # add roster at the time of game 
    player_1_id = Column(Integer,ForeignKey('people.id'))
    player_2_id = Column(Integer,ForeignKey('people.id'))
    player_3_id = Column(Integer,ForeignKey('people.id'))
    player_4_id = Column(Integer,ForeignKey('people.id'))
    player_5_id = Column(Integer,ForeignKey('people.id'))
    player_6_id = Column(Integer,ForeignKey('people.id'))
    player_7_id = Column(Integer,ForeignKey('people.id'))
    player_8_id = Column(Integer,ForeignKey('people.id'))
    player_9_id = Column(Integer,ForeignKey('people.id'))
    player_10_id = Column(Integer,ForeignKey('people.id'))
    player_11_id = Column(Integer,ForeignKey('people.id'))
    player_12_id = Column(Integer,ForeignKey('people.id'))
    player_13_id = Column(Integer,ForeignKey('people.id'))
    player_14_id = Column(Integer,ForeignKey('people.id'))
    player_15_id = Column(Integer,ForeignKey('people.id'))
    player_16_id = Column(Integer,ForeignKey('people.id'))
    player_17_id = Column(Integer,ForeignKey('people.id'))
    player_18_id = Column(Integer,ForeignKey('people.id'))
    player_19_id = Column(Integer,ForeignKey('people.id'))
    player_20_id = Column(Integer,ForeignKey('people.id'))
    player_21_id = Column(Integer,ForeignKey('people.id'))
    player_22_id = Column(Integer,ForeignKey('people.id'))
    player_23_id = Column(Integer,ForeignKey('people.id'))
    player_24_id = Column(Integer,ForeignKey('people.id'))
    player_25_id = Column(Integer,ForeignKey('people.id'))
    player_26_id = Column(Integer,ForeignKey('people.id'))
    player_27_id = Column(Integer,ForeignKey('people.id'))
    player_28_id = Column(Integer,ForeignKey('people.id'))
    player_29_id = Column(Integer,ForeignKey('people.id'))
    player_30_id = Column(Integer,ForeignKey('people.id'))
    player_31_id = Column(Integer,ForeignKey('people.id'))
    player_32_id = Column(Integer,ForeignKey('people.id'))
    player_33_id = Column(Integer,ForeignKey('people.id'))
    player_34_id = Column(Integer,ForeignKey('people.id'))
    player_35_id = Column(Integer,ForeignKey('people.id'))
    player_36_id = Column(Integer,ForeignKey('people.id'))
    player_37_id = Column(Integer,ForeignKey('people.id'))
    player_38_id = Column(Integer,ForeignKey('people.id'))
    player_39_id = Column(Integer,ForeignKey('people.id'))
    player_40_id = Column(Integer,ForeignKey('people.id'))
    
    #relationships
    game = relationship('Game',back_populates='teams')
    team = relationship('Team',back_populates='games')
    
    def __repr__(self):
        return "<GameTeamLink(game_id='%s',team_id='%s')>" % self.game_id,self.team_id

# update game and team tables 
Game.teams = relationship("GameTeamLink",back_populates='game')
Team.games = relationship("GameTeamLink",back_populates='team')

In [None]:
def create_add_personRecord(personId):
    current_players=[item for sublist in 
                     session.execute("""select id from people""").fetchall() for item in sublist]
    if personId not in current_players:
        api_call = mlb.get('person',{'personId':personId})

        person = api_call['people'][0]
        person['birthDate'] = person.get('birthDate','1900-01-01')


        person_record = Person(id=person.get('id','null'),
                               fullName=person.get('fullName','null'),
                               firstName=person.get('firstName','null'),
                               lastName=person.get('lastName','null'),
                               primaryNumber=person.get('primaryNumber','null'),
                               birthDate=datetime.strptime(person.get('birthDate','1900-01-01'),'%Y-%m-%d'),
                               currentAge=person.get('currentAge','null'),
                               birthCity=person.get('birthCity','null'),
                               birthCountry=person.get('birthCountry','null'),
                               height=person.get('height','null'),
                               weight=person.get('weight','null'),
                               active=person.get('active','null'),
                               primaryPosition_code=person.get('primaryPosition',{'code':'null'})['code'],
                               primaryPosition_name=person.get('primaryPosition',{'name':'null'})['name'],
                               primaryPosition_type=person.get('primaryPosition',{'type':'null'})['type'],
                               primaryPosition_abbreviation=person.get('primaryPosition',{'abbreviation':'null'})['abbreviation'],
                               gender=person.get('gender','null'),
                               isPlayer=person.get('isPlayer','null'),
                               isVerified=person.get('isVerified','null'),
                               draftYear=person.get('draftYear','null'),
                               mlbDebutDate=datetime.strptime(person.get('mlbDebutDate','1900-01-01'),'%Y-%m-%d'),
                               batSide=person.get('batSide',{'description':'null'})['description'],
                               pitchHand=person.get('pitchHand',{'description':'null'})['description'],
                               nameSlug=person.get('nameSlug','null'),
                               fullFMLName=person.get('fullFMLName','null'),
                               strikeZoneTop=person.get('strikeZoneTop','null'),
                               strikeZoneBottom=person.get('strikeZoneBottom','null'),
                              )
        session.add(person_record)
        session.commit()
    else:
        pass

In [None]:
def create_gameRecord_playsRecords(pk,session,commit=True):
    """
    This function takes in a gamepk and a sql alchemy session, calls the 'game' api endpoint, and turns the information into a
    mapped class instance for that game as well as all the plays in that game. When commit=True, the 
    function will also add these instances to a sqlalchemy session and commit them to the database. 
    """
    
    already_added_pk = [item for sublist in session.execute('select pk from games').fetchall() for item in sublist]
    
    if int(pk) not in already_added_pk:
        api_call = mlb.get('game',{'gamePk':pk})

        gameData = api_call['gameData']
        game = gameData['game']
        _datetime = gameData['datetime']
        status = gameData['status']
        weather = gameData['weather']
        probablePitchers = gameData['probablePitchers']

        liveData = api_call['liveData']
        all_plays = liveData['plays']['allPlays']
    
        already_added = [item for sublist in session.execute('select id from games').fetchall() for item in sublist]
    

        game_record = Game(pk=game['pk'],
                     type=game['type'],
                     doubleHeader=game['doubleHeader'],
                     id=game['id'],
                     gamedayType=game['gamedayType'],
                     tiebreaker=game['tiebreaker'],
                     gameNumber=game['gameNumber'],
                     calenderEventId=game['calendarEventID'],
                     season=game['season'],

                     dateTime=datetime.strptime(_datetime['dateTime'],'%Y-%m-%dT%H:%M:%SZ'),
                     originalDate=datetime.strptime(_datetime['originalDate'],"%Y-%m-%d"),
                     dayNight=_datetime['dayNight'],
                     time=datetime.time(datetime.strptime(_datetime['time']+_datetime['ampm'],"%H:%M%p")),

                     abstractGameState=status['abstractGameState'],
                     codedGameState=status['codedGameState'],
                     detailedState=status['detailedState'],
                     statusCode=status['statusCode'],
                     abstractGameCode=status['abstractGameCode'],

                     homeTeam_id=gameData['teams']['home']['id'],
                     awayTeam_id=gameData['teams']['away']['id'],

                     condition=weather.get('condition','null'),
                     temp=weather.get('temp','null'),
                     wind=weather.get('wind','null'),

                     venue_id=gameData['venue']['id'],

                     home_probablePitcher=probablePitchers.get('home',{'null':'null'}).get('id','null'),
                     away_probablePitcher=probablePitchers.get('away',{'null':'null'}).get('id','null') 
                     )
        play_records = []
        for play in all_plays:
            result = play['result']
            about = play['about']
            batter = play.get('matchup',{'batter':'null'})['batter']
            pitcher = play.get('matchup',{'pitcher':'null'})['pitcher']
            count = play['count']
            create_add_personRecord(batter['id'])
            create_add_personRecord(pitcher['id'])

            play_record = Play(id=str(game['pk'])+game['id']+str(about['atBatIndex']),
                              type=result['type'],
                              event=result.get('event','null'),
                              eventType=result.get('eventType','null'),
                              description=result.get('description','null'),
                              rbi=result.get('rbi','null'),
                              awayScore=result.get('awayScore','null'),
                              homeScore=result.get('homeScore','null'),

                              atBatIndex=about.get('atBatIndex','null'),
                              halfInning=about['halfInning'],
                              inning=about['inning'],
                              startTime=datetime.strptime(about.get('startTime','1900-01-01T01:01:1.0Z'),'%Y-%m-%dT%H:%M:%S.%fZ'),
                              endTime=datetime.strptime(about.get('endTime','1900-01-01T01:01:01.0Z'),'%Y-%m-%dT%H:%M:%S.%fZ'),
                              isComplete=about.get('isComplete','null'),
                              isScoringPlay=about.get('isScoringPlay','null'),
                              hasReview=about.get('hasReview','null'),
                              hasOut=about.get('hasOut','null'),
                              captivatingIndex=about.get('captivatingIndex','null'),

                              batter_id=batter.get('id','null'),
                              pitcher_id=pitcher.get('id','null'),

                              count_balls=count.get('balls','null'),
                              count_strikes=count.get('striks','null'),
                              count_outs=count.get('outs','null'),

                              num_pitches=len(play['pitchIndex']),
                              num_actions=len(play['actionIndex']),
                              num_runners=len(play['runners']),

                              game_id=game['id'] 
                              )
            play_records.append(play_record)
            
        if commit:
            session.add(game_record)
            session.commit()

            session.add_all(play_records)
            session.commit()
        else:
        
            return game_record,play_records


In [None]:
session.rollback()

In [None]:
create_gameRecord_playsRecords(567491,session)

In [None]:
def create_addTeams(session):
    team_query = session.query(Team).all()
    already_added = [instance.id for instance in team_query]
    
    team_ids = [item for sublist in session.execute('select homeTeam_id,awayTeam_id from games').fetchall() for item in sublist]
    team_records=[]
    for team_id in team_ids:
        if team_id not in already_added:
            team = mlb.get('team',{'teamId':team_id})['teams'][0]

            team_record = Team(id=team['id'],
                              name=team['name'],
                              venue_id=team['venue']['id'],
                              teamCode=team['teamCode'],
                              abbreviation=team['abbreviation'],
                              teamName=team['teamName'],
                              locationName=team.get('locationName','null'),
                              league_id=team.get('league',{'id':'null'})['id'],
                              division_id=team.get('division',{'id':'null'})['id'])

            team_records.append(team_record)
        session.add_all(team_records)
        session.commit()

In [None]:
create_addTeam([147],session)

In [None]:
teams = [item for sublist in session.execute('select homeTeam_id,awayTeam_id from games').fetchall() for item in sublist]

In [None]:
create_addTeam(teams,session)

In [None]:
# Series of functions to create and add GameTeamLink records        
def get_roster_inputs(query):   
    roster_inputs = []
    for instance in query.all():
        roster_input_dict = {'date':datetime.strftime(instance.dateTime,'%Y-%m-%d'),
                             'season':instance.season,
                             'homeTeam':instance.homeTeam_id,
                             'awayTeam':instance.awayTeam_id,
                             }
        roster_inputs.append(roster_input_dict)
        
    return roster_inputs

def get_roster(roster_input_dict):
    #player_list = ['player_'+str(x) for x in range(1,41)]
    home = mlb.get('team_roster',
                   {'teamId':roster_input_dict['homeTeam'],
                    'rosterType':'active',
                    'season':roster_input_dict['season'],
                    'date':roster_input_dict['date']
                   })['roster']
    home_roster_ids = [x['person']['id'] for x in home]
    player_list = ['player_'+str(x) for x in range(1,len(home_roster_ids)+1)]
    home_roster_dict = {x:y for x,y in zip(player_list,home_roster_ids)}
    home_roster_dict['teamId'] = roster_input_dict['homeTeam']
    
    away = mlb.get('team_roster',
                   {'teamId':roster_input_dict['awayTeam'],
                    'rosterType':'active',
                    'season':roster_input_dict['season'],
                    'date':roster_input_dict['date']
                   })['roster']
    away_roster_ids = [x['person']['id'] for x in away]
    player_list = ['player_'+str(x) for x in range(1,len(away_roster_ids)+1)]
    away_roster_dict = {x:y for x,y in zip(player_list,away_roster_ids)}
    away_roster_dict['teamId']=roster_input_dict['awayTeam']
    return home_roster_dict,away_roster_dict

def create_GameTeamLink(game_ids):
    records = []
    for game_id in game_ids:
        game_query = session.query(Game).filter_by(id=game_id)
        roster_input_dicts = get_roster_inputs(game_query)
        home_roster,away_roster = get_roster(roster_input_dicts[0])
        rosters = [home_roster,away_roster]
#       rosters.append(roster)
        
        for roster in rosters:
            game_team_record = GameTeamLink(game_id=game_id,
                                            team_id=roster['teamId'],

                                            player_1_id = roster.get('player_1','null'),
                                            player_2_id = roster.get('player_2','null'),
                                            player_3_id = roster.get('player_3','null'),
                                            player_4_id = roster.get('player_4','null'),
                                            player_5_id = roster.get('player_5','null'),
                                            player_6_id = roster.get('player_6','null'),
                                            player_7_id = roster.get('player_7','null'),
                                            player_8_id = roster.get('player_8','null'),
                                            player_9_id = roster.get('player_9','null'),
                                            player_10_id = roster.get('player_10','null'),
                                            player_11_id = roster.get('player_11','null'),
                                            player_12_id = roster.get('player_12','null'),
                                            player_13_id = roster.get('player_13','null'),
                                            player_14_id = roster.get('player_14','null'),
                                            player_15_id = roster.get('player_15','null'),
                                            player_16_id = roster.get('player_16','null'),
                                            player_17_id = roster.get('player_17','null'),
                                            player_18_id = roster.get('player_18','null'),
                                            player_19_id = roster.get('player_19','null'),
                                            player_20_id = roster.get('player_20','null'),
                                            player_21_id = roster.get('player_21','null'),
                                            player_22_id = roster.get('player_22','null'),
                                            player_23_id = roster.get('player_23','null'),
                                            player_24_id = roster.get('player_24','null'),
                                            player_25_id = roster.get('player_25','null'),
                                            player_26_id = roster.get('player_26','null'),
                                            player_27_id = roster.get('player_27','null'),
                                            player_28_id = roster.get('player_28','null'),
                                            player_29_id = roster.get('player_29','null'),
                                            player_30_id = roster.get('player_30','null'),
                                            player_31_id = roster.get('player_31','null'),
                                            player_32_id = roster.get('player_32','null'),
                                            player_33_id = roster.get('player_33','null'),
                                            player_34_id = roster.get('player_34','null'),
                                            player_35_id = roster.get('player_35','null'),
                                            player_36_id = roster.get('player_36','null'),
                                            player_37_id = roster.get('player_37','null'),
                                            player_38_id = roster.get('player_38','null'),
                                            player_39_id = roster.get('player_39','null'),
                                            player_40_id = roster.get('player_40','null')
                                           )
            records.append(game_team_record)
    return records

def chunk(n,list_to_chunk):
    """
    takes in n, and a list to chunk. returns a list of lists with n length. The last chunk size may or may not 
    be equal to n. 
    """
    return [ list_to_chunk[i:i+n] for i in range(0,len(list_to_chunk),n) ]

def create_add_GameTeamLink(session,start=0,stop=None,chunk_size=50):   
    # collect game_ids from the games table 
    ids_list_test = session.query(Game.id).all()[start:stop]
    ids_list_test=[item for sublist in ids_list_test for item in sublist]

    # collect game_ids from the game_link table
    # I'll naturally have duplicates so I think I'll make it a set 
    already_added = list({item for sublist in session.query(GameTeamLink.game_id).all() for item in sublist})

    games_to_get = [game for game in ids_list_test if game not in already_added]
    
    list_of_chunks = chunk(chunk_size,games_to_get)
    count = 1
    
    for _chunk in list_of_chunks:
        try:
            print(f'starting chunk {count} out of {len(list_of_chunks)}')
            games_teams_to_add = create_GameTeamLink(_chunk)

            session.add_all(games_teams_to_add)
            session.commit()

            count = count+1
        except:
            print('chunk failed. Rolling back the session and trying the next chunk')
            session.rollback()
            continue 

In [None]:
create_add_GameTeamLink(session)

In [None]:
from os import walk
import csv
import re
def read_gamePks():
    gamePks_path = "/Users/schlinkertc/code/mlb_predictions/gamePks"
    f = []
    for (dirpath, dirnames, filenames) in walk(gamePks_path):
        f.extend(filenames)
        break
    pk_paths = [gamePks_path + '/' + x for x in f if x[0]!= '.']
    
    gamePks = {}
    for path in pk_paths:
        season = re.findall('/gamePks/([^.csv]+)',path)
        with open(path, 'r') as f:
            reader = csv.reader(f)
            seasonPks = list(reader)
        gamePks[season[0]] = [item for sublist in seasonPks for item in sublist]
    return gamePks

In [None]:
gamePk_dict=read_gamePks()

In [None]:
games_2019 = gamePk_dict['2019']

In [None]:
count = 1 
for game in games_2019:
    print(f'starting {count} out of {len(games_2019)}')
    create_gameRecord_playsRecords(game,session)
    count+=1
create_addTeams(session)
create_add_GameTeamLink(session)

In [11]:
api_call = mlb.get('game',{'gamePk':567491})

plays = api_call['liveData']['plays']['allPlays']

len(plays)

play_test = plays[0]

play_test

pitches_test=[play_test['playEvents'][x] for x in play_test['pitchIndex']]
actions_test = [play_test['playEvents'][x] for x in play_test['actionIndex']]
runners_test = [play_test['runners'][x] for x in play_test['runnerIndex']]

runners_test[0]

actions_test[0]

len(pitches_test)

play_test

pitch_test=pitches_test[1]

pitch_test['pitchData']

In [None]:
def create_pitch_record(pitch_dict):
    details = pitch_dict['details']
    count = pitch_dict['count']
    pitchData = pitch_dict['pitchData']
    coordinates = pitchData['coordinates']
    breaks=pitchData['breaks']
    
    pitch_record=Pitch(
        call_code=details['code']
        call_description=details['description']
        ballColor=details['ballColor']
        trailColor=details['trailColor']
        isInPlay=details['isInPlay']
        isStrike=details['isStrike']
        isBall=details['isBall']
        type_code=details['type']['code']
        type_description=details['type']['description']
        
        count_balls=count['balls']
        count_strikes=count['strikes']
        
        startSpeed=pitchData['startSpeed']
        endSpeed=pitchData['endSpeed']
        strikeZoneTop=pitchData['strikeZoneTop']
        strikeZoneBottom=pitchData['strikeZoneBottom']
        
        aY = coordinates.get('aY','null')
        aZ = coordinates.get('aZ','null')
        pfxX = coordinates.get('pfxX','null')
        pfxZ = coordinates.get('pfxZ','null')
        pX = coordinates.get('pX','null')
        pZ = coordinates.get('pZ','null')
        vX0 = coordinates.get('vX0','null')
        vY0 = coordinates.get('vY0','null')
        vZ0 = coordinates.get('vZ0','null')
        x = coordinates.get('x','null')
        y = coordinates.get('y','null')
        x0 = coordinates.get('x0','null')
        y0 = coordinates.get('y0','null')
        z0 = coordinates.get('zy','null')
        aX = coordinates.get('aX','null')
        
        breakAngle = breaks.get('breakAngle','null')
        breakLength = breaks.get()
        breakY
        spinRate
        spinDirection
        
        zone
        typeConfidence
    )