In [1]:
import config
import sqlalchemy
import functions
import statsapi as mlb

In [2]:
gamePks = functions.read_gamePks()

In [34]:
test_pk2 = gamePks['2019'][101]

## SQL Alchemy engine

In [4]:
_sql_alchemy_connection = (
                                f'mysql+mysqlconnector://'
                                f'{config.user}:{config.password}'
                                f'@{config.host}:{config.port}'
                                f'/{config.schema}'
                           )

db = sqlalchemy.create_engine(_sql_alchemy_connection,
                              echo = False,
                              connect_args = {'ssl_disabled' : True})

In [5]:
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

from sqlalchemy import Column, Integer, String,DateTime,Date,Time,Boolean
# Declare mapping for the game table 
class Game(Base):
    __tablename__ = 'games'
    __table_args__ = {'extend_existing': True}
    
    pk = Column(Integer)
    type = Column(String(1))
    doubleHeader = Column(String(1))
    id = Column(String(150), primary_key=True,unique=True)
    gamedayType = Column(String(1))
    tiebreaker = Column(String(1))
    gameNumber = Column(Integer)
    calenderEventId = Column(String(50))
    season = Column(Integer)
    
    dateTime = Column(DateTime)
    originalDate = Column(Date)
    dayNight = Column(String(12))
    time = Column(Time)
    
    abstractGameState = Column(String(12))
    codedGameState = Column(String(3))
    detailedState = Column(String(12))
    statusCode = Column(String(3))
    abstractGameCode = Column(String(3))
    
    homeTeam_id = Column(Integer)
    awayTeam_id = Column(Integer)
    
    condition = Column(String(25))
    temp = Column(Integer)
    wind = Column(String(50))
    
    venue_id = Column(Integer)
    
    home_probablePitcher = Column(Integer)
    away_probablePitcher = Column(Integer)
    
    def __repr__(self): 
        return "<Game(pk='%s',id='%s')>" % (
                        self.pk, self.id)

In [6]:
db.table_names()

['games']

In [7]:
# Create the table 
Base.metadata.create_all(db)

In [8]:
# Create instance of the mapped class 
def create_gameRecord(pk):
    from datetime import datetime
    import time
    api_call = mlb.get('game',{'gamePk':pk})
    gameData = api_call['gameData']
    game = gameData['game']
    _datetime = gameData['datetime']
    status = gameData['status']
    weather = gameData['weather']
    probablePitchers = gameData['probablePitchers']
    
    game_record = Game(pk=game['pk'],
                 type=game['type'],
                 doubleHeader=game['doubleHeader'],
                 id=game['id'],
                 gamedayType=game['gamedayType'],
                 tiebreaker=game['tiebreaker'],
                 gameNumber=game['gameNumber'],
                 calenderEventId=game['calendarEventID'],
                 season=game['season'],
                  
                 dateTime=datetime.strptime(_datetime['dateTime'],'%Y-%m-%dT%H:%M:%SZ'),
                 originalDate=datetime.strptime(_datetime['originalDate'],"%Y-%m-%d"),
                 dayNight=_datetime['dayNight'],
                 time=datetime.strptime(_datetime['time']+_datetime['ampm'],"%H:%M%p"),
                  
                 abstractGameState=status['abstractGameState'],
                 codedGameState=status['codedGameState'],
                 detailedState=status['detailedState'],
                 statusCode=status['statusCode'],
                 abstractGameCode=status['abstractGameCode'],
                  
                 homeTeam_id=gameData['teams']['home']['id'],
                 awayTeam_id=gameData['teams']['away']['id'],
                  
                 condition=weather.get('condition','null'),
                 temp=weather.get('temp','null'),
                 wind=weather.get('wind','null'),
                  
                 venue_id=gameData['venue']['id'],
                  
                 home_probablePitcher=probablePitchers.get('home',{'null':'null'}).get('id','null'),
                 away_probablePitcher=probablePitchers.get('away',{'null':'null'}).get('id','null') 
                 )
    return game_record

In [9]:
test_record = create_gameRecord(test_pk)

In [10]:
test_record

<Game(pk='566651',id='2019/02/28/minmlb-tbamlb-1')>

In [11]:
# Create a Session 
from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=db)
session = Session()


In [12]:
# Adding and updating objects
session.add(test_record)

In [13]:
# instance is not added until we 'flush' with a query
test_query = session.query(Game)
test_query.all()

IntegrityError: (raised as a result of Query-invoked autoflush; consider using a session.no_autoflush block if this flush is occurring prematurely)
(mysql.connector.errors.IntegrityError) 1062 (23000): Duplicate entry '2019/02/28/minmlb-tbamlb-1' for key 'PRIMARY'
[SQL: INSERT INTO games (pk, type, `doubleHeader`, id, `gamedayType`, tiebreaker, `gameNumber`, `calenderEventId`, season, `dateTime`, `originalDate`, `dayNight`, time, `abstractGameState`, `codedGameState`, `detailedState`, `statusCode`, `abstractGameCode`, `homeTeam_id`, `awayTeam_id`, `condition`, temp, wind, venue_id, `home_probablePitcher`, `away_probablePitcher`) VALUES (%(pk)s, %(type)s, %(doubleHeader)s, %(id)s, %(gamedayType)s, %(tiebreaker)s, %(gameNumber)s, %(calenderEventId)s, %(season)s, %(dateTime)s, %(originalDate)s, %(dayNight)s, %(time)s, %(abstractGameState)s, %(codedGameState)s, %(detailedState)s, %(statusCode)s, %(abstractGameCode)s, %(homeTeam_id)s, %(awayTeam_id)s, %(condition)s, %(temp)s, %(wind)s, %(venue_id)s, %(home_probablePitcher)s, %(away_probablePitcher)s)]
[parameters: {'pk': 566651, 'type': 'S', 'doubleHeader': 'N', 'id': '2019/02/28/minmlb-tbamlb-1', 'gamedayType': 'Y', 'tiebreaker': 'N', 'gameNumber': 1, 'calenderEventId': '14-566651-2019-02-28', 'season': '2019', 'dateTime': datetime.datetime(2019, 2, 28, 23, 35), 'originalDate': datetime.datetime(2019, 2, 28, 0, 0), 'dayNight': 'night', 'time': datetime.datetime(1900, 1, 1, 6, 35), 'abstractGameState': 'Final', 'codedGameState': 'F', 'detailedState': 'Final', 'statusCode': 'F', 'abstractGameCode': 'F', 'homeTeam_id': 139, 'awayTeam_id': 142, 'condition': 'Clear', 'temp': '74', 'wind': '10 mph, L To R', 'venue_id': 2534, 'home_probablePitcher': 605483, 'away_probablePitcher': 606167}]
(Background on this error at: http://sqlalche.me/e/gkpj)

In [14]:
session.commit()

InvalidRequestError: This Session's transaction has been rolled back due to a previous exception during flush. To begin a new transaction with this Session, first issue Session.rollback(). Original exception was: (raised as a result of Query-invoked autoflush; consider using a session.no_autoflush block if this flush is occurring prematurely)
(mysql.connector.errors.IntegrityError) 1062 (23000): Duplicate entry '2019/02/28/minmlb-tbamlb-1' for key 'PRIMARY'
[SQL: INSERT INTO games (pk, type, `doubleHeader`, id, `gamedayType`, tiebreaker, `gameNumber`, `calenderEventId`, season, `dateTime`, `originalDate`, `dayNight`, time, `abstractGameState`, `codedGameState`, `detailedState`, `statusCode`, `abstractGameCode`, `homeTeam_id`, `awayTeam_id`, `condition`, temp, wind, venue_id, `home_probablePitcher`, `away_probablePitcher`) VALUES (%(pk)s, %(type)s, %(doubleHeader)s, %(id)s, %(gamedayType)s, %(tiebreaker)s, %(gameNumber)s, %(calenderEventId)s, %(season)s, %(dateTime)s, %(originalDate)s, %(dayNight)s, %(time)s, %(abstractGameState)s, %(codedGameState)s, %(detailedState)s, %(statusCode)s, %(abstractGameCode)s, %(homeTeam_id)s, %(awayTeam_id)s, %(condition)s, %(temp)s, %(wind)s, %(venue_id)s, %(home_probablePitcher)s, %(away_probablePitcher)s)]
[parameters: {'pk': 566651, 'type': 'S', 'doubleHeader': 'N', 'id': '2019/02/28/minmlb-tbamlb-1', 'gamedayType': 'Y', 'tiebreaker': 'N', 'gameNumber': 1, 'calenderEventId': '14-566651-2019-02-28', 'season': '2019', 'dateTime': datetime.datetime(2019, 2, 28, 23, 35), 'originalDate': datetime.datetime(2019, 2, 28, 0, 0), 'dayNight': 'night', 'time': datetime.datetime(1900, 1, 1, 6, 35), 'abstractGameState': 'Final', 'codedGameState': 'F', 'detailedState': 'Final', 'statusCode': 'F', 'abstractGameCode': 'F', 'homeTeam_id': 139, 'awayTeam_id': 142, 'condition': 'Clear', 'temp': '74', 'wind': '10 mph, L To R', 'venue_id': 2534, 'home_probablePitcher': 605483, 'away_probablePitcher': 606167}]
(Background on this error at: http://sqlalche.me/e/gkpj) (Background on this error at: http://sqlalche.me/e/7s2a)

In [15]:
test_execute = db.execute("select pk,id from games;").fetchall()
test_execute

[(566651, '2019/02/28/minmlb-tbamlb-1')]

In [None]:
def create_addGame(db,session,gamePks):
    """
    Takes in an SQL Alchemy engine, an SQL Alchemy session, and a list of gamePKs. 
    Queries the database to prevent duplicate records, creates records using create_gameRecord function, 
    and adds record to the session if not a duplicate. commits the session changes.         
    """
    already_added = db.execute("select pk,id from games;").fetchall()
    
    successfully_added = 0
    previously_added = 0 
    for game in gamePks:
        record = create_gameRecord(game)
        if tuple((record.pk,record.id)) not in already_added:
            session.add(record)
            successfully_added+=1
        else:
            previously_added+=1
    print("successfully added: ",successfully_added)
    print("previously added: ", previously_added)
    session.commit()
    
    

In [None]:
#create_addGame(db,session,gamePks['2019'][::300])

In [16]:
from sqlalchemy import ForeignKey
from sqlalchemy.orm import relationship

class Play(Base):
    __tablename__= 'plays'
    __table_args__ = {'extend_existing': True} 
    
    id = Column(String(200),primary_key=True,unique=True)
    type = Column(String(10))
    event = Column(String(25))
    eventType = Column(String(25))
    description = Column(String(250))
    rbi = Column(Integer)
    awayScore = Column(Integer)
    homeScore = Column(Integer)
    
    atBatIndex = Column(Integer)
    halfInning = Column(String(10))
    inning = Column(Integer)
    startTime = Column(DateTime)
    endTime = Column(DateTime)
    isComplete = Column(Boolean)
    isScoringPlay = Column(Boolean)
    hasReview = Column(Boolean)
    hasOut = Column(Boolean)
    captivatingIndex = Column(Integer)
    
    batter_id = Column(Integer)
    pitcher_id = Column(Integer)
    
    count_balls = Column(Integer)
    count_strikes = Column(Integer)
    count_outs = Column(Integer)
    
    num_pitches = Column(Integer)
    num_actions = Column(Integer)
    num_runners = Column(Integer)
    
    #game_pk = Column(Integer,ForeignKey('games.pk'),nullable=False)
    game_id = Column(String(150),ForeignKey('games.id'))
    
    game = relationship("Game",back_populates="plays")
    
    def __repr__(self):
        return "<Play(game_id='%s',atBatIndex='%s')>" % (
                     self.game_id,self.atBatIndex)

Game.plays = relationship(
    "Play",order_by=Play.id,back_populates='game')

In [17]:
Base.metadata.create_all(db)

In [73]:
# Create instance of the mapped class 
def create_gameRecord_playsRecords(pk):
    from datetime import datetime
    import time
    already_added_pk = [item for sublist in db.execute('select pk from games').fetchall() for item in sublist]
    
    if int(pk) not in already_added_pk:
        api_call = mlb.get('game',{'gamePk':pk})

        gameData = api_call['gameData']
        game = gameData['game']
        _datetime = gameData['datetime']
        status = gameData['status']
        weather = gameData['weather']
        probablePitchers = gameData['probablePitchers']

        liveData = api_call['liveData']
        all_plays = liveData['plays']['allPlays']
    
        already_added = [item for sublist in db.execute('select id from games').fetchall() for item in sublist]
    

        game_record = Game(pk=game['pk'],
                     type=game['type'],
                     doubleHeader=game['doubleHeader'],
                     id=game['id'],
                     gamedayType=game['gamedayType'],
                     tiebreaker=game['tiebreaker'],
                     gameNumber=game['gameNumber'],
                     calenderEventId=game['calendarEventID'],
                     season=game['season'],

                     dateTime=datetime.strptime(_datetime['dateTime'],'%Y-%m-%dT%H:%M:%SZ'),
                     originalDate=datetime.strptime(_datetime['originalDate'],"%Y-%m-%d"),
                     dayNight=_datetime['dayNight'],
                     time=datetime.strptime(_datetime['time']+_datetime['ampm'],"%H:%M%p"),

                     abstractGameState=status['abstractGameState'],
                     codedGameState=status['codedGameState'],
                     detailedState=status['detailedState'],
                     statusCode=status['statusCode'],
                     abstractGameCode=status['abstractGameCode'],

                     homeTeam_id=gameData['teams']['home']['id'],
                     awayTeam_id=gameData['teams']['away']['id'],

                     condition=weather.get('condition','null'),
                     temp=weather.get('temp','null'),
                     wind=weather.get('wind','null'),

                     venue_id=gameData['venue']['id'],

                     home_probablePitcher=probablePitchers.get('home',{'null':'null'}).get('id','null'),
                     away_probablePitcher=probablePitchers.get('away',{'null':'null'}).get('id','null') 
                     )
        play_records = []
        for play in all_plays:
            result = play['result']
            about = play['about']
            batter = play.get('matchup',{'batter':'null'})['batter']
            pitcher = play.get('matchup',{'pitcher':'null'})['pitcher']
            count = play['count']

            play_record = Play(id=str(game['pk'])+game['id']+str(about['atBatIndex']),
                              type=result['type'],
                              event=result['event'],
                              eventType=result.get('eventType','null'),
                              description=result.get('description','null'),
                              rbi=result.get('rbi','null'),
                              awayScore=result.get('awayScore','null'),
                              homeScore=result.get('homeScore','null'),

                              atBatIndex=about.get('atBatIndex','null'),
                              halfInning=about['halfInning'],
                              inning=about['inning'],
                              startTime=datetime.strptime(about.get('startTime','1900-01-01T01:01:1.0Z'),'%Y-%m-%dT%H:%M:%S.%fZ'),
                              endTime=datetime.strptime(about.get('endTime','1900-01-01T01:01:01.0Z'),'%Y-%m-%dT%H:%M:%S.%fZ'),
                              isComplete=about.get('isComplete','null'),
                              isScoringPlay=about.get('isScoringPlay','null'),
                              hasReview=about.get('hasReview','null'),
                              hasOut=about.get('hasOut','null'),
                              captivatingIndex=about.get('captivatingIndex','null'),

                              batter_id=batter.get('id','null'),
                              pitcher_id=pitcher.get('id','null'),

                              count_balls=count.get('balls','null'),
                              count_strikes=count.get('striks','null'),
                              count_outs=count.get('outs','null'),

                              num_pitches=len(play['pitchIndex']),
                              num_actions=len(play['actionIndex']),
                              num_runners=len(play['runners']),

                              game_id=game['id'] 
                              )
            play_records.append(play_record)
            
        session.add(game_record)
        session.commit()
            
        session.add_all(play_records)
        session.commit()
    else:
        print("skipping duplicate")
    #return game_record,play_records

In [19]:
db.table_names()

['games', 'plays']

In [51]:
test_pk

'564917'

In [62]:
create_gameRecord_playsRecords(test_pk)

skipping duplicate


In [57]:
#session.flush()
session.rollback()

  "Session's state has been changed on "


In [None]:
session.rollback()
[create_gameRecord_playsRecords(x) for x in gamePks['2019']]

skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping duplicate
skipping dup

In [26]:
session.add_all(test_playRecords)

In [27]:
session.commit()

In [25]:
session.rollback()

  "Session's state has been changed on "


In [49]:
#db.execute("drop table games")
#db.execute("drop table plays")

<sqlalchemy.engine.result.ResultProxy at 0x109a6fe48>