# MLB Data Pipeline with Airflow


   <img src="mlb_logo.png" alt="MLB logo">


Notes:

    - Goal is to automate mlb statistics throughout the season for any given team. 
    - 5-stage process that includes: ingestion from API source, staging and historical storage, loading into Postgres, 
    and ML/statistical analysis using Python's data science libraries.

Functions learned

    - items(): https://www.w3schools.com/python/ref_dictionary_items.asp 
    // used mainly for getting tuples from a dict
    - update(): https://www.digitalocean.com/community/tutorials/python-add-to-dictionary 
    // used for updating k,v in a dict
    - save a variable for previous week's roster... this is because rosters change throughout the season...if the player
    is not on the previous roster, then add them
    - pickle module (dump() / load()): https://www.datacamp.com/tutorial/pickle-python-tutorial

In [418]:
# https://appac.github.io/mlb-data-api-docs/
# https://www.xstats.org/articles/2021/8/27/scraping-the-mlb-api-using-node

# import necessary libraries
import json
import os
import pickle
from datetime import date
from pathlib import Path

import airflow
import numpy as np
import pandas as pd
import psycopg2
import requests
from airflow import DAG
from airflow.operators.python import PythonOperator
from psycopg2 import sql


In [317]:
# Directory that holds the dated roster pickle files.
path = '/home/mgcruz/01 - DATA/mlbAirflow'
# Run date as an ISO string (YYYY-MM-DD); used as the prefix of output file names.
today = date.today().isoformat()

### Goal: Get pitcher and hitter stats for the Dodgers (only bring back relevant attributes) and load to Postgres

I will be using the Dodgers in this scenario (GO BLUE)



### 1. USE TEAM ID TO GET THE TEAM'S PLAYERS' INFO 
Note: the most recently saved roster file ('<date>_roster.pkl', loaded as `last_roster`) is used to account for roster updates: the code cross-checks the current roster IDs, `players_ids`, against that last roster.


In [474]:
# Look up the team_id for one club. Change `team_name` if you wish to use another team :)
team_name = "Los Angeles Dodgers"

# A timeout keeps the cell from hanging forever if the lookup service stops answering,
# and raise_for_status() surfaces HTTP errors before we try to parse the body as JSON.
response = requests.get(
    "http://lookup-service-prod.mlb.com/json/named.team_all_season.bam?sports_code='mlb'&season='2021'",
    timeout=30,
)
response.raise_for_status()
teams = response.json()['team_all_season']['queryResults']['row']

# Take the first team whose full display name matches; fail with a readable message otherwise.
team_id = next((team['team_id'] for team in teams if team['name_display_full'] == team_name), None)
if team_id is None:
    raise ValueError(f"No team named {team_name!r} was returned by the team_all_season lookup")

print(f"{team_id}: '{team_name}'")

119: 'Los Angeles Dodgers'


In [424]:
# Pull the 40-man roster: a list of dictionaries, one for each player on the team.
# The timeout prevents an indefinite hang; raise_for_status() reports HTTP failures explicitly.
response = requests.get(
    f"https://lookup-service-prod.mlb.com/json/named.roster_40.bam?team_id='{team_id}'",
    timeout=30,
)
response.raise_for_status()
players = response.json()['roster_40']['queryResults']['row']

# We want only items in player_attributes
player_attributes = ['player_id', 'name_display_first_last', 'birth_date', 'bats',  'position_txt', 'primary_position', 'starter_sw', 'start_date', 'throws', 'team_id']

# Keep only the wanted attributes for every player.
# The loop variable is deliberately not called `dict`, so the builtin stays usable afterwards.
players_list = []
for count, player in enumerate(players):
    print(f"Now running {count}, '{player['name_display_first_last']}' ({player['player_id']}), Primary Position: {player['position_txt']}")
    players_list.append({k: v for k, v in player.items() if k in player_attributes})


MLB lookup service API response code for players data: 200
Now running 0, 'Austin Barnes' (605131), Primary Position: C
Now running 1, 'Mookie Betts' (605141), Primary Position: RF
Now running 2, 'Walker Buehler' (621111), Primary Position: P
Now running 3, 'Diego Cartaya' (682616), Primary Position: C
Now running 4, 'Hunter Feduccia' (676439), Primary Position: C
Now running 5, 'Caleb Ferguson' (657571), Primary Position: P
Now running 6, 'J.P. Feyereisen' (656420), Primary Position: P
Now running 7, 'Nick Frasso' (693308), Primary Position: P
Now running 8, 'Freddie Freeman' (518692), Primary Position: 1B
Now running 9, 'Tyler Glasnow' (607192), Primary Position: P
Now running 10, 'Tony Gonsolin' (664062), Primary Position: P
Now running 11, 'Brusdar Graterol' (660813), Primary Position: P
Now running 12, 'Michael Grove' (675627), Primary Position: P
Now running 13, 'Teoscar Hernandez' (606192), Primary Position: RF
Now running 14, 'Jason Heyward' (518792), Primary Position: RF
Now r

In [425]:
# Collect the player_id of every player on the current roster, in roster order.
players_ids = [player['player_id'] for player in players_list]
    

### Cross-check with the previous roster for any updates

In [426]:
# Find the most recent roster pickle written before today. Doing the lookup here (instead of
# relying on a `last_path` variable left over from an earlier run) lets the cell work on a
# fresh kernel. File names start with an ISO date, so sorting by name sorts by date.
todays_roster_name = f"{today}_roster.pkl"
earlier_rosters = sorted(
    (roster_file for roster_file in Path(path).glob('*_roster.pkl') if roster_file.name < todays_roster_name),
    key=lambda roster_file: roster_file.name,
)

if not earlier_rosters:
    # First run (or empty directory): there is nothing to compare against yet.
    last_roster = []
    print(f"No previous roster found in {path}")
else:
    last_path = str(earlier_rosters[-1])

    # NOTE(review): pickle.load can execute arbitrary code; only load roster files that this
    # notebook wrote itself.
    with open(last_path, 'rb') as f:  # the context manager closes the file
        last_roster = pickle.load(f)  # deserialize using load()

    new_players = [player for player in players_ids if player not in last_roster]
    departed_players = [player for player in last_roster if player not in players_ids]

    for player in new_players:
        print(f"{player} is a new player")

    for player in departed_players:
        print(f"{player} is no longer on the roster")

    # Equal lengths are not enough: one player swapped for another keeps the length unchanged.
    if not new_players and not departed_players:
        print("No roster changes")

No roster changes


### Output the roster to a pickle file and save its path for future reads

In [427]:
# Serialize today's roster IDs to '<path>/<today>_roster.pkl'.
roster_pickle = f"{path}/{today}_roster.pkl"
with open(roster_pickle, 'wb') as roster_out:
    pickle.dump(players_ids, roster_out)

# Remember today's file so it can serve as the next reference roster.
last_path = roster_pickle

## 2. USE PLAYER IDS TO GET THEIR HITTING AND PITCHING STATS 


### Pitching Stats

In [428]:
# Loop over every roster player id and collect that player's pitching rows for our team.

# we want these stats attributes
stats_attributes = ['team_id', 'hr', 'season', 'ab', 'hldr', 'ao', 'slg', 'ops', 'hbp', 'rbi', 'go_ao', 'hfly', 'lob', 'xbh', 'end_date', 'bb', 'np', 'hgnd', 'roe', 'sb', 'player_id', 'avg', 'sf', 'sac', 'wo', 'hpop', 'so', 'gidp_opp', 'gidp', 'ppa', 'd', 'tpa', 'g', 'h', 'ibb', 'go', 'team_seq', 'tb', 'cs', 'r', 't', 'babip', 'obp']

pitching_stats = []

# The loop variables avoid the names `id` and `row`-as-`dict` so no builtin gets rebound.
for count, player_id in enumerate(players_ids): # getting pitching stats

    print(f"Stats running {count}, '{player_id}'")
    pitch_response = requests.get(
        f"http://lookup-service-prod.mlb.com/json/named.sport_pitching_tm.bam?league_list_id='mlb'&game_type='R'&season='2021'&player_id={player_id}",
        timeout=30,  # never wait forever on a single player
    )
    pitch_response.raise_for_status()

    p_stats = pitch_response.json()['sport_pitching_tm']['queryResults']

    if p_stats['totalSize'] == '0':
        # players that do not have data for pitching
        print(f"No data for '{player_id}'")
        continue

    rows = p_stats['row']

    if not isinstance(rows, list):
        # A single row: the player has stats for exactly one team.
        if rows['team_id'] == team_id:
            pitching_stats.append({k: v for k, v in rows.items() if k in stats_attributes})
            print(f"'{player_id}' has Dodgers stats")
        else:
            # players that have data for pitching from one team, but not from the Dodgers. This relates to season transactions
            print(f"'{player_id}' has non-Dodgers stats")
        continue

    # Several rows: the player has data from multiple teams. Keep only the rows for our team.
    team_rows = [row for row in rows if row['team_id'] == team_id]
    for row in team_rows:
        pitching_stats.append({k: v for k, v in row.items() if k in stats_attributes})
        print(f"'{player_id}' has Dodgers stats and multiple teams")

    if not team_rows:
        # Previously this case printed nothing, which made the log look like a skipped player.
        print(f"'{player_id}' has non-Dodgers stats")


pitching_stats

Stats running 0, '605131'
No data for '605131'
Stats running 1, '605141'
No data for '605141'
Stats running 2, '621111'
'621111' has Dodgers stats
Stats running 3, '682616'
No data for '682616'
Stats running 4, '676439'
No data for '676439'
Stats running 5, '657571'
No data for '657571'
Stats running 6, '656420'
Stats running 7, '693308'
No data for '693308'
Stats running 8, '518692'
No data for '518692'
Stats running 9, '607192'
'607192' has non-Dodgers stats
Stats running 10, '664062'
'664062' has Dodgers stats
Stats running 11, '660813'
'660813' has Dodgers stats
Stats running 12, '675627'
No data for '675627'
Stats running 13, '606192'
No data for '606192'
Stats running 14, '518792'
No data for '518792'
Stats running 15, '669165'
No data for '669165'
Stats running 16, '523260'
'523260' has Dodgers stats
Stats running 17, '689017'
No data for '689017'
Stats running 18, '666158'
No data for '666158'
Stats running 19, '622534'
No data for '622534'
Stats running 20, '669160'
'669160' h

[{'hr': '19',
  'team_id': '119',
  'season': '2021',
  'ab': '747',
  'hldr': '79',
  'ao': '186',
  'slg': '.329',
  'ops': '.586',
  'go_ao': '1.13',
  'hfly': '28',
  'end_date': '2022-06-11T00:00:00',
  'bb': '52',
  'np': '3152',
  'hgnd': '42',
  'sb': '12',
  'player_id': '621111',
  'avg': '.199',
  'sf': '3',
  'sac': '7',
  'hpop': '0',
  'so': '212',
  'gidp_opp': '83',
  'gidp': '17',
  'ppa': '3.87',
  'g': '33',
  'h': '149',
  'ibb': '2',
  'go': '210',
  'team_seq': '1.0',
  'cs': '3',
  'r': '61',
  'babip': '.250',
  'obp': '.256'},
 {'hr': '8',
  'team_id': '119',
  'season': '2021',
  'ab': '203',
  'hldr': '21',
  'ao': '58',
  'slg': '.369',
  'ops': '.686',
  'go_ao': '0.71',
  'hfly': '8',
  'end_date': '2022-08-26T00:00:00',
  'bb': '34',
  'np': '990',
  'hgnd': '12',
  'sb': '0',
  'player_id': '664062',
  'avg': '.202',
  'sf': '0',
  'sac': '2',
  'hpop': '0',
  'so': '65',
  'gidp_opp': '24',
  'gidp': '1',
  'ppa': '4.14',
  'g': '15',
  'h': '41',
  'ib

In [429]:
# Number of pitching records collected above (one entry per API row that matched team_id).
len(pitching_stats)

# 8 players on the Dodgers with pitching stats

8

### Hitting Stats

In [430]:

# Loop over every roster player id and collect that player's hitting rows for our team.
hitting_stats = []

# The loop variables avoid the names `id` and `dict` so no builtin gets rebound.
for count, player_id in enumerate(players_ids): # getting hitting stats

    print(f"Stats running {count}, '{player_id}'")
    hit_response = requests.get(
        f"http://lookup-service-prod.mlb.com/json/named.sport_hitting_tm.bam?league_list_id='mlb'&game_type='R'&season='2021'&player_id={player_id}",
        timeout=30,  # never wait forever on a single player
    )
    hit_response.raise_for_status()

    h_stats = hit_response.json()['sport_hitting_tm']['queryResults']

    if h_stats['totalSize'] == '0':
        # players that do not have data for hitting
        print(f"No data for '{player_id}'")
        continue

    rows = h_stats['row']

    if not isinstance(rows, list):
        # A single row: the player has stats for exactly one team.
        if rows['team_id'] == team_id:
            hitting_stats.append({k: v for k, v in rows.items() if k in stats_attributes})
            print(f"'{player_id}' has Dodgers stats")
        else:
            # players that have data for hitting from one team, but not from the Dodgers. This relates to season transactions
            print(f"'{player_id}' has non-Dodgers stats")
        continue

    # Several rows: the player has data from multiple teams. Keep only the rows for our team.
    team_rows = [row for row in rows if row['team_id'] == team_id]
    for row in team_rows:
        hitting_stats.append({k: v for k, v in row.items() if k in stats_attributes})
        print(f"'{player_id}' has Dodgers stats and multiple teams")

    if not team_rows:
        # Previously this case printed nothing, which made the log look like a skipped player.
        print(f"'{player_id}' has non-Dodgers stats")


hitting_stats

Stats running 0, '605131'
'605131' has Dodgers stats
Stats running 1, '605141'
'605141' has Dodgers stats
Stats running 2, '621111'
'621111' has Dodgers stats
Stats running 3, '682616'
No data for '682616'
Stats running 4, '676439'
No data for '676439'
Stats running 5, '657571'
No data for '657571'
Stats running 6, '656420'
Stats running 7, '693308'
No data for '693308'
Stats running 8, '518692'
'518692' has non-Dodgers stats
Stats running 9, '607192'
'607192' has non-Dodgers stats
Stats running 10, '664062'
'664062' has Dodgers stats
Stats running 11, '660813'
'660813' has Dodgers stats
Stats running 12, '675627'
No data for '675627'
Stats running 13, '606192'
'606192' has non-Dodgers stats
Stats running 14, '518792'
'518792' has non-Dodgers stats
Stats running 15, '669165'
No data for '669165'
Stats running 16, '523260'
'523260' has Dodgers stats
Stats running 17, '689017'
No data for '689017'
Stats running 18, '666158'
'666158' has Dodgers stats
Stats running 19, '622534'
'622534' h

[{'hr': '6',
  'team_id': '119',
  'season': '2021',
  'ab': '200',
  'hldr': '17',
  'ao': '55',
  'slg': '.345',
  'ops': '.644',
  'hbp': '4',
  'rbi': '23',
  'go_ao': '0.85',
  'hfly': '10',
  'lob': '102',
  'xbh': '14',
  'end_date': '2022-08-12T00:00:00',
  'bb': '20',
  'np': '890',
  'hgnd': '16',
  'roe': '2',
  'sb': '1',
  'player_id': '605131',
  'avg': '.215',
  'sf': '0',
  'sac': '1',
  'wo': '0',
  'hpop': '0',
  'so': '56',
  'gidp_opp': '36',
  'gidp': '6',
  'ppa': '3.96',
  'd': '8',
  'tpa': '225',
  'g': '77',
  'h': '43',
  'ibb': '1',
  'go': '47',
  'team_seq': '1.0',
  'tb': '69',
  'cs': '0',
  'r': '28',
  't': '0',
  'babip': '.268',
  'obp': '.299'},
 {'hr': '23',
  'team_id': '119',
  'season': '2021',
  'ab': '466',
  'hldr': '62',
  'ao': '157',
  'slg': '.487',
  'ops': '.854',
  'hbp': '11',
  'rbi': '58',
  'go_ao': '0.67',
  'hfly': '32',
  'lob': '152',
  'xbh': '55',
  'end_date': '2022-06-19T00:00:00',
  'bb': '68',
  'np': '2232',
  'hgnd': '2

In [431]:
# Number of hitting records collected above (one entry per API row that matched team_id).
len(hitting_stats)

# 14 players on the Dodgers with hitting stats

14

### 3. INSERT VALUES TO POSTGRES TABLES: players_list, hit_stats, pitch_stats

### Check to make sure each record has sufficient data


In [432]:
# Every player record must carry all 10 requested attributes (the length of player_attributes).
conditions_met = True

for i, record in enumerate(players_list):
    if len(record) != 10:
        # f-string instead of `i + '...'`: adding an int to a str raised TypeError
        # in exactly the case this check exists to report.
        print(f"{i} is not complete")
        conditions_met = False

if conditions_met:
    print("All are complete")

All are complete


In [433]:
# Every pitching record is expected to carry 33 of the requested stats attributes.
conditions_met = True

for i, record in enumerate(pitching_stats):
    if len(record) != 33:
        # f-string instead of `i + '...'`: adding an int to a str raised TypeError
        # in exactly the case this check exists to report.
        print(f"{i} is not complete")
        conditions_met = False

if conditions_met:
    print("All are complete")

All are complete


In [434]:
# Every hitting record is expected to carry all 43 requested stats attributes.
conditions_met = True

for i, record in enumerate(hitting_stats):
    if len(record) != 43:
        # f-string instead of `i + '...'`: adding an int to a str raised TypeError
        # in exactly the case this check exists to report.
        print(f"{i} is not complete")
        conditions_met = False

if conditions_met:
    print("All are complete")

All are complete


## Creating PG tables
Format into dataframes to fit database table structure and fix any data types (ex. start_date)

### Player Info 

In [442]:
# One row per roster player; team_id is left out of the player table.
players_df = pd.DataFrame(players_list)
players_df = players_df.drop('team_id', axis=1)
players_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39 entries, 0 to 38
Data columns (total 9 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   name_display_first_last  39 non-null     object
 1   birth_date               39 non-null     object
 2   bats                     39 non-null     object
 3   player_id                39 non-null     object
 4   position_txt             39 non-null     object
 5   primary_position         39 non-null     object
 6   starter_sw               39 non-null     object
 7   start_date               39 non-null     object
 8   throws                   39 non-null     object
dtypes: object(9)
memory usage: 2.9+ KB


In [465]:
# Convert both date columns to datetime64[ns] in a single astype call.
players_df = players_df.astype({'start_date': 'datetime64[ns]', 'birth_date': 'datetime64[ns]'})
players_df.head()

Unnamed: 0,name_display_first_last,birth_date,bats,player_id,position_txt,primary_position,starter_sw,start_date,throws
0,Austin Barnes,1989-12-28,R,605131,C,2,N,2014-12-11,R
1,Mookie Betts,1992-10-07,R,605141,RF,9,N,2020-02-10,R
2,Walker Buehler,1994-07-28,R,621111,P,1,N,2017-09-06,R
3,Diego Cartaya,2001-09-07,R,682616,C,2,N,2022-11-15,R
4,Hunter Feduccia,1997-06-05,L,676439,C,2,N,2023-11-14,R


### Pitching Stats

In [484]:
# Build the pitching frame, leaving out season, team_id and team_seq.
pitching_df = pd.DataFrame(pitching_stats)
pitching_df = pitching_df.drop(columns=['season', 'team_id', 'team_seq'])

# Cast every column group in one pass: the date, the rate statistics, then the counting statistics.
pitching_df = pitching_df.astype({
    # datetime64[ns]
    'end_date': 'datetime64[ns]',
    # float64
    **{col: 'float64' for col in ['slg','ops','go_ao','avg','ppa','babip','obp']},
    # int64
    **{col: 'int64' for col in ['hr','ab','hldr','ao','hfly','bb','np','hgnd','sb','sf','sac','hpop','so','gidp_opp','gidp','g','h','ibb','go','cs','r']},
})

pitching_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 30 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   hr         8 non-null      int64         
 1   ab         8 non-null      int64         
 2   hldr       8 non-null      int64         
 3   ao         8 non-null      int64         
 4   slg        8 non-null      float64       
 5   ops        8 non-null      float64       
 6   go_ao      8 non-null      float64       
 7   hfly       8 non-null      int64         
 8   end_date   7 non-null      datetime64[ns]
 9   bb         8 non-null      int64         
 10  np         8 non-null      int64         
 11  hgnd       8 non-null      int64         
 12  sb         8 non-null      int64         
 13  player_id  8 non-null      object        
 14  avg        8 non-null      float64       
 15  sf         8 non-null      int64         
 16  sac        8 non-null      int64         
 17  h

In [485]:
# Preview the first rows of the pitching frame.
pitching_df.head()

Unnamed: 0,hr,ab,hldr,ao,slg,ops,go_ao,hfly,end_date,bb,...,gidp,ppa,g,h,ibb,go,cs,r,babip,obp
0,19,747,79,186,0.329,0.586,1.13,28,2022-06-11,52,...,17,3.87,33,149,2,210,3,61,0.25,0.256
1,8,203,21,58,0.369,0.686,0.71,8,2022-08-26,34,...,1,4.14,15,41,0,41,2,20,0.254,0.316
2,2,131,18,21,0.389,0.738,2.33,3,2022-07-11,13,...,3,3.49,34,34,6,49,1,18,0.314,0.349
3,3,161,16,27,0.286,0.544,2.15,4,2021-11-05,15,...,1,3.91,48,28,1,58,0,16,0.227,0.258
4,4,86,9,10,0.349,0.596,2.5,3,2021-11-07,6,...,0,3.96,5,16,0,25,1,8,0.255,0.247


### Hitting Stats

In [531]:
# Build the hitting frame, leaving out team_id, season and team_seq.
hitting_df = pd.DataFrame(hitting_stats)
hitting_df = hitting_df.drop(columns=['team_id', 'season', 'team_seq'])

# float64 candidates: these rate columns arrive as strings and may contain non-numeric
# placeholders (for example '-.--', '*.**' or '.---'), which have to be cleaned up and
# imputed with zeros before casting.
# https://stackoverflow.com/questions/33961028/remove-non-numeric-rows-in-one-column-with-pandas
hitting_df.loc[:, ['slg','ops','go_ao','avg','ppa','babip','obp']]

Unnamed: 0,slg,ops,go_ao,avg,ppa,babip,obp
0,.345,0.644,0.85,.215,3.96,.268,.299
1,.487,0.854,0.67,.264,4.06,.276,.367
2,.130,0.293,5.25,.101,4.21,.292,.162
3,.235,0.458,2.50,.176,3.95,.333,.222
4,.000,0.0,1.00,.000,2.00,.000,.000
5,.000,0.0,*.**,.000,4.00,.000,.000
6,.364,0.692,1.11,.242,3.78,.300,.328
7,.000,0.0,*.**,.000,3.75,.---,.000
8,.527,0.895,0.98,.249,4.11,.257,.368
9,.000,0.0,-.--,.000,5.33,.---,.000


In [533]:
# Coerce the rate columns to real numbers. Placeholders such as '-.--', '*.**' or '.---'
# cannot be parsed, become NaN under errors='coerce', and are then imputed with 0.0.
# Converting directly (instead of masking the frame and re-assigning the surviving strings)
# leaves float columns rather than object columns that mix strings with float zeros.
# NOTE(review): the placeholders presumably mark undefined ratios; confirm that 0.0 is the
# right imputation for downstream analysis.
rate_columns = ['slg','ops','go_ao','avg','ppa','babip','obp']
hitting_df[rate_columns] = (
    hitting_df[rate_columns]
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0.0)
)
hitting_df[rate_columns]

Unnamed: 0,slg,ops,go_ao,avg,ppa,babip,obp
0,0.345,0.644,0.85,0.215,3.96,0.268,0.299
1,0.487,0.854,0.67,0.264,4.06,0.276,0.367
2,0.13,0.293,5.25,0.101,4.21,0.292,0.162
3,0.235,0.458,2.5,0.176,3.95,0.333,0.222
4,0.0,0.0,1.0,0.0,2.0,0.0,0.0
5,0.0,0.0,0.0,0.0,4.0,0.0,0.0
6,0.364,0.692,1.11,0.242,3.78,0.3,0.328
7,0.0,0.0,0.0,0.0,3.75,0.0,0.0
8,0.527,0.895,0.98,0.249,4.11,0.257,0.368
9,0.0,0.0,0.0,0.0,5.33,0.0,0.0


In [534]:

# Cast every column group in one pass. 'ppa' (a fractional value such as 3.96) belongs to
# the float group only: it used to be listed with the integer columns as well, and the later
# int64 cast silently truncated it.
hitting_df = hitting_df.astype({
    # datetime64[ns]
    'end_date': 'datetime64[ns]',
    # float64
    **{col: 'float64' for col in ['slg','ops','go_ao','avg','ppa','babip','obp']},
    # int64
    **{col: 'int64' for col in ['hr','ab','hldr','ao','sf','sac','wo','hpop','so','gidp_opp','gidp','d','tpa','g','h','ibb','go','tb','cs','r','t','hbp',
                                'rbi', 'hfly','lob','xbh','bb','np','hgnd','roe','sb']},
})

hitting_df.info()




<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14 entries, 0 to 13
Data columns (total 40 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   hr         14 non-null     int64         
 1   ab         14 non-null     int64         
 2   hldr       14 non-null     int64         
 3   ao         14 non-null     int64         
 4   slg        14 non-null     float64       
 5   ops        14 non-null     float64       
 6   hbp        14 non-null     int64         
 7   rbi        14 non-null     int64         
 8   go_ao      14 non-null     float64       
 9   hfly       14 non-null     int64         
 10  lob        14 non-null     int64         
 11  xbh        14 non-null     int64         
 12  end_date   13 non-null     datetime64[ns]
 13  bb         14 non-null     int64         
 14  np         14 non-null     int64         
 15  hgnd       14 non-null     int64         
 16  roe        14 non-null     int64         
 17 

In [535]:
# Preview the first rows of the hitting frame.
hitting_df.head()

Unnamed: 0,hr,ab,hldr,ao,slg,ops,hbp,rbi,go_ao,hfly,...,g,h,ibb,go,tb,cs,r,t,babip,obp
0,6,200,17,55,0.345,0.644,4,23,0.85,10,...,77,43,1,47,69,0,28,0,0.268,0.299
1,23,466,62,157,0.487,0.854,11,58,0.67,32,...,122,123,2,105,227,5,93,3,0.276,0.367
2,0,69,2,4,0.13,0.293,0,3,5.25,0,...,34,7,0,21,9,0,5,0,0.292,0.162
3,0,17,0,2,0.235,0.458,0,0,2.5,0,...,17,3,0,5,4,0,2,0,0.333,0.222
4,0,2,0,1,0.0,0.0,0,0,1.0,0,...,34,0,0,1,0,0,0,0,0.0,0.0


## Populating PG Database

In [None]:

def insert_into_db(data, table_name):
    """Bulk-insert a list of dict records into one PostgreSQL table.

    Parameters
    ----------
    data : list of dict
        Records to insert. The keys of the first record define the column list, and every
        record must provide a value for each of those keys.
    table_name : str
        Unqualified name of the target table. It is quoted as a single identifier, so a
        schema-qualified name such as 'public.players_table' is not supported here.

    Raises
    ------
    KeyError
        If a record is missing one of the columns taken from the first record.
    psycopg2.Error
        If connecting or inserting fails; the transaction is rolled back in that case.
    """
    if not data:
        # Nothing to insert; data[0] below would otherwise raise IndexError.
        print(f"No rows to insert into {table_name}")
        return

    columns = list(data[0].keys())

    # Compose identifiers with psycopg2.sql instead of string formatting so that table and
    # column names are quoted safely; values still travel as bound parameters.
    insert_query = sql.SQL("INSERT INTO {table} ({fields}) VALUES ({values})").format(
        table=sql.Identifier(table_name),
        fields=sql.SQL(', ').join(map(sql.Identifier, columns)),
        values=sql.SQL(', ').join(sql.Placeholder() * len(columns)),
    )

    # Read each value by column name so rows stay aligned even if key order differs per record.
    values = [tuple(record[column] for column in columns) for record in data]

    # Connection settings come from the standard PG* environment variables.
    # NOTE(review): the fallbacks are the original development defaults; set PGPASSWORD
    # (and friends) in the environment instead of relying on them.
    conn = psycopg2.connect(
        dbname=os.environ.get('PGDATABASE', 'mlb_team'),
        user=os.environ.get('PGUSER', 'admin'),
        password=os.environ.get('PGPASSWORD', 'baseball'),
        host=os.environ.get('PGHOST', 'localhost'),
        port=int(os.environ.get('PGPORT', 5432)),
    )
    try:
        # `with conn` commits on success and rolls back on error; the cursor block closes the cursor.
        with conn, conn.cursor() as cursor:
            cursor.executemany(insert_query, values)
    finally:
        # `with conn` does not close the connection, so do it explicitly.
        conn.close()


# NOTE(review): the raw API records (string values) are inserted here, not the typed
# DataFrames built above; confirm which form the table schemas expect.

# Inserting player information into PostgreSQL
insert_into_db(players_list, 'players_table')

# Inserting hitting stats into PostgreSQL
insert_into_db(hitting_stats, 'hitting_stats_table')

# Inserting pitching stats into PostgreSQL
insert_into_db(pitching_stats, 'pitching_stats_table')

