In [1]:
# Import the dependencies
import pandas as pd
import numpy as np
import datetime

# ORM Stuff
from sqlalchemy import create_engine, inspect, text

In [2]:
# Load the raw 2019-2022 MLB hitting leaders CSV and preview the first rows
raw_csv_path = "Resources/2019_2022_mlb_hitting_leaders.csv"
df = pd.read_csv(raw_csv_path)
df.head()

Unnamed: 0,Year,Player Name,Player Position,Team Name,G,AB,R,H,2B,3B,HR,RBI,BB,SO,SB,CS,AVG,OBP,SLG,OPS
0,2022,Aaron Judge,CF,NYY,157,570,133,177,28,0,62,131,111,175,16,3,0.311,0.425,0.686,1.111
1,2022,Yordan Alvarez,DH,HOU,135,470,95,144,29,2,37,97,78,106,1,1,0.306,0.406,0.613,1.019
2,2022,Paul Goldschmidt,1B,STL,151,561,106,178,41,0,35,115,79,141,7,0,0.317,0.404,0.578,0.982
3,2022,Jose Altuve,2B,HOU,141,527,103,158,39,0,28,57,66,87,18,1,0.3,0.387,0.533,0.92
4,2022,Freddie Freeman,1B,LAD,159,612,117,199,47,2,21,100,84,102,13,3,0.325,0.407,0.511,0.918


In [3]:
# Inspect column names, dtypes and non-null counts of the raw data before cleaning
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 539 entries, 0 to 538
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Year             539 non-null    int64  
 1   Player Name      539 non-null    object 
 2   Player Position  539 non-null    object 
 3   Team Name        539 non-null    object 
 4   G                539 non-null    int64  
 5   AB               539 non-null    int64  
 6   R                539 non-null    int64  
 7   H                539 non-null    int64  
 8   2B               539 non-null    int64  
 9   3B               539 non-null    int64  
 10  HR               539 non-null    int64  
 11  RBI              539 non-null    int64  
 12  BB               539 non-null    int64  
 13  SO               539 non-null    int64  
 14  SB               539 non-null    int64  
 15  CS               539 non-null    int64  
 16  AVG              539 non-null    float64
 17  OBP             

In [4]:
# Data Cleaning
# Rename the descriptive columns to snake_case; the stat columns (G, AB, R, ...)
# keep their original abbreviations.
column_renames = {
    "Year": "year",
    "Player Name": "player_name",
    "Player Position": "player_position",
    "Team Name": "team_abv",
}

# Rebind `df` to the renamed copy instead of using inplace=True: the frame shown
# by earlier cells is not mutated behind the reader's back, and the cell stays
# safe to re-run (renaming already-renamed columns is a no-op).
df = df.rename(columns=column_renames)

In [5]:
# Re-check the schema to confirm the renamed columns (year, player_name, player_position, team_abv)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 539 entries, 0 to 538
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   year             539 non-null    int64  
 1   player_name      539 non-null    object 
 2   player_position  539 non-null    object 
 3   team_abv         539 non-null    object 
 4   G                539 non-null    int64  
 5   AB               539 non-null    int64  
 6   R                539 non-null    int64  
 7   H                539 non-null    int64  
 8   2B               539 non-null    int64  
 9   3B               539 non-null    int64  
 10  HR               539 non-null    int64  
 11  RBI              539 non-null    int64  
 12  BB               539 non-null    int64  
 13  SO               539 non-null    int64  
 14  SB               539 non-null    int64  
 15  CS               539 non-null    int64  
 16  AVG              539 non-null    float64
 17  OBP             

In [6]:
# Save the cleaned data as a new CSV, Resources/hitting_leaders.csv, without the index column.
# This writes to a different file than the one read above, so the source
# 2019_2022_mlb_hitting_leaders.csv is not overwritten.
df.to_csv("Resources/hitting_leaders.csv", index=False)


In [7]:
# Create the SQLAlchemy engine for the SQLite database file hitting_leaders.sqlite
# (path is relative to the working directory); nothing is written to the DB in this cell
engine = create_engine("sqlite:///hitting_leaders.sqlite")

In [8]:
# Write the cleaned DataFrame to the `hitting_leaders` table.
# if_exists="replace" drops and recreates the table on each run, so re-running the
# notebook (Restart & Run All) leaves exactly one copy of the data instead of
# appending another 539 duplicate rows every time.
# index=False keeps the DataFrame's RangeIndex out of the table;
# method="multi" batches multiple rows into each INSERT statement.
df.to_sql(name="hitting_leaders", con=engine, index=False, if_exists="replace", method="multi")

539

In [9]:
# Build an inspector bound to the engine
inspector = inspect(engine)

# Gather the name of every table in the database
tables = inspector.get_table_names()

# For each table, print its name followed by every column's name and type
for table_name in tables:
    print(table_name)
    print("--------")
    for col in inspector.get_columns(table_name):
        print(f"{col['name']} {col['type']}")

    print()

hitting_leaders
--------
year BIGINT
player_name TEXT
player_position TEXT
team_abv TEXT
G BIGINT
AB BIGINT
R BIGINT
H BIGINT
2B BIGINT
3B BIGINT
HR BIGINT
RBI BIGINT
BB BIGINT
SO BIGINT
SB BIGINT
CS BIGINT
AVG FLOAT
OBP FLOAT
SLG FLOAT
OPS FLOAT



In [10]:
# Dispose of the engine's connection pool now that the notebook is finished with the database
engine.dispose()