In [1]:
import os

import numpy as np
import pandas as pd
import pandas_profiling
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy.ext.declarative import declarative_base
# Declarative base class: the ORM model below subclasses `Base`, and
# `Base.metadata` is what `create_all()` is later called on.
Base = declarative_base()

In [2]:
class BaseballPlayer(Base):
    """ORM mapping for rows of the ``player`` table.

    Each class attribute below declares one column; ``player_id`` is the
    primary key. Column declaration order is kept as-is.
    """

    __tablename__ = "player"

    # Identifier (string primary key)
    player_id = Column(String, primary_key=True)

    # Birth date, split across three integer columns
    birth_year = Column(Integer)
    birth_month = Column(Integer)
    birth_day = Column(Integer)

    # Birth place
    birth_country = Column(String)
    birth_state = Column(String)
    birth_city = Column(String)

    # Name parts
    name_first = Column(String)
    name_last = Column(String)
    name_given = Column(String)

    # Physical attributes (units are not stated here; presumably pounds and
    # inches, TODO confirm against the data source)
    weight = Column(Integer)
    height = Column(Integer)

    # Batting / throwing side, stored as strings
    bats = Column(String)
    throws = Column(String)

    # First and last game, declared as plain strings rather than a date type
    debut = Column(String)
    final_game = Column(String)

In [3]:
# Create Database Connection
# SQLite creates a brand-new, empty database file when the path does not
# exist, and create_all() would then add an empty `player` table to it, so
# every query below would "succeed" with zero rows. Fail loudly instead.
DATABASE_PATH = '../Resources/database.sqlite'
if not os.path.isfile(DATABASE_PATH):
    raise FileNotFoundError(
        "SQLite database not found at {!r} (relative to the current "
        "working directory)".format(DATABASE_PATH)
    )

engine = create_engine('sqlite:///' + DATABASE_PATH)
# With its default checkfirst=True, create_all() only issues CREATE TABLE for
# mapped tables that are missing from the database.
Base.metadata.create_all(engine)

In [4]:
from sqlalchemy.orm import Session
# Session bound to `engine`; the ORM queries in the following cells go through it.
session = Session(bind=engine)

In [5]:
# Load the whole `player` table into a DataFrame. The connection is opened in
# a `with` block so it is released as soon as the read finishes, instead of
# being left open for the rest of the kernel's life.
with engine.connect() as connection:
    data_import = pd.read_sql('SELECT * FROM player', connection)

In [None]:
# Walk every row of the `player` table and print its `name_given` value.
# NOTE(review): this prints one line per row; expect very long output.
players = session.query(BaseballPlayer)
for player in players:
    print(player.name_given)

In [6]:
# Count the players whose birth_country is 'USA'
usa = (
    session.query(BaseballPlayer)
    .filter(BaseballPlayer.birth_country == 'USA')
    .count()
)
print("There are {} players from the USA".format(usa))

There are 16504 players from the USA


In [7]:
# Count the players whose birth_year is earlier than 1990
born_before_1990 = (
    session.query(BaseballPlayer)
    .filter(BaseballPlayer.birth_year < 1990)
    .count()
)

print("{} players were born before 1990".format(born_before_1990))

18335 players were born before 1990


In [8]:
# Count the players born after 1989 whose birth country is the USA.
# Multiple criteria passed to a single filter() are combined with AND.
born_after_1989 = (
    session.query(BaseballPlayer)
    .filter(
        BaseballPlayer.birth_year > 1989,
        BaseballPlayer.birth_country == "USA",
    )
    .count()
)
print("{} USA players were born after 1989".format(born_after_1989))

300 USA players were born after 1989


In [7]:
from sqlalchemy import inspect
# Build an inspector on the engine to look at the database itself (rather
# than the ORM model) and list the tables it contains.
inspector = inspect(engine)
inspector.get_table_names()

['BaseballPlayers',
 'all_star',
 'appearances',
 'batting',
 'batting_postseason',
 'college',
 'fielding',
 'fielding_outfield',
 'fielding_postseason',
 'hall_of_fame',
 'home_game',
 'manager',
 'manager_award',
 'manager_award_vote',
 'manager_half',
 'park',
 'pitching',
 'pitching_postseason',
 'player',
 'player_award',
 'player_award_vote',
 'player_college',
 'postseason',
 'salary',
 'team',
 'team_franchise',
 'team_half']

In [10]:
# Show the column metadata of the `manager` table as a DataFrame
# (one row per column, as returned by the inspector).
pd.DataFrame(inspector.get_columns('manager'))

Unnamed: 0,name,type,nullable,default,autoincrement,primary_key
0,player_id,TEXT,True,,auto,0
1,year,INTEGER,True,,auto,0
2,team_id,TEXT,True,,auto,0
3,league_id,TEXT,True,,auto,0
4,inseason,INTEGER,True,,auto,0
5,g,INTEGER,True,,auto,0
6,w,INTEGER,True,,auto,0
7,l,INTEGER,True,,auto,0
8,rank,NUMERIC,True,,auto,0
9,plyr_mgr,TEXT,True,,auto,0
