In [1]:
import os
import pandas as pd
from sqlalchemy import create_engine
import numpy as np

In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
username = os.getenv('POSTGRES_USERNAME')
password = os.getenv('POSTGRES_PASSWORD')


postgres_connection_string = "postgres://{username}:{password}@{host}:{port}/{database}?gssencmode=disable".format(
    username=username,
    password=password,
    host="localhost",
    port="5432",
    database="lahman_baseball"
)

In [4]:
engine = create_engine(postgres_connection_string)

In [5]:
people_sql = "SELECT * FROM people;"
salaries_sql = "SELECT * FROM salaries;"
homegames_sql = "SELECT * FROM homegames;"
parks_sql = "SELECT * FROM parks;"
teams_sql = "SELECT * FROM teams;"
batting_sql = "SELECT * FROM batting;"
fielding_sql = "SELECT * FROM fielding;"
pitching_sql = "SELECT * FROM pitching;"
awards_sql = "SELECT * FROM awardsplayers;"
appearances_sql = "SELECT * FROM appearances;"

In [6]:
people_df = pd.read_sql(people_sql, con=engine)
salaries_df = pd.read_sql(salaries_sql, con=engine)
homegames_df = pd.read_sql(homegames_sql, con=engine)
parks_df = pd.read_sql(parks_sql, con=engine)
teams_df = pd.read_sql(teams_sql, con=engine)
batting_df = pd.read_sql(batting_sql, con=engine)
fielding_df = pd.read_sql(fielding_sql, con=engine)
pitching_df = pd.read_sql(pitching_sql, con=engine)
awardsplayers_df = pd.read_sql(awards_sql, con=engine)
appearances_df = pd.read_sql(appearances_sql, con=engine)

## Find the percentage of increase during the rise and fall of attendance throughout the years
---
#### Years during Spanish Flu, World War I
#### The Great Depression
#### World War II
#### The Korean War
#### Player Strikes (1981, 1994-95)
#### Record year (2007)
---

### Spanish Flu 1918-1920 & World War I

In [57]:
spanish_flu_ww1 = teams_df.loc[(teams_df['yearid'] >= 1915) & (teams_df['yearid'] <= 1925)]

In [58]:
pct_chg_spanish_flu_ww1 = spanish_flu_ww1.groupby(['yearid'])['attendance'].sum().to_frame().reset_index()

In [59]:
pct_chg_spanish_flu_ww1['pct_change'] = ((pct_chg_spanish_flu_ww1['attendance'].pct_change()) * 100).round(2)

In [60]:
pct_chg_spanish_flu_ww1

Unnamed: 0,yearid,attendance,pct_change
0,1915,4864826.0,
1,1916,6503519.0,33.68
2,1917,5219994.0,-19.74
3,1918,3080126.0,-40.99
4,1919,6532439.0,112.08
5,1920,9120875.0,39.62
6,1921,8607312.0,-5.63
7,1922,8816175.0,2.43
8,1923,8672406.0,-1.63
9,1924,9596083.0,10.65


### The Great Depression

In [61]:
great_depression = teams_df.loc[(teams_df['yearid'] >= 1925) & (teams_df['yearid'] <= 1935)]

In [62]:
pct_chg_great_depression = great_depression.groupby(['yearid'])['attendance'].sum().to_frame().reset_index()

In [63]:
pct_chg_great_depression['pct_change'] = ((pct_chg_great_depression['attendance'].pct_change()) * 100).round(2)

In [64]:
pct_chg_great_depression

Unnamed: 0,yearid,attendance,pct_change
0,1925,9540555.0,
1,1926,9832982.0,3.07
2,1927,9922868.0,0.91
3,1928,9102285.0,-8.27
4,1929,9588183.0,5.34
5,1930,10132262.0,5.67
6,1931,8467107.0,-16.43
7,1932,6974566.0,-17.63
8,1933,6089031.0,-12.7
9,1934,6963711.0,14.36


### World War II

In [65]:
world_war_2 = teams_df.loc[(teams_df['yearid'] >= 1935) & (teams_df['yearid'] <= 1950)]

In [66]:
pct_chg_world_war_2 = world_war_2.groupby(['yearid'])['attendance'].sum().to_frame().reset_index()

In [67]:
pct_chg_world_war_2['pct_change'] = ((pct_chg_world_war_2['attendance'].pct_change()) * 100).round(2)

In [68]:
pct_chg_world_war_2

Unnamed: 0,yearid,attendance,pct_change
0,1935,7345316.0,
1,1936,8082613.0,10.04
2,1937,8940063.0,10.61
3,1938,9006511.0,0.74
4,1939,8977779.0,-0.32
5,1940,9823484.0,9.42
6,1941,9689603.0,-1.36
7,1942,8553569.0,-11.72
8,1943,7465911.0,-12.72
9,1944,8772746.0,17.5


### The Korean War

In [73]:
korean_war = teams_df.loc[(teams_df['yearid'] >= 1945) & (teams_df['yearid'] <= 1955)]

In [74]:
pct_chg_korean_war = korean_war.groupby(['yearid'])['attendance'].sum().to_frame().reset_index()

In [75]:
pct_chg_korean_war['pct_change'] = ((pct_chg_korean_war['attendance'].pct_change()) * 100).round(2)

In [76]:
pct_chg_korean_war

Unnamed: 0,yearid,attendance,pct_change
0,1945,10841123.0,
1,1946,18523289.0,70.86
2,1947,19874539.0,7.29
3,1948,20920842.0,5.26
4,1949,20215365.0,-3.37
5,1950,17462977.0,-13.62
6,1951,16126676.0,-7.65
7,1952,14633044.0,-9.26
8,1953,14383797.0,-1.7
9,1954,15935883.0,10.79


### Strike (1981)

In [78]:
strike_1981 = teams_df.loc[(teams_df['yearid'] >= 1971) & (teams_df['yearid'] <= 1985)]

In [79]:
pct_chg_strike_1981 = strike_1981.groupby(['yearid'])['attendance'].sum().to_frame().reset_index()

In [80]:
pct_chg_strike_1981['pct_change'] = ((pct_chg_strike_1981['attendance'].pct_change()) * 100).round(2)

In [81]:
pct_chg_strike_1981

Unnamed: 0,yearid,attendance,pct_change
0,1971,29193417.0,
1,1972,26968268.0,-7.62
2,1973,30108926.0,11.65
3,1974,30025608.0,-0.28
4,1975,29789913.0,-0.78
5,1976,31318331.0,5.13
6,1977,38709779.0,23.6
7,1978,40636886.0,4.98
8,1979,43550398.0,7.17
9,1980,43014136.0,-1.23


### Strike (1994-95)

In [82]:
strike_1994 = teams_df.loc[(teams_df['yearid'] >= 1990) & (teams_df['yearid'] <= 2000)]

In [83]:
pct_chg_strike_1994 = strike_1994.groupby(['yearid'])['attendance'].sum().to_frame().reset_index()

In [84]:
pct_chg_strike_1994['pct_change'] = ((pct_chg_strike_1994['attendance'].pct_change()) * 100).round(2)

In [85]:
pct_chg_strike_1994

Unnamed: 0,yearid,attendance,pct_change
0,1990,54823768.0,
1,1991,56813760.0,3.63
2,1992,55870466.0,-1.66
3,1993,70257938.0,25.75
4,1994,50010016.0,-28.82
5,1995,50469236.0,0.92
6,1996,60097381.0,19.08
7,1997,63168689.0,5.11
8,1998,70601147.0,11.77
9,1999,70139380.0,-0.65


### Record Year (2007)

In [87]:
record_attendance = teams_df.loc[(teams_df['yearid'] >= 2000) & (teams_df['yearid'] <= 2020)]

In [88]:
pct_chg_record_attendance = record_attendance.groupby(['yearid'])['attendance'].sum().to_frame().reset_index()

In [89]:
pct_chg_record_attendance['pct_change'] = ((pct_chg_record_attendance['attendance'].pct_change()) * 100).round(2)

In [90]:
pct_chg_record_attendance

Unnamed: 0,yearid,attendance,pct_change
0,2000,71358907.0,
1,2001,72581101.0,1.71
2,2002,67944389.0,-6.39
3,2003,67630052.0,-0.46
4,2004,73022972.0,7.97
5,2005,74915268.0,2.59
6,2006,76043902.0,1.51
7,2007,79484718.0,4.52
8,2008,78624315.0,-1.08
9,2009,73430580.0,-6.61


In [36]:
#teams_df.loc[teams_df['teamid'] == 'NYA']

### Case 1 - Babe Ruth

In [29]:
babe_ruth = list(people_df['playerid'][people_df['namelast'] == 'Ruth'])

In [32]:
appearances_df.query('playerid in @babe_ruth')

Unnamed: 0,yearid,teamid,lgid,playerid,g_all,gs,g_batting,g_defense,g_p,g_c,...,g_2b,g_3b,g_ss,g_lf,g_cf,g_rf,g_of,g_dh,g_ph,g_pr
15028,1914,BOS,AL,ruthba01,5,3.0,5,4,4,0,...,0,0,0,0,0,0,0,0.0,0.0,0.0
15813,1915,BOS,AL,ruthba01,42,28.0,42,32,32,0,...,0,0,0,0,0,0,0,0.0,11.0,0.0
16432,1916,BOS,AL,ruthba01,68,40.0,68,44,44,0,...,0,0,0,0,0,0,0,0.0,24.0,0.0
16953,1917,BOS,AL,ruthba01,52,38.0,52,41,41,0,...,0,0,0,0,0,0,0,0.0,11.0,0.0
17467,1918,BOS,AL,ruthba01,95,89.0,95,91,20,0,...,0,0,0,47,12,0,59,0.0,4.0,0.0
17979,1919,BOS,AL,ruthba01,130,126.0,130,129,17,0,...,0,0,0,110,1,0,110,0.0,1.0,0.0
18496,1920,NYA,AL,ruthba01,142,141.0,142,141,1,0,...,0,0,0,32,25,86,141,0.0,1.0,0.0
19015,1921,NYA,AL,ruthba01,152,151.0,152,152,2,0,...,0,0,0,132,20,0,152,0.0,0.0,0.0
19549,1922,NYA,AL,ruthba01,110,110.0,110,110,0,0,...,0,0,0,71,0,39,110,0.0,0.0,0.0
20072,1923,NYA,AL,ruthba01,152,152.0,152,152,0,0,...,0,0,0,68,7,73,148,0.0,0.0,0.0


In [46]:
ruth_homerun_mask = (batting_df['playerid'] == 'ruthba01') & (batting_df['hr'].max())

In [53]:
batting_df.columns

Index(['playerid', 'yearid', 'stint', 'teamid', 'lgid', 'g', 'ab', 'r', 'h',
       'h2b', 'h3b', 'hr', 'rbi', 'sb', 'cs', 'bb', 'so', 'ibb', 'hbp', 'sh',
       'sf', 'gidp'],
      dtype='object')

In [57]:
home_run_leaders = batting_df.sort_values(by = ['hr', 'yearid', 'playerid'], ascending = False)

In [58]:
home_run_leaders.columns

Index(['playerid', 'yearid', 'stint', 'teamid', 'lgid', 'g', 'ab', 'r', 'h',
       'h2b', 'h3b', 'hr', 'rbi', 'sb', 'cs', 'bb', 'so', 'ibb', 'hbp', 'sh',
       'sf', 'gidp'],
      dtype='object')

In [60]:
home_run_leaders = home_run_leaders.drop(columns = ['stint', 'lgid', 'g', 'ab', 'r', 'h',
       'h2b', 'h3b', 'rbi', 'sb', 'cs', 'bb', 'so', 'ibb', 'hbp', 'sh',
       'sf', 'gidp'])

# Case 1, Jackie Robinson

#### When was Jackie Robinson signed to the Dodgers and broke the color barrier?

In [7]:
people_df['debut'][people_df['playerid'] == 'robinja02']

14632    1947-04-15
Name: debut, dtype: object

### ^^^Jackie Robinson made his debut in 1947
---

#### Let's see what awards he has won

In [108]:
awardsplayers_df[awardsplayers_df['playerid'] == 'robinja02']

Unnamed: 0,playerid,awardid,yearid,lgid,tie,notes
1799,robinja02,Rookie of the Year,1947,ML,,
1836,robinja02,Baseball Magazine All-Star,1948,NL,,2B
1878,robinja02,Baseball Magazine All-Star,1949,ML,,2B
1890,robinja02,Baseball Magazine All-Star,1949,NL,,2B
1901,robinja02,Most Valuable Player,1949,NL,,
1905,robinja02,TSN All-Star,1949,ML,,2B
1944,robinja02,Baseball Magazine All-Star,1950,NL,,2B
1959,robinja02,TSN All-Star,1950,ML,,2B
1980,robinja02,TSN All-Star,1951,ML,,2B
2001,robinja02,TSN All-Star,1952,ML,,2B


### ^^^As it turns out, Jackie Robinson won Rookie of the Year in 1947
---

In [10]:
teams_df['teamid'][teams_df['name'] == 'Houston Astros']

1445    HOU
1466    HOU
1486    HOU
1506    HOU
1526    HOU
1550    HOU
1574    HOU
1598    HOU
1622    HOU
1646    HOU
1670    HOU
1694    HOU
1718    HOU
1744    HOU
1770    HOU
1796    HOU
1822    HOU
1848    HOU
1874    HOU
1900    HOU
1926    HOU
1952    HOU
1978    HOU
2004    HOU
2030    HOU
2056    HOU
2082    HOU
2108    HOU
2136    HOU
2164    HOU
2192    HOU
2220    HOU
2248    HOU
2277    HOU
2307    HOU
2337    HOU
2367    HOU
2397    HOU
2427    HOU
2457    HOU
2486    HOU
2516    HOU
2546    HOU
2576    HOU
2606    HOU
2636    HOU
2666    HOU
2695    HOU
2725    HOU
2755    HOU
2785    HOU
2815    HOU
Name: teamid, dtype: object

In [19]:
teams_df['teamid'][teams_df['name'] == 'New York Yankees'].value_counts()

NYA    104
Name: teamid, dtype: int64

In [15]:
teams_df.columns

Index(['yearid', 'lgid', 'teamid', 'franchid', 'divid', 'rank', 'g', 'ghome',
       'w', 'l', 'divwin', 'wcwin', 'lgwin', 'wswin', 'r', 'ab', 'h', 'h2b',
       'h3b', 'hr', 'bb', 'so', 'sb', 'cs', 'hbp', 'sf', 'ra', 'er', 'era',
       'cg', 'sho', 'sv', 'ipouts', 'ha', 'hra', 'bba', 'soa', 'e', 'dp', 'fp',
       'name', 'park', 'attendance', 'bpf', 'ppf', 'teamidbr',
       'teamidlahman45', 'teamidretro'],
      dtype='object')

In [93]:
homegames_df.columns

Index(['year', 'league', 'team', 'park', 'span_first', 'span_last', 'games',
       'openings', 'attendance'],
      dtype='object')

In [94]:
dodgers_attendance_mask = (homegames_df['year'] >= 1937) & (homegames_df['year'] <= 1957) & (homegames_df['team'] == 'BRO')

In [95]:
dodgers_attendance = homegames_df.loc[dodgers_attendance_mask]

In [97]:
dodgers_teams_mask = (teams_df['yearid'] >= 1937) & (teams_df['yearid'] <= 1957) & (teams_df['teamid'] == 'BRO')

In [98]:
dodgers_teams = teams_df.loc[dodgers_teams_mask]

In [99]:
dodgers_teams.columns

Index(['yearid', 'lgid', 'teamid', 'franchid', 'divid', 'rank', 'g', 'ghome',
       'w', 'l', 'divwin', 'wcwin', 'lgwin', 'wswin', 'r', 'ab', 'h', 'h2b',
       'h3b', 'hr', 'bb', 'so', 'sb', 'cs', 'hbp', 'sf', 'ra', 'er', 'era',
       'cg', 'sho', 'sv', 'ipouts', 'ha', 'hra', 'bba', 'soa', 'e', 'dp', 'fp',
       'name', 'park', 'attendance', 'bpf', 'ppf', 'teamidbr',
       'teamidlahman45', 'teamidretro'],
      dtype='object')

In [100]:
dodgers_record = dodgers_teams[['yearid', 'rank', 'ghome', 'w', 'l', 'lgwin', 'wswin', 'attendance']]

In [104]:
dodgers_attendance[['year', 'attendance', 'games']]

Unnamed: 0,year,attendance,games
1101,1937,454551,76
1118,1938,728519,74
1136,1939,1048457,78
1153,1940,969439,81
1170,1941,975162,79
1187,1942,882336,79
1204,1943,692492,77
1221,1944,686971,77
1238,1945,1069629,78
1255,1946,1830974,79


### How did he do in that first season?

In [8]:
batting_1947 = batting_df[batting_df['yearid'] == 1947]

In [9]:
jackie_robinson = batting_1947[batting_1947['playerid'] == 'robinja02']

### Let's look at how he performed compared to his teammates

In [12]:
dodgers_bat_1947 = batting_1947[batting_1947['teamid'] == 'BRO']

In [15]:
dodgers_bat_1947['slugging'] = (((dodgers_bat_1947['h']) + (2 * dodgers_bat_1947['h2b']) + (3 * dodgers_bat_1947['h3b']) + (4 * dodgers_bat_1947['hr'])) / (dodgers_bat_1947['ab'])).round(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dodgers_bat_1947['slugging'] = (((dodgers_bat_1947['h']) + (2 * dodgers_bat_1947['h2b']) + (3 * dodgers_bat_1947['h3b']) + (4 * dodgers_bat_1947['hr'])) / (dodgers_bat_1947['ab'])).round(3)


In [17]:
dodgers_bat_1947.sort_values(by = 'slugging', ascending = False)

Unnamed: 0,playerid,yearid,stint,teamid,lgid,g,ab,r,h,h2b,...,sb,cs,bb,so,ibb,hbp,sh,sf,gidp,slugging
32748,bankhda01,1947,1,BRO,NL,6,4,2,1,0,...,0.0,,0,1.0,,1.0,0.0,,0.0,1.25
33149,ramsdwi01,1947,1,BRO,NL,2,1,0,1,0,...,0.0,,0,0.0,,0.0,0.0,,0.0,1.0
33079,meltoru01,1947,1,BRO,NL,4,1,0,1,0,...,0.0,,0,0.0,,0.0,0.0,,0.0,1.0
33040,lunddo01,1947,1,BRO,NL,11,20,5,6,2,...,0.0,,3,7.0,,0.0,0.0,,0.0,0.9
32941,higbeki01,1947,1,BRO,NL,4,5,0,1,1,...,0.0,,0,1.0,,0.0,0.0,,0.0,0.6
32881,furilca01,1947,1,BRO,NL,124,437,61,129,24,...,7.0,,34,24.0,,1.0,4.0,,17.0,0.526
33259,vaughar01,1947,1,BRO,NL,64,126,24,41,5,...,4.0,,27,11.0,,0.0,0.0,,2.0,0.516
32938,hermage01,1947,1,BRO,NL,79,189,36,52,7,...,5.0,,28,7.0,,3.0,5.0,,3.0,0.513
33151,reesepe01,1947,1,BRO,NL,142,476,81,135,24,...,7.0,,104,67.0,,2.0,8.0,,7.0,0.511
33270,walkedi02,1947,1,BRO,NL,148,529,77,162,31,...,6.0,,97,26.0,,1.0,10.0,,9.0,0.509


In [134]:
((jackie_robinson['h']) + (2 * jackie_robinson['h2b']) + (3 * jackie_robinson['h3b']) + (4 * jackie_robinson['hr'])) / (jackie_robinson['ab'])

33167    0.508475
dtype: float64

In [112]:
#use this to determine the earliest year sacrifice flies were recorded
#sf_recording_begins = batting_df[['yearid', 'sf']]
#sf_recording_begins = sf_recording_begins.dropna()
#sf_recording_begins.loc[(sf_recording_begins.sf >= 1)].sort_values(by = 'yearid')

In [103]:
#this is a good way to see 
#sf_recording_begins = batting_df.loc[(batting_df.sf > 0)]
#sf_recording_begins

### Accolades? (Hall of Fame, All Star)

In [64]:
bonds_salary

Unnamed: 0,yearid,teamid,lgid,playerid,salary
1103,1986,PIT,NL,bondsba01,60000.0
1776,1987,PIT,NL,bondsba01,100000.0
2399,1988,PIT,NL,bondsba01,220000.0
3098,1989,PIT,NL,bondsba01,360000.0
3927,1990,PIT,NL,bondsba01,850000.0
4659,1991,PIT,NL,bondsba01,2300000.0
5407,1992,PIT,NL,bondsba01,4800000.0
6406,1993,SFN,NL,bondsba01,4516666.0
7296,1994,SFN,NL,bondsba01,5166666.0
8265,1995,SFN,NL,bondsba01,8166666.0


In [9]:
mcgwire_salary = salaries_df[salaries_df['playerid'] == 'mcgwima01']

In [63]:
bonds_salary = salaries_df[salaries_df['playerid'] == 'bondsba01']

In [11]:
griffey_salary = salaries_df[salaries_df['playerid'] == 'griffke02']

In [12]:
nryan_salary = salaries_df[salaries_df['playerid'] == 'ryanno01']

In [13]:
#teams_df[teams_df['teamid'] == 'BSN']

In [14]:
#teams_df[teams_df['name'] == 'Chicago White Sox']

In [15]:
#homegames_df[homegames_df['year'] == 1918]

---
### Let's look at our first example, Jackie Robinson

In [87]:
bk_dodgers_homegames = homegames_df[homegames_df['team'] == 'BRO']

In [126]:
jackie_robinson_id = people_df[people_df['playerid'] == 'robinja02']

In [138]:
bk_dodgers_teams = teams_df[(teams_df['teamid'] == 'BRO') & (teams_df['name'] == 'Brooklyn Dodgers')]

In [143]:
bk_dodgers_teams['lgwin'].value_counts()

N    21
Y     7
Name: lgwin, dtype: int64

In [144]:
bk_dodgers_teams['wswin'].value_counts()

N    27
Y     1
Name: wswin, dtype: int64

In [66]:
sf_giants_homegames = homegames_df[homegames_df['team'] == 'SFN']

In [67]:
sf_giants_homegames['attendance'].mean().round(2)

1895639.36

In [68]:
sf_giants_homegames['mean_diff'] = sf_giants_homegames['attendance'] - 1895639.36

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sf_giants_homegames['mean_diff'] = sf_giants_homegames['attendance'] - 1895639.36


In [72]:
np.sqrt((sf_giants_homegames['mean_diff']**2).mean()).round(2)

947710.38

In [80]:
sf_giants_homegames.head()

Unnamed: 0,year,league,team,park,span_first,span_last,games,openings,attendance,mean_diff
1463,1958,NL,SFN,SFO01,1958-04-15,1958-09-28,77,75,1272857,-622782.36
1479,1959,NL,SFN,SFO01,1959-04-14,1959-09-20,77,77,1421630,-474009.36
1495,1960,NL,SFN,SFO02,1960-04-12,1960-10-02,77,76,1796356,-99283.36
1513,1961,NL,SFN,SFO02,1961-04-11,1961-09-20,77,74,1391251,-504388.36
1533,1962,NL,SFN,SFO02,1962-04-10,1962-10-01,82,77,1590136,-305503.36


In [57]:
#np.sqrt((sf_giants_homegames['attendance']**2).mean())

---

In [70]:
teams_df.query('name == "Montreal Expos"')

Unnamed: 0,yearid,lgid,teamid,franchid,divid,rank,g,ghome,w,l,...,dp,fp,name,park,attendance,bpf,ppf,teamidbr,teamidlahman45,teamidretro
1530,1969,NL,MON,WSN,E,6,162,81.0,52,110,...,179.0,0.97,Montreal Expos,Jarry Park,1212608.0,100,102,MON,MON,MON
1555,1970,NL,MON,WSN,E,6,162,80.0,73,89,...,193.0,0.977,Montreal Expos,Jarry Park,1424683.0,99,101,MON,MON,MON
1579,1971,NL,MON,WSN,E,5,162,80.0,71,90,...,164.0,0.976,Montreal Expos,Jarry Park,1290963.0,100,101,MON,MON,MON
1603,1972,NL,MON,WSN,E,5,156,78.0,70,86,...,141.0,0.978,Montreal Expos,Jarry Park,1142145.0,101,102,MON,MON,MON
1627,1973,NL,MON,WSN,E,4,162,81.0,79,83,...,156.0,0.974,Montreal Expos,Jarry Park,1246863.0,103,104,MON,MON,MON
1651,1974,NL,MON,WSN,E,4,161,80.0,79,82,...,157.0,0.976,Montreal Expos,Jarry Park,1019134.0,105,106,MON,MON,MON
1675,1975,NL,MON,WSN,E,5,162,81.0,75,87,...,179.0,0.973,Montreal Expos,Jarry Park,908292.0,104,106,MON,MON,MON
1699,1976,NL,MON,WSN,E,6,162,80.0,55,107,...,179.0,0.975,Montreal Expos,Jarry Park,646704.0,105,107,MON,MON,MON
1723,1977,NL,MON,WSN,E,5,162,81.0,75,87,...,128.0,0.98,Montreal Expos,Stade Olympique,1433757.0,97,97,MON,MON,MON
1749,1978,NL,MON,WSN,E,4,162,80.0,76,86,...,150.0,0.979,Montreal Expos,Stade Olympique,1427007.0,99,98,MON,MON,MON


In [89]:
lowest_attendance = homegames_df.groupby(['year', 'team'])['attendance'].min().to_frame()

## Clean up the connection!

In [23]:
#engine.dispose()