### ETL_Project_Team_1
* Team Revenue & Ticket Price Table - Clean & Merge Data

In [1]:
# import dependencies
import os

import pandas as pd
from sqlalchemy import create_engine, inspect

In [2]:
# Create connection to database.
# The password is read from the environment so it is never stored in the
# notebook; the remaining settings fall back to the local development values.
db_user = os.environ.get("MLB_DB_USER", "postgres")
db_password = os.environ["MLB_DB_PASSWORD"]  # KeyError here means the variable is not set
db_host = os.environ.get("MLB_DB_HOST", "localhost:5432")
db_name = os.environ.get("MLB_DB_NAME", "mlb_db")
rds_connection_string = f"{db_user}:{db_password}@{db_host}/{db_name}"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [3]:
# see what tables exist in database
# (Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the inspector is the supported way to list table names)
inspect(engine).get_table_names()

['winslosses',
 'attendance',
 'revenue',
 'payroll',
 'salary',
 'capacity',
 'avgticketprice',
 'capacityattendance',
 'looktable']

In [4]:
# Load every row of the revenue table. read_sql_query already returns a
# DataFrame, so it is re-indexed by team_key directly.
revenue = pd.read_sql_query(sql='Select * From revenue', con=engine)
revenue_df = revenue.set_index(keys="team_key")
revenue_df.head()

Unnamed: 0_level_0,team_longname,revenue
team_key,Unnamed: 1_level_1,Unnamed: 2_level_1
17,Arizona Diamondbacks,275
12,Atlanta Braves,344
28,Baltimore Orioles,251
7,Boston Red Sox,516
4,Chicago Cubs,452


In [5]:
# Load every row of the average ticket price table. read_sql_query already
# returns a DataFrame, so it is re-indexed by team_key directly.
avgticketprice = pd.read_sql_query(sql='Select * From avgticketprice', con=engine)
avgticketprice_df = avgticketprice.set_index(keys="team_key")
avgticketprice_df.head()

Unnamed: 0_level_0,team_longname,avg_ticket_price,team_cityname,team_abrv,team_shortname
team_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
17,Arizona Diamondbacks,59,Arizona,Arizona,Diamondbacks
12,Atlanta Braves,56,Atlanta,Atlanta,Braves
28,Baltimore Orioles,56,Baltimore,Baltimore,Orioles
7,Boston Red Sox,94,Boston,Boston,Red Sox
4,Chicago Cubs,128,Chicago,Chicago Cubs,Cubs


In [6]:
# merge the revenue df with the average ticket price df
# Both frames are indexed by team_key, so the join runs on that index level.
# validate="one_to_one" makes the merge fail loudly if either table holds a
# duplicated team_key instead of silently multiplying rows.
# Both frames carry team_longname, so the result has team_longname_x / _y.
merged_revenue_price = pd.merge(
    revenue_df,
    avgticketprice_df,
    on="team_key",
    validate="one_to_one",
)
merged_revenue_price.head()

Unnamed: 0_level_0,team_longname_x,revenue,team_longname_y,avg_ticket_price,team_cityname,team_abrv,team_shortname
team_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
17,Arizona Diamondbacks,275,Arizona Diamondbacks,59,Arizona,Arizona,Diamondbacks
12,Atlanta Braves,344,Atlanta Braves,56,Atlanta,Atlanta,Braves
28,Baltimore Orioles,251,Baltimore Orioles,56,Baltimore,Baltimore,Orioles
7,Boston Red Sox,516,Boston Red Sox,94,Boston,Boston,Red Sox
4,Chicago Cubs,452,Chicago Cubs,128,Chicago,Chicago Cubs,Cubs


In [7]:
# Keep only the descriptive team columns plus the two measures
columns_to_keep = [
    "team_cityname",
    "team_shortname",
    "team_longname_x",
    "revenue",
    "avg_ticket_price",
]
revenue_price = merged_revenue_price[columns_to_keep]
revenue_price.head()

Unnamed: 0_level_0,team_cityname,team_shortname,team_longname_x,revenue,avg_ticket_price
team_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
17,Arizona,Diamondbacks,Arizona Diamondbacks,275,59
12,Atlanta,Braves,Atlanta Braves,344,56
28,Baltimore,Orioles,Baltimore Orioles,251,56
7,Boston,Red Sox,Boston Red Sox,516,94
4,Chicago,Cubs,Chicago Cubs,452,128


In [8]:
# Drop the merge suffix from the long team name column
revenueprice = revenue_price.rename({"team_longname_x": "team_longname"}, axis="columns")

In [9]:
# Summarise the column dtypes and non-null counts of the renamed table
revenueprice.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 30 entries, 17 to 16
Data columns (total 5 columns):
team_cityname       30 non-null object
team_shortname      30 non-null object
team_longname       30 non-null object
revenue             30 non-null int64
avg_ticket_price    30 non-null int64
dtypes: int64(2), object(3)
memory usage: 1.4+ KB


In [10]:
# push the new table to the database
# if_exists='replace' keeps this cell idempotent: with 'append', re-running the
# notebook would insert the same rows again and duplicate every team.
# index=True writes the team_key index as a column of the table.
revenueprice.to_sql(name='revenueprice', con=engine, if_exists='replace', index=True)

In [11]:
# double check that the revenueprice table now exists in the database
# (Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the inspector is the supported way to list table names)
inspect(engine).get_table_names()

['winslosses',
 'attendance',
 'revenue',
 'payroll',
 'salary',
 'capacity',
 'avgticketprice',
 'capacityattendance',
 'revenueprice',
 'looktable']

In [None]:
# Export file as a CSV with the header row.
# team_key is the DataFrame index here, so it has to be written (index=True);
# with index=False the CSV would lose the key column that the database table keeps.
revenueprice.to_csv("revenueprice.csv", index=True, header=True)