In [None]:
import os

import pandas as pd
from sqlalchemy import create_engine, inspect

### Extract CSVs into DataFrames

In [None]:
# Load the air-quality CSV from Resources/ into a raw DataFrame and preview the first rows.
air_quality_file = "Resources/aqi_yearly_1980_to_2021.csv"
air_quality_file_df = pd.read_csv(filepath_or_buffer=air_quality_file)
air_quality_file_df.head(n=5)

In [None]:
# Load the county-level listings CSV from Resources/ into a raw DataFrame and preview it.
rdc_listings_file = "Resources/RDC_Inventory_Core_Metrics_County.csv"
rdc_listings_file_df = pd.read_csv(filepath_or_buffer=rdc_listings_file)
rdc_listings_file_df.head(n=5)

### Transform Air Quality DataFrame

In [None]:
# Keep only the columns used downstream. .copy() detaches the result from the
# raw frame so later edits cannot write back into air_quality_file_df.
air_quality_file_cols = [
    "State",
    "County",
    "Year",
    "Median AQI",
]
air_quality_file_transformed_df = air_quality_file_df.loc[:, air_quality_file_cols].copy()
air_quality_file_transformed_df.head()

In [None]:
# Keep only the rows whose Year equals 2021; all columns are retained.
is_2021 = air_quality_file_transformed_df["Year"].eq(2021)
air_quality_filtered_df = air_quality_file_transformed_df[is_2021]
air_quality_filtered_df

In [None]:
# Rename the source headers to lower-case snake_case column names
# (presumably matching the air_quality table columns — confirm against the schema).
air_quality_column_names = {
    "State": "state",
    "County": "county",
    "Year": "year",
    "Median AQI": "median_aqi",
}
air_quality_df = air_quality_filtered_df.rename(columns=air_quality_column_names)
air_quality_df

In [None]:
# Label the index "id"; the later to_sql(..., index=True) call writes the index
# out as a column under this name.
air_quality_df = air_quality_df.rename_axis(index="id")
air_quality_df

### Transform Listings DataFrame

In [None]:
# Keep only the listing metrics used downstream. .copy() detaches the result
# from the raw frame, so the columns added to it in the next cell are safe.
rdc_listings_file_cols = [
    "month_date_yyyymm",
    "county_name",
    "total_listing_count",
    "average_listing_price",
    "median_listing_price",
    "active_listing_count",
    "median_days_on_market",
]
listings_file_transformed_df = rdc_listings_file_df.loc[:, rdc_listings_file_cols].copy()
listings_file_transformed_df

In [None]:
# Split "county_name" on the first ", " only, producing the County and State columns.
# `n` is passed by keyword: pandas 2.0 made every str.split argument after `pat`
# keyword-only, so the former positional form (', ', 1, ...) raises TypeError there.
listings_file_transformed_df[['County', 'State']] = (
    listings_file_transformed_df['county_name'].str.split(', ', n=1, expand=True)
)
listings_file_transformed_df

In [None]:
# Select the month, the split County/State columns and the listing metrics
# (county_name itself is dropped here).
# .copy() makes `df` an independent frame: the next cell assigns to df["State"] and
# df["County"], which on a plain column slice triggers SettingWithCopyWarning.
listing_cols = [
    "month_date_yyyymm",
    "County",
    "State",
    "total_listing_count",
    "average_listing_price",
    "median_listing_price",
    "active_listing_count",
    "median_days_on_market",
]
df = listings_file_transformed_df[listing_cols].copy()
df

In [None]:
# Normalise casing: State to upper case, County to title case
# (presumably so State lines up with the codes used as the merge key later — confirm).
# assign() returns a new frame rather than writing into `df` column by column, so no
# SettingWithCopyWarning is raised even when `df` started life as a slice.
df = df.assign(
    State=df["State"].str.upper(),
    County=df["County"].str.title(),
)
df

In [None]:
# Load the state name <-> state abbreviation conversion table and preview it.
state_abbreviation_file = "Resources/state_abbreviation.csv"
state_abbreviation_file_df = pd.read_csv(filepath_or_buffer=state_abbreviation_file)
state_abbreviation_file_df.head(n=5)

In [None]:
# Join the listings to the abbreviation table on listings.State == table.Code
# (pandas' default inner join, so rows without a matching code are dropped).
# Then add Average_price: average_listing_price cast to int64. The cast raises
# if the column contains NaN.
merge_df = (
    df.merge(state_abbreviation_file_df, left_on="State", right_on="Code")
    .assign(Average_price=lambda merged: merged["average_listing_price"].astype("int64"))
)
merge_df

In [None]:
# Columns carried into the final listings frame. State_x / State_y are the
# suffixed "State" columns that pandas produces for the left / right merge inputs.
listings_columns = [
    "County",
    "State_x",
    "State_y",
    "total_listing_count",
    "Average_price",
    "median_listing_price",
    "active_listing_count",
    "median_days_on_market",
]

# NOTE(review): "month_date_yyyymm" is not in listings_columns, so its entry below
# has no effect (rename ignores missing labels) and no current_month column is
# produced. Confirm whether the month was meant to be kept.
listings_renames = {
    "month_date_yyyymm": "current_month",
    "County": "county",
    "State_x": "state_initial",
    "State_y": "state",
    "Average_price": "average_price",
}
listings_df = merge_df.loc[:, listings_columns].rename(columns=listings_renames)

listings_df

In [None]:
# Label the index "id"; the later to_sql(..., index=True) call writes it out under this name.
listings_df = listings_df.rename_axis(index="id")

In [None]:
# Display the listings frame after its index has been named "id".
listings_df

### Create database connection

In [None]:
# Connection target in "user:password@host:port/database" form.
# Read from the REALESTATE_DB_CONNECTION environment variable when it is set, so real
# credentials do not have to be committed with the notebook; otherwise fall back to
# the local development default.
# TODO: drop the hard-coded fallback once the environment variable is configured everywhere.
connection_string = os.environ.get(
    "REALESTATE_DB_CONNECTION",
    "postgres:bootcamp@localhost:5432/realestate_db",
)
engine = create_engine(f'postgresql://{connection_string}')

In [None]:
# Confirm tables
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector returned by inspect(engine) is the supported way to list tables.
inspect(engine).get_table_names()

### Load DataFrames into database

In [None]:
# Append the air-quality rows to the "air_quality" table; index=True writes the
# DataFrame index out as a column as well.
air_quality_df.to_sql(
    con=engine,
    name='air_quality',
    index=True,
    if_exists='append',
)

In [None]:
# Append the listings rows to the "listings" table; index=True writes the
# DataFrame index out as a column as well.
listings_df.to_sql(
    con=engine,
    name='listings',
    index=True,
    if_exists='append',
)