In [None]:
import json
import os

import numpy as np
import pandas as pd
#from pandas.io.json import json_normalize
from sqlalchemy import create_engine
from sqlalchemy import inspect

## NYC Tree Data

In [None]:
# Load the tree census JSON export. Column definitions are read from
# meta -> view -> columns and the row values from the top-level "data" key.
# A context manager is used so the file handle is closed once parsing is done.
with open('Resources/NYC_Tree_Census_2015.json') as census_file:
    json_file = json.load(census_file)

column_definitions = json_file["meta"]["view"]["columns"]
print(len(column_definitions))

# Every column definition carries its name under the "name" key
column_names = [column["name"] for column in column_definitions]
print(column_names)

# Build the raw DataFrame: one row per data record, labelled with the names above
tree_census_df = pd.DataFrame(json_file["data"], columns=column_names)
tree_census_df.head()


In [None]:
# Keep only the census columns of interest and give them their output names
tree_columns = ['tree_id', 'health', 'zipcode', 'boroname', 'address']
tree_column_names = {
    'tree_id': 'id',
    'health': 'tree_health',
    'zipcode': 'zipcode_id',
    'boroname': 'borough',
    'address': 'address',
}
tree_transform = tree_census_df[tree_columns].copy().rename(columns=tree_column_names)

# Discard rows with any missing value, then index the result by tree id
tree_transformed = tree_transform.dropna(how='any', axis=0).set_index('id')
tree_transformed.head()

In [None]:
# Build the SQLAlchemy engine for the PostgreSQL database.
# The "user:password@host:port/database" part can be supplied through the
# ETL_DB_CONNECTION environment variable so real credentials do not have to be
# stored in the notebook; the previous hard-coded value is kept as the fallback.
connection_string = os.environ.get("ETL_DB_CONNECTION", "postgres:postgres@localhost:5432/etl-project_db")
engine = create_engine(f'postgresql://{connection_string}')

## NYC Property Sales Data

In [None]:
# Load the raw property sales CSV and preview the first rows
sales_csv_path = "Resources/nyc-rolling-sales.csv"
property_data = pd.read_csv(sales_csv_path)
property_data.head()

In [None]:
# Narrow the frame down to the columns used by the rest of the notebook
needed_columns = ["BOROUGH", "NEIGHBORHOOD", "ADDRESS", "SALE PRICE"]
property_data = property_data[needed_columns]
property_data

In [None]:
# Replace the "-" placeholder (presumably marking a missing sale price) with 0.
# Only the SALE PRICE column is touched, and only cells that consist of a lone
# dash optionally padded with whitespace. A frame-wide regex replace with a
# non-string value swaps out every cell that merely *contains* a hyphen, so
# hyphenated addresses and neighborhoods would become 0 as well.
property_sales_df = property_data.copy()
property_sales_df["SALE PRICE"] = property_sales_df["SALE PRICE"].replace(r"^\s*-\s*$", 0, regex=True)

In [None]:
# Drop any rows with sale price $0
# Numeric zeros anywhere in the frame become NaN so dropna() below removes them.
property_sales_df = property_sales_df.replace(0, np.nan)

# While SALE PRICE is still stored as text, a $0 sale is the string "0", which
# the numeric replace above does not match. Compare on a numeric view of the
# column instead; values that cannot be parsed are left untouched here.
zero_price = pd.to_numeric(property_sales_df["SALE PRICE"], errors="coerce") == 0
property_sales_df = property_sales_df[~zero_price]

property_sales_df = property_sales_df.dropna(how='any', axis=0)
property_sales_df.head()

In [None]:
# Check data types: show the dtype of every column in the cleaned sales frame
# so the type of the sale price column can be inspected
property_sales_df.dtypes

In [None]:
# Convert the sale price column to a numeric dtype
sale_price_numeric = pd.to_numeric(property_sales_df["SALE PRICE"])
property_sales_df["SALE PRICE"] = sale_price_numeric
property_sales_df.head()

In [None]:
# Confirm data type: list the column dtypes again to verify the result of the
# numeric conversion of the sale price column
property_sales_df.dtypes

In [None]:
# Give the sales columns lower-case, snake_case names
sales_column_names = {
    "BOROUGH": "borough_id",
    "NEIGHBORHOOD": "neighborhood",
    "ADDRESS": "address",
    "SALE PRICE": "sale_price",
}
property_sales_df = property_sales_df.rename(columns=sales_column_names)
property_sales_df.head()

In [None]:
# Lookup table pairing each numeric borough id (1-5) with its borough name
borough_names = ['Manhattan', 'Bronx', 'Brooklyn', 'Queens', 'Staten Island']
borough_table = pd.DataFrame({
    'borough_id': list(range(1, len(borough_names) + 1)),
    'borough': borough_names,
})
borough_table

In [None]:
# Attach the borough name to each sale by joining the lookup table with the
# sales frame on their shared borough_id column
property_sales_df = borough_table.merge(property_sales_df, on='borough_id')
property_sales_df

## Connect to SQL

In [None]:
# Create engine
# The "user:password@host:port/database" part can be supplied through the
# ETL_DB_CONNECTION environment variable so real credentials do not have to be
# stored in the notebook; the previous hard-coded value is kept as the fallback.
rds_connection_string = os.environ.get("ETL_DB_CONNECTION", "postgres:postgres@localhost:5432/etl-project_db")
engine = create_engine(f'postgresql://{rds_connection_string}')

In [None]:
# Append the transformed tree rows to the tree_census table; index=True writes
# the DataFrame index as a column as well
tree_transformed.to_sql(
    name='tree_census',
    con=engine,
    if_exists='append',
    index=True,
)

In [None]:
# Append the property sales rows to the property_sales table; index=False
# leaves the DataFrame index out of the written columns
property_sales_df.to_sql(
    name='property_sales',
    con=engine,
    if_exists='append',
    index=False,
)

In [None]:
# Get table names
# Engine.table_names() is deprecated as of SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of table names on both old and new versions.
inspect(engine).get_table_names()