In [None]:
# dependencies go here
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

from config import pw

## set up database

We set up the `minneapolis_housing` database and its related tables in PostgreSQL:

* `neighborhood`
* `home_value`
* `crime`

The code for setting up the tables can be found in the `queries.sql` file accompanying this notebook.

**TODO: Refactor the table setup to work with SQLAlchemy, if time permits.**

## load data
### Neighborhood datasets

In [None]:
# Stacy start

In [None]:
# Load the Minneapolis city dataset for neighborhoods and keep only the
# identifier and name columns under friendlier names.
#
# NOTES:
# There was no codebook explaining what the cryptic column names mean, so FID
# is used as the neighborhood id based on the fact that it is a unique ID.
n_path = os.path.join('.', 'data', 'Minneapolis_Neighborhoods.csv')
neighborhood = pd.read_csv(n_path)

# rename() returns a new frame, so the raw `neighborhood` frame is left untouched
# and this cell gives the same result every time it is re-run.
neighborhood_df = (
    neighborhood[['FID', 'BDNAME']]
    .rename(columns={'FID': 'id', 'BDNAME': 'neighborhood'})
)
neighborhood_df.head()

In [None]:
# Load the Minneapolis neighborhood census dataset.
census_path = os.path.join('.', 'data', 'census_2010.xls')

# header=None keeps every spreadsheet row as data and labels the columns 0, 1, 2, ...
census = pd.read_excel(census_path, header=None)

# Keep the rows from position 6 onward and the first two columns
# (presumably the rows above are title/header lines -- confirm against the workbook).
# rename() without inplace builds a new frame instead of mutating the iloc
# slice, which avoids pandas' SettingWithCopyWarning.
census_df = census.iloc[6:, :2].rename(columns={
    0: 'neighborhood',
    1: 'population_2010'
})
census_df.head()

In [None]:
# Combine the city neighborhood list and the census populations into a single
# neighborhood frame. The outer join keeps rows that appear in only one of the
# two sources, leaving NaN in the columns that came from the other source.
neighborhoods = pd.merge(
    neighborhood_df,
    census_df,
    on='neighborhood',
    how='outer',
)
neighborhoods.head()

In [None]:
# Inspect the merged frame for missing values, one column at a time.

# Rows that have no neighborhood id
missing_id = neighborhoods['id'].isna()
print(neighborhoods[missing_id])

# Rows that have no 2010 population
missing_population = neighborhoods['population_2010'].isna()
print(neighborhoods[missing_population])

# Findings recorded from the output:
# No population data for South Uptown (44) or Kenwood (74)
# No ids for CARAG (87) or Kenwood (88)

In [None]:
# Deal with the null values found by the check above.
#
# NOTE: this cell addresses rows by index label and then drops some of them,
# so it can only be run once per kernel session; re-running it after the drop
# raises KeyError on label 88.

# Kenwood (74) should be updated with Kenwood (88)'s pop data.
# NOTE(review): the write targets index label 73 while the comment says 74 --
# presumably 74 is the neighborhood id and 73 its row label; confirm.
neighborhoods.loc[73, 'population_2010'] = neighborhoods.loc[88, 'population_2010']

# Create an id for CARAG (87). Use a number rather than the string '88' so the
# `id` column keeps a single numeric dtype instead of mixing numbers and text.
neighborhoods.loc[87, 'id'] = 88

# Drop rows 88, 89 (88 is the duplicate Kenwood row copied from above;
# row 89 is presumably another unmatched census row -- confirm before dropping).
neighborhoods.drop([88, 89], inplace=True)

# Show the end of the frame as the cell output so the fixes can be checked.
neighborhoods.tail(20)

In [None]:
# Stacy end

In [None]:
# Jenna start
# Read the 2019 Assessors Parcel Data CSV into a dataframe and preview it
assessors_csv = "data/Assessors_Parcel_Data_2019.csv"
assessors_df = pd.read_csv(assessors_csv)
assessors_df.head()

In [None]:
# Create a new dataframe with select columns
parcel_columns = [
    'NEIGHBORHOOD',
    'FORMATTED_ADDRESS',
    'PROPERTY_TYPE',
    'LANDUSE',
    'TOTALVALUE',
    'BELOWGROUNDAREA',
    'ABOVEGROUNDAREA',
    'BEDROOMS',
    'PARCEL_AREA_SQFT',
    'X',
    'Y',
]

# .copy() makes the subset an independent frame, so later renames, filters and
# calculated columns can modify it without triggering SettingWithCopyWarning.
assessors_parcel_df = assessors_df[parcel_columns].copy()
assessors_parcel_df.head()

In [None]:
#Rename columns

In [None]:
#Filter out property types that are not residential

In [None]:
#Calculated Columns

In [None]:
# Jenna end

In [None]:
# Katrina start
# Read the 2019 police incidents CSV into a dataframe and preview it
incidents_csv = "data/Police_Incidents_2019.csv"
police_incidents_df = pd.read_csv(incidents_csv)
police_incidents_df.head()

In [None]:
# Katrina end
# Create new dataframe with certain columns
incident_columns = ['description', 'neighborhood', 'X', 'Y']

# .copy() makes the subset an independent frame, so later transforms can
# modify it without triggering SettingWithCopyWarning.
incidents_df = police_incidents_df[incident_columns].copy()

# Preview only the first rows instead of rendering the entire frame.
incidents_df.head()

## transforming data

Steps to transform the data:
1. Lower-case the neighborhood names in both DataFrames being merged
2. Merge the DataFrames on neighborhood name (into a new crime DataFrame)
3. Drop the neighborhood name column from the new crime DataFrame
4. Do the same merge and neighborhood-name drop for the assessor DataFrame

In [None]:
# Stacy start

In [None]:
# Lower-case the neighborhood names so they can be matched against the other datasets
neighborhoods['neighborhood'] = neighborhoods['neighborhood'].str.lower()

In [None]:
# Stacy end

In [None]:
# Jenna start

In [None]:
# Jenna end

In [None]:
# Katrina start

In [None]:
# Katrina end

## adding to postgres

In [None]:
# Stacy start

In [None]:
# Build the connection URL for the local `minneapolis_housing` database.
# The password is percent-encoded because characters such as '@', ':' or '/'
# would otherwise be parsed as URL delimiters and corrupt the connection string.
conn = f"postgres:{quote_plus(str(pw))}@localhost:5432/minneapolis_housing"
engine = create_engine(f'postgresql://{conn}')

In [None]:
# List the tables currently present in the database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API returns the same list of names and works on both.
inspect(engine).get_table_names()

In [None]:
# load neighborhoods df into database table
neighborhoods.to_sql(name='neighborhood', con=engine, if_exists='replace', index=False)

In [None]:
# Check that data is loaded into postgres
pd.read_sql_query('select * from neighborhood', con=engine).head()

In [None]:
# Stacy end

In [1]:
## Pull everything together in a SQLAlchemy query / pandas DataFrame for the aggregate table
## TODO: not implemented yet -- this cell contains no code.