In [None]:
# Dependencies and Setup
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib import rcParams
import scipy.stats as sts
import os
from collections import Counter
import requests
import json

# Google API Key
from api_keys import gkey

In [None]:
# Apply matplotlib's built-in 'ggplot' style sheet to every figure created below.
# NOTE(review): the earlier comment claimed this sets a black figure background;
# 'ggplot' uses a light-grey axes background. If black was intended, the
# 'dark_background' style sheet is presumably what was meant - confirm.
plt.style.use('ggplot')

In [None]:
# Notebook-wide matplotlib defaults: figure geometry/resolution, fonts, and
# inward-pointing major + minor ticks drawn on all four sides of the axes.
plot_settings = {
    'figure.figsize': [15.0, 10.0],
    'figure.dpi': 80,
    'savefig.dpi': 100,
    'font.size': 20,
    'legend.fontsize': 'large',
    'figure.titlesize': 'medium',
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
    'xtick.direction': 'in',
    'ytick.direction': 'in',
    'xtick.top': True,
    'ytick.right': True,
    'xtick.major.size': 10,
    'ytick.major.size': 10,
    'xtick.minor.size': 5,
    'ytick.minor.size': 5,
    'xtick.minor.visible': True,
    'ytick.minor.visible': True,
}
# Assign one key at a time, in the order listed above.
for setting_name, setting_value in plot_settings.items():
    rcParams[setting_name] = setting_value

## Load and clean the 2012 US census data

In [None]:
# Load the combined 2012 census table (all US zip codes) from output_census/.
path_comb_2012 = os.path.join(
    'output_census',
    'census_comb_2012.csv',
)
usa_2012 = pd.read_csv(filepath_or_buffer=path_comb_2012)

In [None]:
# Preview the first rows of the 2012 US frame.
usa_2012.head()

In [None]:
# Column labels of the 2012 US frame.
usa_2012.columns

In [None]:
# (rows, columns) of the 2012 US frame at this point in the notebook.
usa_2012.shape

In [None]:
# Keep only the rows whose 'House Value' is strictly positive.
usa_2012 = usa_2012.loc[usa_2012['House Value'] > 0]

In [None]:
# Number of missing values in each column of the 2012 US frame.
usa_2012.isna().sum()

In [None]:
# Drop rows that lack coordinates or either transport rate (all four columns
# are required, not just Lat/Lng).
# Reassign rather than use inplace=True: usa_2012 comes from a boolean filter
# in an earlier cell, and an in-place dropna on such a frame can raise
# SettingWithCopyWarning.
usa_2012 = usa_2012.dropna(
    subset=['Lat', 'Lng', 'Public Transport Rate', 'Personal Transport Rate']
)

In [None]:
# (rows, columns) of the 2012 US frame after the cleaning cells above.
usa_2012.shape

In [None]:
# Per-column dtype and non-null count summary of the 2012 US frame.
usa_2012.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) of the 2012 US frame.
usa_2012.describe()

## Load the 2012 California census data

In [None]:
# Load the 2012 California census table from output_census/.
path_ca_2012 = os.path.join(
    'output_census',
    'census_ca_2012.csv',
)
ca_2012 = pd.read_csv(filepath_or_buffer=path_ca_2012)

## Load and clean the 2014 US census data

In [None]:
# Load the combined 2014 census table (all US zip codes) from output_census/.
path_comb_2014 = os.path.join(
    'output_census',
    'census_comb_2014.csv',
)
usa_2014 = pd.read_csv(filepath_or_buffer=path_comb_2014)

In [None]:
# Preview the first rows of the 2014 US frame.
usa_2014.head()

In [None]:
# Column labels of the 2014 US frame.
usa_2014.columns

In [None]:
# (rows, columns) of the 2014 US frame at this point in the notebook.
usa_2014.shape

In [None]:
# Keep only the rows whose 'House Value' is strictly positive.
usa_2014 = usa_2014.loc[usa_2014['House Value'] > 0]

In [None]:
# Number of missing values in each column of the 2014 US frame.
usa_2014.isna().sum()

In [None]:
# Drop rows that lack coordinates or either transport rate (all four columns
# are required, not just Lat/Lng).
# Reassign rather than use inplace=True: usa_2014 comes from a boolean filter
# in an earlier cell, and an in-place dropna on such a frame can raise
# SettingWithCopyWarning.
usa_2014 = usa_2014.dropna(
    subset=['Lat', 'Lng', 'Public Transport Rate', 'Personal Transport Rate']
)

In [None]:
# (rows, columns) of the 2014 US frame after the cleaning cells above.
usa_2014.shape

In [None]:
# Per-column dtype and non-null count summary of the 2014 US frame.
usa_2014.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) of the 2014 US frame.
usa_2014.describe()

## Load the 2014 California census data

In [None]:
# Load the 2014 California census table from output_census/.
path_ca_2014 = os.path.join(
    'output_census',
    'census_ca_2014.csv',
)
ca_2014 = pd.read_csv(filepath_or_buffer=path_ca_2014)

## Load and clean the 2015 US census data

In [None]:
# Load the combined 2015 census table (all US zip codes) from output_census/.
path_comb_2015 = os.path.join(
    'output_census',
    'census_comb_2015.csv',
)
usa_2015 = pd.read_csv(filepath_or_buffer=path_comb_2015)

In [None]:
# Preview the first rows of the 2015 US frame.
usa_2015.head()

In [None]:
# Column labels of the 2015 US frame.
usa_2015.columns

In [None]:
# (rows, columns) of the 2015 US frame at this point in the notebook.
usa_2015.shape

In [None]:
# Keep only the rows whose 'House Value' is strictly positive.
usa_2015 = usa_2015.loc[usa_2015['House Value'] > 0]

In [None]:
# Number of missing values in each column of the 2015 US frame.
usa_2015.isna().sum()

In [None]:
# Drop rows that lack coordinates or either transport rate (all four columns
# are required, not just Lat/Lng).
# Reassign rather than use inplace=True: usa_2015 comes from a boolean filter
# in an earlier cell, and an in-place dropna on such a frame can raise
# SettingWithCopyWarning.
usa_2015 = usa_2015.dropna(
    subset=['Lat', 'Lng', 'Public Transport Rate', 'Personal Transport Rate']
)

In [None]:
# (rows, columns) of the 2015 US frame after the cleaning cells above.
usa_2015.shape

In [None]:
# Per-column dtype and non-null count summary of the 2015 US frame.
usa_2015.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) of the 2015 US frame.
usa_2015.describe()

## Load the 2015 California census data

In [None]:
# Load the 2015 California census table from output_census/.
path_ca_2015 = os.path.join(
    'output_census',
    'census_ca_2015.csv',
)
ca_2015 = pd.read_csv(filepath_or_buffer=path_ca_2015)

## Load and clean the 2017 US census data

In [None]:
# Load the combined 2017 census table (all US zip codes) from output_census/.
path_comb_2017 = os.path.join(
    'output_census',
    'census_comb_2017.csv',
)
usa_2017 = pd.read_csv(filepath_or_buffer=path_comb_2017)

In [None]:
# Preview the first rows of the 2017 US frame.
usa_2017.head()

In [None]:
# Column labels of the 2017 US frame.
usa_2017.columns

In [None]:
# (rows, columns) of the 2017 US frame at this point in the notebook.
usa_2017.shape

In [None]:
# Keep only the rows whose 'House Value' is strictly positive.
usa_2017 = usa_2017.loc[usa_2017['House Value'] > 0]

In [None]:
# Number of missing values in each column of the 2017 US frame.
usa_2017.isna().sum()

In [None]:
# Drop rows that lack coordinates or either transport rate (all four columns
# are required, not just Lat/Lng).
# Reassign rather than use inplace=True: usa_2017 comes from a boolean filter
# in an earlier cell, and an in-place dropna on such a frame can raise
# SettingWithCopyWarning.
usa_2017 = usa_2017.dropna(
    subset=['Lat', 'Lng', 'Public Transport Rate', 'Personal Transport Rate']
)

In [None]:
# (rows, columns) of the 2017 US frame after the cleaning cells above.
usa_2017.shape

In [None]:
# Per-column dtype and non-null count summary of the 2017 US frame.
usa_2017.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) of the 2017 US frame.
usa_2017.describe()

## Load the 2017 California census data

In [None]:
# Load the 2017 California census table from output_census/.
path_ca_2017 = os.path.join(
    'output_census',
    'census_ca_2017.csv',
)
ca_2017 = pd.read_csv(filepath_or_buffer=path_ca_2017)

## Load and clean the 2019 US census data

In [None]:
# Load the combined 2019 census table (all US zip codes) from output_census/.
path_comb_2019 = os.path.join(
    'output_census',
    'census_comb_2019.csv',
)
usa_2019 = pd.read_csv(filepath_or_buffer=path_comb_2019)

In [None]:
# Preview the first rows of the 2019 US frame.
usa_2019.head()

In [None]:
# Column labels of the 2019 US frame.
usa_2019.columns

In [None]:
# (rows, columns) of the 2019 US frame at this point in the notebook.
usa_2019.shape

In [None]:
# Keep only the rows whose 'House Value' is strictly positive.
usa_2019 = usa_2019.loc[usa_2019['House Value'] > 0]

In [None]:
# Number of missing values in each column of the 2019 US frame.
usa_2019.isna().sum()

In [None]:
# Drop rows that lack coordinates or either transport rate (all four columns
# are required, not just Lat/Lng).
# Reassign rather than use inplace=True: usa_2019 comes from a boolean filter
# in an earlier cell, and an in-place dropna on such a frame can raise
# SettingWithCopyWarning.
usa_2019 = usa_2019.dropna(
    subset=['Lat', 'Lng', 'Public Transport Rate', 'Personal Transport Rate']
)

In [None]:
# (rows, columns) of the 2019 US frame after the cleaning cells above.
usa_2019.shape

In [None]:
# Per-column dtype and non-null count summary of the 2019 US frame.
usa_2019.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) of the 2019 US frame.
usa_2019.describe()

## Load the 2019 California census data

In [None]:
# Load the 2019 California census table from output_census/.
path_ca_2019 = os.path.join(
    'output_census',
    'census_ca_2019.csv',
)
ca_2019 = pd.read_csv(filepath_or_buffer=path_ca_2019)

In [None]:
# Scatter map of the 2012 US data: position is (Lng, Lat), marker area scales
# with Population / 100, colour encodes House Value.
# Fix: this cell referenced `census_2012`, which is never defined in the
# notebook (NameError on a fresh kernel); the 2012 US frame is `usa_2012`.
fig, ax = plt.subplots(figsize=(15, 15))
usa_2012.plot(kind='scatter', x='Lng', y='Lat', alpha=0.2,
              s=usa_2012['Population'] / 100, label='Population',
              c='House Value', cmap=plt.get_cmap('jet'),
              colorbar=True, ax=ax)
ax.set_title('2012 US census: house value by location')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.show()

In [None]:
# Scatter map of California data: position is (Lng, Lat), marker area scales
# with Population / 100, colour encodes House Value.
# Fix: this cell referenced `census_ca`, which is never defined in the
# notebook (NameError on a fresh kernel).
# NOTE(review): ca_2012 is assumed here to match the 2012 US plot - swap in
# ca_2014 / ca_2015 / ca_2017 / ca_2019 if another year was intended.
fig, ax = plt.subplots(figsize=(15, 15))
ca_2012.plot(kind='scatter', x='Lng', y='Lat', alpha=0.2,
             s=ca_2012['Population'] / 100, label='Population',
             c='House Value', cmap=plt.get_cmap('jet'),
             colorbar=True, ax=ax)
ax.set_title('2012 California census: house value by location')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.show()

In [None]:
# Scatter map of California data: position is (Lng, Lat), marker area scales
# with Population / 100, colour encodes Household Income.
# Fix: this cell referenced `census_ca`, which is never defined in the
# notebook (NameError on a fresh kernel).
# NOTE(review): ca_2012 is assumed here to match the 2012 US plot - swap in
# ca_2014 / ca_2015 / ca_2017 / ca_2019 if another year was intended.
fig, ax = plt.subplots(figsize=(15, 15))
ca_2012.plot(kind='scatter', x='Lng', y='Lat', alpha=0.2,
             s=ca_2012['Population'] / 100, label='Population',
             c='Household Income', cmap=plt.get_cmap('jet'),
             colorbar=True, ax=ax)
ax.set_title('2012 California census: household income by location')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
plt.show()

In [None]:
# Heatmap of pairwise correlations between the numeric columns of the
# California frame, annotated with the coefficients.
# Fixes:
#  * `census_ca` is never defined in the notebook (NameError on a fresh kernel).
#    NOTE(review): ca_2012 is assumed - swap in another year if intended.
#  * Restrict to numeric/bool columns before .corr(): pandas >= 2.0 raises on
#    non-numeric columns instead of silently dropping them.
#  * Removed the duplicate mid-notebook `import seaborn as sns`; seaborn is
#    already imported in the first cell.
corr = ca_2012.select_dtypes(include=['number', 'bool']).corr()
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(corr, vmin=-1, vmax=1, annot=True, ax=ax, cmap='BrBG')
ax.set_title('2012 California census: correlation matrix')
plt.show()

In [None]:
# Pairwise scatter plots (histograms on the diagonal) for the selected
# California census attributes.
# Fix: this cell referenced `census_ca`, which is never defined in the
# notebook (NameError on a fresh kernel).
# NOTE(review): ca_2012 is assumed - swap in another year if intended.
attrb = ['Population', 'Median Age', 'Household Income',
         'Per Capita Income', 'Poverty Rate', 'Unemployment Rate', 'House Value',
         'House Construction Year', 'Monthly Owner Cost', 'Monthly Rent']
sns.pairplot(ca_2012[attrb], diag_kind="hist")
plt.show()