In [4]:
# Dependencies and Setup
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib import rcParams
import scipy.stats as sts
import os
from collections import Counter
import requests
import json

# Google API Key
from api_keys import gkey

In [5]:
# Apply matplotlib's 'ggplot' style sheet to every figure drawn below.
plt.style.use('ggplot')

In [7]:
# Global figure defaults: canvas size/resolution, fonts, and tick appearance.
# Applied in one call; later cells may still override per figure.
rcParams.update({
    # Canvas size (inches) and resolution for on-screen and saved figures
    'figure.figsize': [15.0, 10.0],
    'figure.dpi': 80,
    'savefig.dpi': 100,
    # Text
    'font.size': 20,
    'legend.fontsize': 'large',
    'figure.titlesize': 'medium',
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
    # Ticks: drawn inward, mirrored on the top/right spines
    'xtick.direction': 'in',
    'ytick.direction': 'in',
    'xtick.top': True,
    'ytick.right': True,
    # Tick lengths (major / minor) and minor-tick visibility
    'xtick.major.size': 10,
    'ytick.major.size': 10,
    'xtick.minor.size': 5,
    'ytick.minor.size': 5,
    'xtick.minor.visible': True,
    'ytick.minor.visible': True,
})

## Reading clean California census data for plotting

In [8]:
# Load the cleaned California census extracts, one CSV per survey year,
# from the 'output_census' folder.
def _census_csv(filename):
    """Return the path of `filename` inside the 'output_census' folder."""
    return os.path.join('output_census', filename)


# 2012
path_ca_2012 = _census_csv('ca_2012_fg.csv')
ca_2012 = pd.read_csv(path_ca_2012)

# 2014
path_ca_2014 = _census_csv('ca_2014_fg.csv')
ca_2014 = pd.read_csv(path_ca_2014)

# 2015
path_ca_2015 = _census_csv('ca_2015_fg.csv')
ca_2015 = pd.read_csv(path_ca_2015)

# 2017
path_ca_2017 = _census_csv('ca_2017_fg.csv')
ca_2017 = pd.read_csv(path_ca_2017)

# 2019
path_ca_2019 = _census_csv('ca_2019_fg.csv')
ca_2019 = pd.read_csv(path_ca_2019)

## California Census 2012 Figures

In [None]:
# Peek at the first five rows of the 2012 frame.
ca_2012.head(n=5)

In [None]:
# List the column labels available in the 2012 frame.
ca_2012.columns

In [None]:
# 2012 map view: each record is placed at its (Lng, Lat); marker size scales
# with Population / 100 and marker colour encodes 'House Value'.
fig, ax = plt.subplots(figsize=(15, 15))
ca_2012.plot.scatter(
    x='Lng',
    y='Lat',
    s=ca_2012['Population'].div(100),
    c='House Value',
    cmap=plt.get_cmap('rainbow'),
    colorbar=True,
    alpha=0.4,
    label='Population',
    ax=ax,
)
plt.show()

In [None]:
# 2012 map view: each record is placed at its (Lng, Lat); marker size scales
# with Population / 100 and marker colour encodes 'Household Income'.
fig, ax = plt.subplots(figsize=(15, 15))
ca_2012.plot.scatter(
    x='Lng',
    y='Lat',
    s=ca_2012['Population'].div(100),
    c='Household Income',
    cmap=plt.get_cmap('hsv'),
    colorbar=True,
    alpha=0.3,
    label='Population',
    ax=ax,
)
plt.show()

In [None]:
# Keep only the city label and the indicators used in the 2012 analysis.
# NOTE(review): the next cell rebuilds `ca_12` directly from `ca_2012`,
# so this selection is overwritten before it is used - confirm it is needed.
ca_12 = ca_2012[[
    'City',
    'Population',
    'House Value',
    'Household Income',
    'Poverty Rate',
    'Unemployment Rate',
    'Monthly Owner Cost',
    'Monthly Rent',
    'Public Transport Rate',
    'Uneducated Rate',
]]

In [None]:
# Collapse the 2012 records to one row per city: 'Population' is summed,
# every other indicator is averaged over the city's records.
ca_12 = (
    ca_2012
    .groupby('City')
    .agg({
        'Population': 'sum',
        **dict.fromkeys(
            [
                'House Value',
                'Household Income',
                'Poverty Rate',
                'Unemployment Rate',
                'Monthly Owner Cost',
                'Monthly Rent',
                'Public Transport Rate',
                'Uneducated Rate',
            ],
            'mean',
        ),
    })
    .reset_index()
)

In [None]:
# Order the city table from highest to lowest, with 'House Value' as the
# primary key and the remaining indicators (in the order listed) as
# tie-breakers.
ca_12 = ca_12.sort_values(
    by=[
        'House Value',
        'Monthly Owner Cost',
        'Household Income',
        'Poverty Rate',
        'Unemployment Rate',
        'Public Transport Rate',
        'Monthly Rent',
        'Uneducated Rate',
    ],
    ascending=False,
)

In [None]:
# Correlation heatmap of the 2012 indicators held in `ca_12`.
# Only numeric columns are correlated: `ca_12` still carries the 'City'
# label column (presumably strings), and pandas >= 2.0 raises on non-numeric
# data in DataFrame.corr() instead of silently dropping it.
# seaborn is already imported as `sns` in the notebook's setup cell, so it is
# not re-imported here.
corr_12 = ca_12.select_dtypes(include='number').corr()
fig, ax = plt.subplots(figsize=(15, 15))
# Pin the colour scale to the full [-1, 1] correlation range so the colours
# are comparable between heatmaps. Pass annot=True to print the coefficients.
sns.heatmap(corr_12, vmin=-1, vmax=1, ax=ax, cmap='BrBG')
plt.show()

In [None]:
# The ten rows of `ca_12` with the largest 'House Value'.
ca_12.nlargest(n=10, columns='House Value')

## California Census 2019 Figures

In [None]:
# Keep only the city label and the indicators used in the 2019 analysis.
ca_19 = ca_2019[[
    'City',
    'Population',
    'House Value',
    'Household Income',
    'Poverty Rate',
    'Unemployment Rate',
    'Monthly Owner Cost',
    'Monthly Rent',
    'Public Transport Rate',
    'Personal Transport Rate',
    'College Rate',
    'White Population Rate',
    'Uneducated Rate',
]]

In [None]:
# Collapse the 2019 selection to one row per city: 'Population' is summed,
# every other indicator is averaged over the city's records.
ca_19_city = (
    ca_19
    .groupby('City')
    .agg({
        'Population': 'sum',
        **dict.fromkeys(
            [
                'House Value',
                'Household Income',
                'Poverty Rate',
                'Unemployment Rate',
                'Monthly Owner Cost',
                'Monthly Rent',
                'Public Transport Rate',
                'Personal Transport Rate',
                'College Rate',
                'White Population Rate',
                'Uneducated Rate',
            ],
            'mean',
        ),
    })
    .reset_index()
)

In [None]:
# Order the 2019 city table from highest to lowest, with 'House Value' as the
# primary key and the remaining indicators (in the order listed) as
# tie-breakers.
ca_19_city = ca_19_city.sort_values(
    by=[
        'House Value',
        'Monthly Owner Cost',
        'Household Income',
        'Poverty Rate',
        'Unemployment Rate',
        'Public Transport Rate',
        'Monthly Rent',
        'Uneducated Rate',
    ],
    ascending=False,
)

In [None]:
# The ten rows of `ca_19_city` with the largest 'House Value'.
ca_19_city.nlargest(n=10, columns='House Value')

In [None]:
# Correlation heatmap of the 2019 indicators held in `ca_19`.
# Only numeric columns are correlated: `ca_19` includes the 'City' label
# column (presumably strings), and pandas >= 2.0 raises on non-numeric data
# in DataFrame.corr() instead of silently dropping it.
# NOTE(review): this uses the per-record frame `ca_19`, not the city-level
# `ca_19_city` - confirm that is the intended input.
# seaborn is already imported as `sns` in the notebook's setup cell, so it is
# not re-imported here.
corr_19 = ca_19.select_dtypes(include='number').corr()
fig, ax = plt.subplots(figsize=(15, 15))
# Pin the colour scale to the full [-1, 1] correlation range so the colours
# are comparable between heatmaps. Pass annot=True to print the coefficients.
sns.heatmap(corr_19, vmin=-1, vmax=1, ax=ax, cmap='BrBG')
plt.show()

In [None]:
# 2019 correlation matrix rendered as a colour-graded table.
# Restrict to numeric columns first: `ca_19` includes the 'City' label
# column (presumably strings), which makes DataFrame.corr() raise on
# pandas >= 2.0.
corr_19 = ca_19.select_dtypes(include='number').corr()
corr_19.style.background_gradient(cmap='coolwarm')

In [None]:
# Rank the strongest pairwise relationships in the 2019 correlation matrix.
# Flatten |corr| into a Series indexed by pairs of variable labels.
c1 = corr_19.abs().unstack()
# Exclude each variable's correlation with itself explicitly. Skipping a
# hard-coded 12 leading entries only works when the matrix has exactly
# 12 columns and nothing else ties with the diagonal.
is_cross_pair = c1.index.get_level_values(0) != c1.index.get_level_values(1)
# Show the 28 largest remaining values (the size of the former 12:40 window).
# A correlation matrix is symmetric, so every pair of variables appears twice.
c1[is_cross_pair].sort_values(ascending=False).head(28)

In [None]:
# 2019 map view: each record is placed at its (Lng, Lat); marker size scales
# with Population / 100 and marker colour encodes 'House Value'.
fig, ax = plt.subplots(figsize=(15, 15))
ca_2019.plot.scatter(
    x='Lng',
    y='Lat',
    s=ca_2019['Population'].div(100),
    c='House Value',
    cmap=plt.get_cmap('jet'),
    colorbar=True,
    alpha=0.2,
    label='Population',
    ax=ax,
)
plt.show()