# EV Station 
---

In [174]:
# Dependencies
from census import Census
from api_keys import (census_key, g_key)
import gmaps
import numpy as np
import pandas as pd
import requests
import time
from scipy.stats import linregress
from matplotlib import pyplot as plt


# Census API client, authenticated with census_key (imported from api_keys)
# and constructed with year=2020.
c = Census(census_key, year=2020)

## Data Retrieval

In [175]:
# Query the ACS 5-year tables for every ZIP Code Tabulation Area.
# The client `c` is constructed with year=2020, so this is not the 2013 vintage
# that the original template comment referred to.
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
census_data = c.acs5.get(
    ("B01003_001E", "B17001_002E", "B11001_001E", "B19013_001E"),
    {'for': 'zip code tabulation area:*'},
)

# Convert to a DataFrame and give the variable codes readable column names
census_pd = pd.DataFrame(census_data).rename(
    columns={
        "B01003_001E": "Population",
        "B17001_002E": "Poverty Count",
        "B11001_001E": "Number of Households",
        "B19013_001E": "Household Income",
        "zip code tabulation area": "Zipcode",
    }
)

# Poverty Rate (%) = 100 * Poverty Count / Population.
# pd.to_numeric(errors="coerce") turns missing or non-numeric API values into NaN
# instead of raising, which a plain .astype(int) would do.
population = pd.to_numeric(census_pd["Population"], errors="coerce")
poverty_count = pd.to_numeric(census_pd["Poverty Count"], errors="coerce")
census_pd["Poverty Rate"] = 100 * poverty_count / population

# Keep only the columns used downstream, in display order.
# NOTE(review): ACS appears to report unavailable medians as large negative
# sentinel values; filter "Household Income" > 0 before analysing it - confirm.
census_pd = census_pd[["Zipcode", "Population", "Poverty Rate", "Number of Households", "Household Income"]]

# Visualize
print(len(census_pd))
census_pd.head()

33120


Unnamed: 0,Zipcode,Population,Poverty Rate,Number of Households,Household Income
0,601,17599.0,64.105915,5818.0,11757.0
1,602,39209.0,52.100283,12719.0,16190.0
2,603,50135.0,50.216416,19009.0,16645.0
3,606,6304.0,64.911168,1959.0,13387.0
4,610,27590.0,45.498369,9120.0,18741.0


In [176]:
# Load the claims extract. The variable keeps its template-era name "EVStation"
# because later cells refer to it, but the rows come from Wildfire-v4.csv.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk; this read previously emitted a warning
# (presumably the mixed-type DtypeWarning - confirm on the next run).
EVStation = pd.read_csv("Wildfire-v4.csv", low_memory=False)

# Row count of the raw extract
len(EVStation)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


15040

In [177]:
# Preview the first rows of the loaded table
EVStation.head()

Unnamed: 0,CAT_INDICATOR,policy,effdte,covgcd,accidentyear,indemnity_netpaid,indemnity_paid,indemnity_reserved,indemnity_recovered,losscause,...,windhailzoneconfidence_geocode,windhailzoneconfidence_p_geocode,etl_load_date_geocode,locnum_geocode,maxEffdte_geocode,locnum,bldnum,constr,conyr,edsno
0,N,DFH0002270,10/25/2005,LIMITA,2006,84517.04,84517.04,0.0,0.0,10081,...,,,,1.0,1.0,,,,,
1,N,DFO0097557,3/23/2000,LIMITA,2000,15200.0,15200.0,0.0,0.0,10081,...,,,,,,,,,,
2,N,DFO0100615,3/19/1999,LIMITA,1999,6843.16,6843.16,0.0,0.0,10081,...,,,,,,,,,,
3,N,DFO0110366,8/5/1998,LIMITA,1998,15000.0,15000.0,0.0,0.0,10081,...,,,,,,,,,,
4,N,DFO0136866,6/27/2000,LIMITA,2001,881.9,881.9,0.0,0.0,10081,...,,,,,,,,,,


In [178]:
# Restrict the table to rows whose accident year is 2020 or later,
# then report how many rows remain.
EVStation = EVStation.loc[EVStation['accidentyear'].ge(2020)]
len(EVStation)

1239

In [179]:
# Subset of rows whose location state code is 'CA'
CA = EVStation.loc[EVStation['locst'].eq('CA')]
CA.head()

Unnamed: 0,CAT_INDICATOR,policy,effdte,covgcd,accidentyear,indemnity_netpaid,indemnity_paid,indemnity_reserved,indemnity_recovered,losscause,...,windhailzoneconfidence_geocode,windhailzoneconfidence_p_geocode,etl_load_date_geocode,locnum_geocode,maxEffdte_geocode,locnum,bldnum,constr,conyr,edsno
135,Y,HOS1283472,8/10/2020,LIMITC,2020,36054.41,36054.41,0.0,0.0,10247,...,,,,1,1.0,,,,,
136,Y,HOS1283472,8/10/2020,LIMITA,2020,206133.93,206133.93,0.0,0.0,10247,...,,,,1,1.0,,,,,
137,Y,HOS1283472,8/10/2020,LIMITB,2020,18437.97,18437.97,5742.29,0.0,10247,...,,,,1,1.0,,,,,
243,Y,HOS1283505,9/3/2020,LIMITA,2020,762764.63,762764.63,0.0,0.0,10247,...,,,,1,1.0,,,,,
244,Y,HOS1283505,9/3/2020,LIMITC,2020,110321.55,110321.55,0.0,0.0,10247,...,,,,1,1.0,,,,,


In [180]:
# Number of rows in the CA subset
len(CA)

155

In [181]:
# Collapse the table to one row per policy (a policy can appear on several rows,
# e.g. once per covgcd): money columns are summed, descriptive columns take the
# first value seen, and the geocoded coordinates take the per-policy minimum.
EVStation2 = EVStation.groupby('policy', as_index=False).agg(
    {
        'indemnity_paid': 'sum',
        'indemnity_reserved': 'sum',
        'effdte': 'first',
        'accidentyear': 'first',
        'loczip': 'first',
        'standardizedaddress_latitude_geocode': 'min',
        'standardizedaddress_lgtde_geocode': 'min',
    }
)

len(EVStation2)

492

In [182]:
# Preview the per-policy aggregate
EVStation2.head()

Unnamed: 0,policy,indemnity_paid,indemnity_reserved,effdte,accidentyear,loczip,standardizedaddress_latitude_geocode,standardizedaddress_lgtde_geocode
0,DFH0015234,110000.0,40000.0,5/13/2021,2021,43731.0,39.725837,-82.039873
1,DFI0018587,129809.1,0.0,8/9/2019,2020,19963.0,38.911836,-75.303394
2,DFI0019985,39327.26,0.0,5/12/2021,2021,86441.0,35.612738,-114.261964
3,DFI0021979,0.0,0.0,5/28/2020,2021,85349.0,32.499743,-114.785255
4,DFI0022602,3000.0,0.0,2/13/2019,2020,85348.0,33.7672,-113.6255


## Combine Data

In [183]:
# # Import the zip station data. Use dtype="object" to ensure all data is read in accurately.
# census_data_original = pd.read_csv(
#     "merged.csv", dtype="object", encoding="utf-8")

# # Visualize
# census_data_original.head()

In [184]:
# # Merge the two data sets along zip code
# census_data_complete = pd.merge(
#     census_data_original, census_pd, how="left", on=["Zipcode", "Zipcode"])

# # Remove rows missing data
# census_data_complete = (census_data_complete[census_data_complete["Household Income"]>0]).dropna()

# # Visualize
# census_data_complete.head()

## Maps of Claim Locations and Weighted Heatmaps

In [185]:
# Configure gmaps with the Google API key (g_key, imported from api_keys)
gmaps.configure(api_key=g_key)

In [186]:
# fig = gmaps.figure()

# income_layer = gmaps.heatmap_layer(locations, weights=household_income/10000, 
#                                  dissipating=False, max_intensity=100,
#                                  point_radius = 1)

# fig.add_layer(income_layer)

# fig

In [187]:
# Plot one marker per policy at its geocoded address.
# The original note on this cell warned that plotting 20K+ markers was too
# large and crashed, so keep the input at the per-policy aggregate.
# Policies without a geocode are dropped so that only complete coordinate
# pairs are handed to gmaps.
locations = (
    EVStation2[["standardizedaddress_latitude_geocode", "standardizedaddress_lgtde_geocode"]]
    .astype(float)
    .dropna()
)

# Create the figure here. This line used to be commented out, so `fig` only
# existed if a heatmap cell further down had already been run: a NameError on
# a fresh kernel, or markers silently added to a stale figure.
fig = gmaps.figure()

station_layer = gmaps.symbol_layer(
    locations,
    fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)',
    scale=2,
)

fig.add_layer(station_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [188]:
# Heatmap of paid indemnity at each geocoded location.
#
# Keep only rows that have both coordinates and a weight. The earlier bare
# `EVStation.dropna()` discarded its result, so it filtered nothing; applied to
# every column it would also have removed rows that are merely missing
# unrelated fields.
paid_rows = EVStation.dropna(
    subset=[
        "standardizedaddress_latitude_geocode",
        "standardizedaddress_lgtde_geocode",
        "indemnity_paid",
    ]
)

# Latitude / longitude pairs for the heatmap points
locations = paid_rows[["standardizedaddress_latitude_geocode", "standardizedaddress_lgtde_geocode"]].astype(float)

# Heatmap weights: paid indemnity per row
paid_weights = paid_rows["indemnity_paid"].astype(float)

# Build a fresh figure holding the paid-indemnity heatmap layer
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(locations, weights=paid_weights,
                                 dissipating=False, max_intensity=3000,
                                 point_radius=.25)

fig.add_layer(heat_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [190]:
# Heatmap weighted by the tiv_wins751 column at each geocoded location.
# NOTE(review): tiv_wins751 is presumably a total-insured-value field - confirm.
#
# Keep only rows that have both coordinates and a weight, so no NaN values are
# passed to the heatmap layer.
tiv_rows = EVStation.dropna(
    subset=[
        "standardizedaddress_latitude_geocode",
        "standardizedaddress_lgtde_geocode",
        "tiv_wins751",
    ]
)

# Latitude / longitude pairs for the heatmap points
locations = tiv_rows[["standardizedaddress_latitude_geocode", "standardizedaddress_lgtde_geocode"]].astype(float)

# Heatmap weights: tiv_wins751 per row
tiv_weights = tiv_rows["tiv_wins751"].astype(float)

# Build a fresh figure holding the tiv_wins751 heatmap layer
fig = gmaps.figure()

tiv_layer = gmaps.heatmap_layer(locations, weights=tiv_weights,
                                dissipating=False, max_intensity=3000,
                                point_radius=.25)

fig.add_layer(tiv_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [None]:
# Create a combined map: the paid-indemnity heatmap with the per-policy
# markers drawn on top.
# `bank_layer` (a leftover name from the template) is never defined in this
# notebook; the symbol layer built here is `station_layer`.
fig = gmaps.figure()

fig.add_layer(heat_layer)
fig.add_layer(station_layer)

fig

## Statistical Analysis

#### Summary Statistics

In [None]:
# # Mean, median, mode for Poverty Rate
# poverty_mean = round(census_data_complete['Poverty Rate'].astype('float').mean(), 2)
# poverty_median = round(census_data_complete['Poverty Rate'].astype('float').median(), 2)
# poverty_mode = round(census_data_complete['Poverty Rate'].astype('float').mode(), 2)

# print(f"Poverty Rate Mean: {poverty_mean}")
# print(f"Poverty Rate Median {poverty_median}")
# print(f"Poverty Rate mode {poverty_mode}")

In [None]:
# # Mean, median, mode for Bank Count
# bank_mean = round(census_data_complete['counts'].astype('float').mean(), 2)
# bank_median = round(census_data_complete['counts'].astype('float').median(), 2)
# bank_mode = round(census_data_complete['counts'].astype('float').mode(), 2)

# print(f"Bank Count Mean: {bank_mean}")
# print(f"Bank Count Median {bank_median}")
# print(f"Bank Count mode {bank_mode}")

In [None]:
# # Mean, median, mode for Population
# population_mean = round(census_data_complete['Population'].astype('float').mean(), 2)
# population_median = round(census_data_complete['Population'].astype('float').median(), 2)
# population_mode = round(census_data_complete['Population'].astype('float').mode(), 2)

# print(f"Population Mean: {population_mean}")
# print(f"Population Median {population_median}")
# print(f"Population mode {population_mode}")

#### Linear Regression

In [None]:
# ## Convert to floats and store Poverty Rate and Bank Count as x and y values
# x_values = census_data_complete['Poverty Rate'].astype('float')
# y_values = census_data_complete['counts'].astype('float')

# # Run linear regression
# (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# regress_values = x_values * slope + intercept
# line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# # Plot scatter plot
# plt.scatter(x_values,y_values)

# # Plot regression line
# plt.plot(x_values,regress_values,"r-")
# plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# # Label plot
# plt.xlabel('Poverty Rate')
# plt.ylabel('counts')

# # Print r square value
# print(f"R squared: {rvalue**2}")
# print(f"std dev: {stderr}")
# # Show plot
# plt.show()

In [None]:
# census_data_complete['counts'] = census_data_complete['counts'].astype('float')
# smallercounty = census_data_complete.loc[census_data_complete['counts']<50]

# ## Convert to floats and store Poverty Rate and Bank Count as x and y values
# x_values = smallercounty['Poverty Rate'].astype('float')
# y_values = smallercounty['counts'].astype('float')

# # Run linear regression
# (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# regress_values = x_values * slope + intercept
# line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# # Plot scatter plot
# plt.scatter(x_values,y_values)

# # Plot regression line
# plt.plot(x_values,regress_values,"r-")
# plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# # Label plot
# plt.xlabel('Poverty Rate')
# plt.ylabel('counts')

# # Print r square value
# print(f"R squared: {rvalue**2}")
# print(f"std dev: {stderr}")
# # Show plot
# plt.show()

In [None]:
# ## Convert to floats and store Poverty Rate and Bank Count as x and y values
# x_values = census_data_complete['Household Income'].astype('float')
# y_values = census_data_complete['counts'].astype('float')

# # Run linear regression
# (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# regress_values = x_values * slope + intercept
# line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# # Plot scatter plot
# plt.scatter(x_values,y_values)

# # Plot regression line
# plt.plot(x_values,regress_values,"r-")
# plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# # Label plot
# plt.xlabel('Household Income')
# plt.ylabel('counts')

# # Print r square value
# print(f"R squared: {rvalue**2}")
# print(f"std dev: {stderr}")
# # Show plot
# plt.show()

In [None]:
# census_data_complete['counts'] = census_data_complete['counts'].astype('float')
# smallercounty = census_data_complete.loc[census_data_complete['counts']<50]

# ## Convert to floats and store Poverty Rate and Bank Count as x and y values
# x_values = smallercounty['Household Income'].astype('float')
# y_values = smallercounty['counts'].astype('float')

# # Run linear regression
# (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# regress_values = x_values * slope + intercept
# line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# # Plot scatter plot
# plt.scatter(x_values,y_values)

# # Plot regression line
# plt.plot(x_values,regress_values,"r-")
# plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# # Label plot
# plt.xlabel('Household Income')
# plt.ylabel('counts')

# # Print r square value
# print(f"R squared: {rvalue**2}")
# print(f"std dev: {stderr}")
# # Show plot
# plt.show()

### Analysis
* There is a very weak correlation between poverty rates and bank counts. Keep in mind that linear regression will not consider other factors such as population or size of the city.

In [None]:
# # census_data_complete['counts'] = census_data_complete['counts'].astype('float')
# census_data_complete['Household Income'] = census_data_complete['Household Income'].astype('float')
# smallerincome = smallercounty.loc[(smallercounty['Household Income']<100000)]

# ## Convert to floats and store Poverty Rate and Bank Count as x and y values
# x_values = smallerincome ['Household Income'].astype('float')
# y_values = smallerincome ['counts'].astype('float')

# # Run linear regression
# (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# regress_values = x_values * slope + intercept
# line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# # Plot scatter plot
# plt.scatter(x_values,y_values)

# # Plot regression line
# plt.plot(x_values,regress_values,"r-")
# plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# # Label plot
# plt.xlabel('Household Income')
# plt.ylabel('counts')

# # Print r square value
# print(f"R squared: {rvalue**2}")
# print(f"std dev: {stderr}")
# # Show plot
# plt.show()