# How many zero and low emissions vehicles are registered in California?

### Load Python tools

In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import json
import jenkspy
import numpy as np
from pywaffle import Waffle
from altair import datum
import altair as alt
import altair_latimes as lat



In [2]:
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
pd.options.display.float_format = '{:,.2f}'.format
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
# CARB looks for ways to meet our 5 million zero-emission vehicle target by 2030, the conversion of public and private fleets to zero emission cars and trucks offers an important opportunity to get a large number of carbon-polluting fleet vehicles off the road.
# https://ww2.arb.ca.gov/sites/default/files/2018-12/zero_emission_fleet_letter_080118.pdf

### Read data from California Department of Motor Vehicles - current as of Jan. 1, 2020

In [4]:
# https://data.ca.gov/dataset/vehicle-fuel-type-count-by-zip-code
url = 'https://data.ca.gov/dataset/15179472-adeb-4df6-920a-20640d02b08c/resource/4254a06d-9937-4083-9441-65597dd267e8/download/vehicle-count-as-of-1-1-2020.csv'

In [5]:
src = pd.read_csv(url, low_memory=False)

### Clean up field names

In [6]:
src.columns = src.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)\
                    .str.replace('(', '', regex=False).str.replace(')', '', regex=False).str.replace('-','_', regex=False)

In [7]:
src.rename(columns={"zip_code": "zip"}, inplace=True)

### How many vehicles are we talking about? 

In [8]:
'{:,.0f}'.format(src.vehicles.sum())

'31,233,841'

In [9]:
src.model_year.value_counts()

<2007    86632
2017     48075
2016     44892
2015     44780
2018     44322
2014     42015
2013     41089
2019     40583
2012     38944
2007     34198
2011     34037
2008     33778
2010     29790
2009     26670
2020     11342
Unk       1232
2021        15
Name: model_year, dtype: int64

### Remove older vehicles

In [10]:
vehicles = src[src['model_year'] != '<2007']

In [11]:
'{:,.0f}'.format(vehicles.vehicles.sum())

'20,302,263'

### How many don't rely solely on gasoline?

In [12]:
nogas = vehicles[(vehicles['fuel'] != 'Gasoline')]

In [13]:
zev = vehicles[vehicles['fuel'] == 'Battery Electric']

In [14]:
cvrp = vehicles[(vehicles['fuel'] == 'Battery Electric') |\
               (vehicles['fuel'] == 'Hydrogen Fuel Cell') |\
              (vehicles['fuel'] == 'Plug-in Hybrid')]

In [15]:
'{:,.0f}'.format(nogas.vehicles.sum())

'3,449,912'

### How many are battery electric, or otherwise CVRP eligible?

In [16]:
'{:,.0f}'.format(cvrp.vehicles.sum())

'561,323'

In [17]:
'{:,.0f}'.format(zev.vehicles.sum())

'306,297'

### Share of newer CA vehicles that don't rely solely on gas?

In [18]:
'{:,.1f}%'.format((nogas.vehicles.sum() / vehicles.vehicles.sum())*100)

'17.0%'

### Share of CA vehicles that are battery electric, or otherwise CVRP eligible?

In [19]:
'{:,.1f}%'.format((cvrp.vehicles.sum() / vehicles.vehicles.sum())*100)

'2.8%'

---

### ZIP codes points

In [20]:
zips_point = gpd.read_file('../../../data/gis/zipcodes.geojson')

In [21]:
zips_point['zip'] = zips_point['zip'].astype(str)

In [22]:
ca_zips_point = zips_point[zips_point['state'] == 'CA']

### ZIP codes boundaries

In [23]:
# Filtered CA from this national file maintained by Esri: 
# https://services.arcgis.com/P3ePLMYs2RVChkJx/arcgis/rest/services/USA_ZIP_Code_Areas_anaylsis/FeatureServer/0/
zips_poly_pop = gpd.read_file('../../../data/gis/ca-zip-codes-esri.geojson')
zips_poly = gpd.read_file('../../../data/gis/ca-zip-codes-esri-demographics.geojson')

In [24]:
zips_poly.columns = zips_poly.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)\
                    .str.replace('(', '', regex=False).str.replace(')', '', regex=False).str.replace('-','_', regex=False)

In [25]:
zips_poly.dropna(inplace=True)

In [26]:
zips_poly.rename(columns={"zip_code": "zip"}, inplace=True)

In [27]:
zips_poly.head()

Unnamed: 0,objectid,zip,po_name,pop2012,white,black,ameri_es,asian,hawn_pi,hispanic,other,mult_race,med_age,ave_hh_sz,marhh_chd,families,ave_fam_sz,hse_units,sqmi,geometry
0,4800,94002,Belmont,26832,19634,477,84,5762,221,3517,1170,1744,41.0,2.42,2925,7671,2.98,12304,5.9,"POLYGON ((-122.27703 37.53436, -122.27687 37.5..."
1,4801,94010,Burlingame,41102,31610,482,93,10244,188,5133,1865,2166,42.6,2.43,4798,12179,3.06,20020,13.0,"POLYGON ((-122.37728 37.60562, -122.37738 37.6..."
2,4900,93442,Morro Bay,10873,9395,47,104,281,10,1602,645,322,48.9,2.09,593,2749,2.7,6609,43.2,"POLYGON ((-120.77255 35.46196, -120.77270 35.4..."
3,4901,93445,Oceano,7633,4683,60,114,155,7,3389,1477,283,34.7,2.86,541,1582,3.42,2871,2.0,"POLYGON ((-120.63624 35.12213, -120.63591 35.1..."
4,4802,94015,Daly City,61575,14279,2138,168,35940,482,9775,4520,2939,39.0,3.1,4637,13991,3.54,20103,5.7,"POLYGON ((-122.50283 37.70813, -122.50242 37.7..."


---

### ZIP codes demographics from mapping giant Esri

In [28]:
#https://services.arcgis.com/P3ePLMYs2RVChkJx/arcgis/rest/services/WealthiestZipCodes2017/FeatureServer/0/
zips_wealth = gpd.read_file('../../../data/gis/WealthiestZipCodesCA.geojson')

In [29]:
zips_wealth.head()

Unnamed: 0,OBJECTID,ID,NAME,ST_ABBREV,HAI_CY,INCMORT_CY,WLTHRNK_CY,DOMSTATE,DOMCOUNTY,COUNTY_NAME,DOMCBSA,CBSA_NAME,TOTPOP_CY,TOTHH_CY,MEDAGE_CY,AVGHINC_CY,AVGVAL_CY,AVGNW_CY,Shape__Area,Shape__Length,geometry
0,29014,90001,Los Angeles,CA,55,44.1,27675.0,6,6037,Los Angeles County,31080,"Los Angeles-Long Beach-Anaheim, CA Metropolita...",59907,13454,27.1,46931,377591,82915,12747932.74,17936.35,"POLYGON ((-118.24754 33.98908, -118.24776 33.9..."
1,29015,90002,Los Angeles,CA,59,41.2,27690.0,6,6037,Los Angeles County,31080,"Los Angeles-Long Beach-Anaheim, CA Metropolita...",53573,12115,26.7,46332,331694,96786,11348270.82,16650.99,"POLYGON ((-118.24745 33.96015, -118.25169 33.9..."
2,29016,90003,Los Angeles,CA,52,46.6,28202.0,6,6037,Los Angeles County,31080,"Los Angeles-Long Beach-Anaheim, CA Metropolita...",71967,16771,27.1,43020,358057,66091,13659740.57,23821.37,"MULTIPOLYGON (((-118.28268 33.98926, -118.2826..."
3,29017,90004,Los Angeles,CA,21,114.5,20202.0,6,6037,Los Angeles County,31080,"Los Angeles-Long Beach-Anaheim, CA Metropolita...",63987,23021,35.8,65893,909580,232653,10904998.97,22878.23,"MULTIPOLYGON (((-118.33852 34.06891, -118.3371..."
4,29018,90005,Los Angeles,CA,22,111.5,26028.0,6,6037,Los Angeles County,31080,"Los Angeles-Long Beach-Anaheim, CA Metropolita...",41844,17067,35.8,53191,836569,105569,5390883.48,19394.84,"POLYGON ((-118.31879 34.05514, -118.31861 34.0..."


In [30]:
zips_wealth_slim = zips_wealth[['ID','NAME', 'AVGHINC_CY', 'AVGNW_CY', 'TOTPOP_CY']]

In [31]:
zips_esri = zips_wealth_slim.rename(columns={"ID": "zip",
                                                    'NAME':'name',
                                 'AVGHINC_CY':'avg_house_income',
                                 'AVGNW_CY':'avg_net_worth',
                                 'TOTPOP_CY':'population', })

In [32]:
zips_esri.head()

Unnamed: 0,zip,name,avg_house_income,avg_net_worth,population
0,90001,Los Angeles,46931,82915,59907
1,90002,Los Angeles,46332,96786,53573
2,90003,Los Angeles,43020,66091,71967
3,90004,Los Angeles,65893,232653,63987
4,90005,Los Angeles,53191,105569,41844


In [33]:
zips_esri.to_csv('../../usc/data/cars/zip_demographics.csv', index=False)
vehicles[vehicles['zip'] != 'OOS'].to_csv('../../usc/data/cars/vehicle-count.csv', index=False)

---

### Group the vehicles and count them by the registration ZIP code

In [34]:
zipcodes = vehicles.groupby(['zip']).agg({'vehicles':'sum'}).reset_index()

In [35]:
zipcodes.sort_values(by='vehicles', ascending=False).head(10)
zipcodes['zip'] = zipcodes['zip'].astype(str)

### Merge the registration zip codes and merge with Esri zipcode polygons

In [36]:
zips = pd.merge(zips_esri, zipcodes, left_on='zip', right_on='zip')

In [37]:
zips.head()

Unnamed: 0,zip,name,avg_house_income,avg_net_worth,population,vehicles
0,90001,Los Angeles,46931,82915,59907,17803
1,90002,Los Angeles,46332,96786,53573,16227
2,90003,Los Angeles,43020,66091,71967,21244
3,90004,Los Angeles,65893,232653,63987,22734
4,90005,Los Angeles,53191,105569,41844,12518


---

### Group by duty. Which are most common among ALL vehicles? 

In [38]:
# About 2.5 are listed as OTHER/UNK
duty_totals = vehicles.groupby(['duty']).agg('sum').reset_index()
duty_totals.head()

Unnamed: 0,duty,vehicles
0,Heavy,635013
1,Light,19667250


### Group by make. Which are most common? 

In [39]:
# About 2.5 are listed as OTHER/UNK
make = vehicles[vehicles['make'] != 'OTHER/UNK'].groupby(['make', 'zip']).agg('sum').reset_index()

In [40]:
most_make = make.groupby(['make']).agg('sum').reset_index().sort_values(by='vehicles', ascending=False)

In [41]:
most_make.head()

Unnamed: 0,make,vehicles
87,TOYOTA,3511211
33,HONDA,2403082
26,FORD,1744654
63,NISSAN,1380468
18,CHEVROLET,1316712


In [42]:
makelist = most_make.make.to_list()

In [43]:
popular_makes = make[make['make'].isin(makelist)]

In [44]:
make_zip = pd.DataFrame(pd.pivot_table(popular_makes, values='vehicles', \
                            index=['zip'], columns=['make'], aggfunc=np.sum, fill_value=0).reset_index())

In [45]:
make_zip.columns = make_zip.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)\
                    .str.replace('(', '', regex=False).str.replace(')', '', regex=False).str.replace('-','_', regex=False)

### Which make is most common in each ZIP code? 

In [46]:
make_zip["total"] = make_zip[['acura', 'alexander_dennis', 'alfa_romeo', 'allianz',
       'am_general', 'american_lafrance', 'aston_martin', 'audi', 'autocar',
       'bentley', 'blue_bird', 'bluebird', 'bmw', 'buick', 'cadillac',
       'capacity', 'capacity_of_texas_inc', 'caterpillar', 'chevrolet',
       'chrysler', 'crane_carrier', 'dodge', 'emergency_one', 'evobus',
       'ferrari', 'fiat', 'ford', 'freightliner', 'genesis', 'gillig', 'gmc',
       'hino', 'hme', 'honda', 'hummer', 'hyundai', 'ic_bus', 'ic_bus,_llc',
       'infiniti', 'international', 'isuzu', 'jaguar', 'jeep', 'kalmar',
       'kalmar_industries_usa_llc', 'kenworth', 'kia',
       'kovatch_moblie_equipment_corp.', 'lamborghini', 'land_rover', 'lexus',
       'lincoln', 'mack', 'maserati', 'mazda', 'mercedes_benz', 'mercury',
       'mini', 'mitsubishi', 'mitsubishi_fuso', 'motor_coach_industries',
       'new_flyer', 'newflyer', 'nissan', 'north_american_bus_industries',
       'orion', 'peterbilt', 'pierce', 'pierce_manufacturing', 'pontiac',
       'porsche', 'prevost', 'ram', 'rolls_royce', 'saab', 'saturn', 'scion',
       'smart', 'smith', 'spartan', 'sterling', 'subaru', 'suzuki', 'temsa',
       'tesla', 'thomas_built_buses', 'thomas_bus', 'toyota', 'ud_trucks',
       'unk', 'van_hool', 'volkswagen', 'volvo', 'vpg', 'western_star',
       'workhorse']].sum(axis=1)

In [47]:
make_zip.sort_values(by='total', ascending=False).head(5)

make,zip,acura,alexander_dennis,alfa_romeo,allianz,am_general,american_lafrance,aston_martin,audi,autocar,bentley,blue_bird,bluebird,bmw,buick,cadillac,capacity,capacity_of_texas_inc,caterpillar,chevrolet,chrysler,crane_carrier,dodge,emergency_one,evobus,...,ram,rolls_royce,saab,saturn,scion,smart,smith,spartan,sterling,subaru,suzuki,temsa,tesla,thomas_built_buses,thomas_bus,toyota,ud_trucks,unk,van_hool,volkswagen,volvo,vpg,western_star,workhorse,total
1604,OOS,2816,0,67,0,0,0,0,4486,35,48,13,20,6959,1494,4456,18,0,0,58535,4594,0,28449,0,0,...,18597,0,32,292,1481,90,91,0,87,8481,39,0,2422,12,105,54844,0,28266,53,9682,2785,0,0,20,581522
42,90045,316,0,14,0,0,0,0,750,0,0,0,0,1859,1628,605,0,0,0,9801,3347,0,8409,0,0,...,1442,0,0,0,59,0,0,0,0,624,0,0,352,0,0,17164,0,7300,0,3324,266,0,0,0,118622
957,94080,758,0,0,0,0,0,0,463,0,0,0,0,1570,434,571,0,0,0,13569,1761,0,4681,0,28,...,992,0,0,16,286,0,0,0,0,1139,0,13,334,0,0,16979,0,3917,0,2415,81,0,0,0,105927
487,92336,440,0,0,0,0,0,0,461,0,0,0,0,1412,158,440,0,0,0,5573,502,0,1248,0,0,...,459,0,0,68,589,0,0,0,0,585,0,0,218,0,0,9746,0,723,0,869,329,0,0,0,53255
417,92154,370,0,0,0,0,0,0,309,0,0,0,0,1103,185,209,0,0,0,2988,484,0,1353,0,0,...,298,0,0,88,448,0,0,0,92,564,13,0,65,0,0,8078,0,358,0,2050,292,0,0,0,48944


In [48]:
make_zip[['zip', 'total']].to_csv('output/total_makes_zip.csv')

In [49]:
make_total = pd.DataFrame(make_zip[['zip', 'total']])

In [50]:
make_zip['winner'] = make_zip[['acura', 'alexander_dennis', 'alfa_romeo', 'allianz',
       'am_general', 'american_lafrance', 'aston_martin', 'audi', 'autocar',
       'bentley', 'blue_bird', 'bluebird', 'bmw', 'buick', 'cadillac',
       'capacity', 'capacity_of_texas_inc', 'caterpillar', 'chevrolet',
       'chrysler', 'crane_carrier', 'dodge', 'emergency_one', 'evobus',
       'ferrari', 'fiat', 'ford', 'freightliner', 'genesis', 'gillig', 'gmc',
       'hino', 'hme', 'honda', 'hummer', 'hyundai', 'ic_bus', 'ic_bus,_llc',
       'infiniti', 'international', 'isuzu', 'jaguar', 'jeep', 'kalmar',
       'kalmar_industries_usa_llc', 'kenworth', 'kia',
       'kovatch_moblie_equipment_corp.', 'lamborghini', 'land_rover', 'lexus',
       'lincoln', 'mack', 'maserati', 'mazda', 'mercedes_benz', 'mercury',
       'mini', 'mitsubishi', 'mitsubishi_fuso', 'motor_coach_industries',
       'new_flyer', 'newflyer', 'nissan', 'north_american_bus_industries',
       'orion', 'peterbilt', 'pierce', 'pierce_manufacturing', 'pontiac',
       'porsche', 'prevost', 'ram', 'rolls_royce', 'saab', 'saturn', 'scion',
       'smart', 'smith', 'spartan', 'sterling', 'subaru', 'suzuki', 'temsa',
       'tesla', 'thomas_built_buses', 'thomas_bus', 'toyota', 'ud_trucks',
       'unk', 'van_hool', 'volkswagen', 'volvo', 'vpg', 'western_star',
       'workhorse']].idxmax(axis=1)

In [51]:
make_zip.head(10)

make,zip,acura,alexander_dennis,alfa_romeo,allianz,am_general,american_lafrance,aston_martin,audi,autocar,bentley,blue_bird,bluebird,bmw,buick,cadillac,capacity,capacity_of_texas_inc,caterpillar,chevrolet,chrysler,crane_carrier,dodge,emergency_one,evobus,...,rolls_royce,saab,saturn,scion,smart,smith,spartan,sterling,subaru,suzuki,temsa,tesla,thomas_built_buses,thomas_bus,toyota,ud_trucks,unk,van_hool,volkswagen,volvo,vpg,western_star,workhorse,total,winner
0,90001,52,0,0,0,0,0,0,11,0,0,0,0,344,14,57,0,0,0,1670,181,0,663,0,0,...,0,0,27,235,0,0,0,0,51,0,0,0,0,0,3306,0,158,0,191,0,0,0,0,16031,toyota
1,90002,20,0,0,0,0,0,0,13,0,0,0,0,240,13,72,0,0,0,1561,193,0,641,0,0,...,0,0,38,221,0,0,0,0,41,0,0,0,0,0,3009,0,111,0,212,0,0,0,0,14720,toyota
2,90003,83,0,0,0,0,0,0,41,0,0,0,0,332,11,102,0,0,0,1808,252,0,818,0,0,...,0,0,37,338,0,0,0,0,25,0,0,0,0,0,4355,0,159,0,277,0,0,0,0,19648,toyota
3,90004,227,0,32,0,0,0,0,563,0,0,0,0,1075,0,37,0,0,0,680,11,0,220,0,0,...,0,0,0,238,0,0,0,0,422,0,0,200,0,0,5250,0,179,0,678,114,0,0,0,21021,toyota
4,90005,62,0,0,0,0,0,0,241,0,0,0,0,759,0,0,0,0,0,260,0,0,52,0,0,...,0,0,0,39,0,0,0,0,192,0,0,58,0,0,2638,0,108,0,320,0,0,0,0,10873,toyota
5,90006,62,0,0,0,0,0,0,113,0,0,0,0,564,0,0,0,0,0,664,12,0,203,0,0,...,0,0,0,178,0,0,0,0,119,0,0,26,0,0,4477,0,131,0,320,28,0,0,0,15710,toyota
6,90007,0,0,0,0,0,0,0,182,0,0,0,0,414,0,0,0,0,0,509,14,0,153,0,0,...,0,0,0,57,0,0,0,0,90,0,0,27,0,0,2206,0,102,0,287,0,0,0,0,9051,toyota
7,90008,36,0,0,0,0,0,0,200,0,0,0,0,499,49,171,0,0,0,785,159,0,273,0,0,...,0,0,0,14,0,0,0,0,91,0,0,54,0,0,1801,0,55,0,328,25,0,0,0,10786,toyota
8,90010,0,0,0,0,0,0,0,81,0,0,0,0,317,0,77,0,0,0,60,0,0,60,0,0,...,0,0,0,0,0,0,0,0,17,0,0,39,0,0,1455,0,364,0,12,15,0,0,0,4959,toyota
9,90011,171,0,0,0,0,0,0,101,0,0,0,0,416,0,127,0,0,0,2643,267,0,1071,0,0,...,0,0,49,416,0,0,0,0,115,0,0,0,0,0,5994,0,195,0,394,11,0,0,0,27191,toyota


In [52]:
make_zip_top_ten = pd.DataFrame(make_zip[['zip','toyota','honda','ford','chevrolet','nissan',\
                               'bmw','hyundai','lexus','mercedes_benz','kia', 'total']])

In [53]:
make_zip_top_ten.to_csv('output/make_zip_top_ten.csv')

In [54]:
make_zip_top_ten.head()

make,zip,toyota,honda,ford,chevrolet,nissan,bmw,hyundai,lexus,mercedes_benz,kia,total
0,90001,3306,2471,1032,1670,2564,344,398,170,169,1034,16031
1,90002,3009,2149,926,1561,2447,240,371,176,174,1113,14720
2,90003,4355,2978,1179,1808,3106,332,433,247,303,1408,19648
3,90004,5250,3144,774,680,1812,1075,778,1176,889,686,21021
4,90005,2638,1769,341,260,877,759,610,822,640,401,10873


---

## Isolate vehicle makes to include only Teslas

In [55]:
tesla = vehicles[(vehicles['make'] == 'TESLA')]

In [56]:
len(tesla)

3212

### Group by ZIP code and count the vehicles

In [57]:
tesla_grouped = tesla.groupby(['zip']).agg({'vehicles':'sum'}).reset_index()

### Merge with dataframe that includes all vehicle counts by ZIP code

In [58]:
tesla_zips = pd.merge(tesla_grouped, zips, on='zip')

### Rename the columns

In [59]:
tesla_zips.rename(columns={'median': 'income','zip': 'zip', \
                           'vehicles_x':'teslas', 'vehicles_y':'all_vehicles'}, inplace=True)

### Normalize Tesla ownership to a rate per 1,000 vehicles

In [60]:
tesla_zips['tesla_rate_1k'] = ((tesla_zips.teslas / tesla_zips.all_vehicles) * 1000).round(2)

In [61]:
tesla_zips.sort_values(by='tesla_rate_1k', ascending=False).head(10)

Unnamed: 0,zip,teslas,name,avg_house_income,avg_net_worth,population,all_vehicles,tesla_rate_1k
563,94027,527,Atherton,297732,3680197,7230,5156,102.21
560,94022,1227,Los Altos,248970,3041961,20626,13820,88.78
608,94301,764,Palo Alto,224861,1760729,18694,9376,81.48
561,94024,1210,Los Altos,259185,3442702,23198,14929,81.05
744,95070,1586,Saratoga,243915,3157204,32730,21471,73.87
564,94028,358,Portola Valley,263238,3322715,6639,4854,73.75
416,92657,573,Newport Coast,244565,2836260,11464,8343,68.68
640,94539,1931,Fremont,198198,2070851,54673,31788,60.75
731,95030,568,Los Gatos,233429,2476366,13618,9373,60.6
772,95138,710,San Jose,181584,1376395,19789,11943,59.45


In [62]:
tesla_breaks \
= jenkspy.jenks_breaks(tesla_zips.tesla_rate_1k, nb_class=6)
tesla_breaks

[0.33, 5.64, 12.65, 23.47, 39.12, 60.75, 102.21]

In [63]:
tesla_zips.to_csv('output/tesla_zips.csv')

In [64]:
tesla_zips_slim = pd.DataFrame(tesla_zips[['zip','name','teslas','all_vehicles','tesla_rate_1k', 'avg_house_income', 'avg_net_worth']])

In [65]:
tesla_zips_slim.sort_values(by='tesla_rate_1k', ascending=False).head(10)

Unnamed: 0,zip,name,teslas,all_vehicles,tesla_rate_1k,avg_house_income,avg_net_worth
563,94027,Atherton,527,5156,102.21,297732,3680197
560,94022,Los Altos,1227,13820,88.78,248970,3041961
608,94301,Palo Alto,764,9376,81.48,224861,1760729
561,94024,Los Altos,1210,14929,81.05,259185,3442702
744,95070,Saratoga,1586,21471,73.87,243915,3157204
564,94028,Portola Valley,358,4854,73.75,263238,3322715
416,92657,Newport Coast,573,8343,68.68,244565,2836260
640,94539,Fremont,1931,31788,60.75,198198,2070851
731,95030,Los Gatos,568,9373,60.6,233429,2476366
772,95138,San Jose,710,11943,59.45,181584,1376395


### How predictive is income  

In [66]:
tesla_zips.head()

Unnamed: 0,zip,teslas,name,avg_house_income,avg_net_worth,population,all_vehicles,tesla_rate_1k
0,90004,200,Los Angeles,65893,232653,63987,22734,8.8
1,90005,58,Los Angeles,53191,105569,41844,12518,4.63
2,90006,26,Los Angeles,42242,39289,61184,17324,1.5
3,90007,27,Los Angeles,38564,39966,45381,10707,2.52
4,90008,54,Los Angeles,56286,284531,34600,12611,4.28


In [67]:
tesla_zips_corr = tesla_zips_slim[['tesla_rate_1k', 'avg_house_income']]

In [68]:
corr = tesla_zips_corr.corr(method ='pearson')

In [69]:
print(corr)

                  tesla_rate_1k  avg_house_income
tesla_rate_1k              1.00              0.83
avg_house_income           0.83              1.00


In [70]:
alt.Chart(tesla_zips_slim).mark_circle(size=60).encode(
    x=alt.X('tesla_rate_1k:Q', title='Tesla rate per 1,000 vehicles', axis=alt.Axis(tickCount=6)),
    y=alt.Y('avg_net_worth:Q', title='Avg net worth', axis=alt.Axis(tickCount=5, format='$,N')),
    tooltip=['zip:N', 'teslas:O', 'avg_net_worth:Q', 'tesla_rate_1k:Q']
).properties(width=500, height=500)

In [71]:
tesla_zips_slim.to_csv('output/tesla_zips_slim.csv')

---

### Group by model year. Which years have the most vehicles? 

In [72]:
model_year = vehicles.groupby(['model_year']).agg('sum').reset_index()

In [73]:
model_year.sort_values(by='model_year', ascending=False)

Unnamed: 0,model_year,vehicles
15,Unk,2966
14,2021,45
13,2020,278197
12,2019,1883333
11,2018,2106886
10,2017,2131800
9,2016,1980056
8,2015,1977629
7,2014,1646052
6,2013,1601310


In [74]:
model_year_chart = alt.Chart(model_year).mark_bar().encode(
).mark_bar().encode(
    y=alt.Y('model_year:N', title=" ", axis=alt.Axis(format='', tickCount=5)),
    x=alt.X("vehicles:Q", title="Vehicles", axis=alt.Axis(format='', tickCount=5))
)

model_year_chart_text = model_year_chart.mark_text(
    align='left',
    baseline='middle',
    dx=5
).encode(text=alt.Text('vehicles:Q', format=',')
)

(model_year_chart + model_year_chart_text).properties(height=500, \
                                                      width=700, title='California vehicles, by model year')

### Export model year table for graphics

In [75]:
model_year.to_csv('output/model_year.csv')

---

## Fuel types

### Which types of alternative fuel models are most common?

In [76]:
fuel = vehicles.groupby(['fuel']).agg('sum').reset_index()

In [77]:
fuel.head(9)

Unnamed: 0,fuel,vehicles
0,Battery Electric,306297
1,Diesel and Diesel Hybrid,804002
2,Flex-Fuel,1033910
3,Gasoline,16852351
4,Hybrid Gasoline,1026857
5,Hydrogen Fuel Cell,6638
6,Natural Gas,21096
7,Other,2724
8,Plug-in Hybrid,248388


In [78]:
fuel.fuel.tolist()

['Battery Electric',
 'Diesel and Diesel Hybrid',
 'Flex-Fuel',
 'Gasoline',
 'Hybrid Gasoline',
 'Hydrogen Fuel Cell',
 'Natural Gas',
 'Other',
 'Plug-in Hybrid']

In [79]:
# Diesel and Diesel Hybrid + Flex-Fuel + Gasoline + Hybrid Gasoline
'{:,.0f}'.format((fuel.iloc[1,1] + fuel.iloc[2,1] + fuel.iloc[3,1] + fuel.iloc[4,1]))

'19,717,120'

# Chart the fuel type counts

In [80]:
chart_fuels = alt.Chart(fuel).mark_bar().encode(
    y=alt.Y('fuel:N', title=' ',
        sort=alt.EncodingSortField(
            field="vehicles",  # The field to use for the sort
            op="sum",  # The operation to run on the field prior to sorting
            order="descending"  # The order to sort in
        ), axis=alt.Axis(format='', tickCount=5)),
    x=alt.X("sum(vehicles):Q", title="Vehicles", axis=alt.Axis(format='', tickCount=5))
)

chart_fuels_text = chart_fuels.mark_text(
    align='left',
    baseline='middle',
    dx=3  # Nudges text to right so it doesn't appear on top of the bar
).encode(text=alt.Text('sum(vehicles)', format=',')
)


(chart_fuels + chart_fuels_text)\
.properties(height=400, width=500, title='CA alternative fuel vehicles by type')

### Export fuel type count table for graphics

In [81]:
fuel.to_csv('output/fuel.csv')

--- 

## Where are these vehicles?

In [82]:
zip_code = vehicles.groupby(['zip']).agg('sum').reset_index()

In [83]:
zip_code_zev = zev.groupby(['zip']).agg('sum').reset_index()

### Which ZIP codes have the most alternative fuel vehicles? (Airport areas, it seems)

In [84]:
zip_code_zev.sort_values(by='vehicles',\
    ascending=False).head(10)

Unnamed: 0,zip,vehicles
1991,OOS,3706
1225,94539,2902
1381,95014,2596
1250,94568,2197
1262,94582,2098
1418,95070,2085
1224,94538,2035
1394,95035,1872
508,92130,1862
1138,94087,1860


### Pivot on ZIP code and widen out the dataframe to count vehicle types across them

In [85]:
sum_by_zip = pd.pivot_table(vehicles, values='vehicles', \
                            index=['zip'], columns=['fuel'], aggfunc=np.sum, fill_value=0).reset_index()

sum_by_zip.columns = sum_by_zip.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)\
                    .str.replace('(', '', regex=False).str.replace(')', '', regex=False).str.replace('-','_', regex=False)

### Which type is most common in each ZIP code? 

In [86]:
sum_by_zip["total"] = sum_by_zip.sum(axis=1)

  sum_by_zip["total"] = sum_by_zip.sum(axis=1)


In [87]:
sum_by_zip.sort_values(by='total', ascending=False).head(5)

fuel,zip,battery_electric,diesel_and_diesel_hybrid,flex_fuel,gasoline,hybrid_gasoline,hydrogen_fuel_cell,natural_gas,other,plug_in_hybrid,total
4771,OOS,3706,47674,62755,447534,18479,72,1182,73,2167,583642
45,90045,573,580,9879,104244,4717,25,154,1,468,120641
2393,94080,538,1366,5482,96815,3912,7,32,0,329,108481
1269,92336,370,2061,2448,48045,1884,11,31,1,555,55406
1112,92154,173,4660,2134,42547,1366,0,8,0,175,51063


### Group the lesser-used fuel types into an 'other' category

In [88]:
sum_by_zip['other'] = sum_by_zip.apply\
    (lambda x: x['hydrogen_fuel_cell'] + x['natural_gas'] + x['other'], axis=1)
sum_by_zip.drop(['hydrogen_fuel_cell', 'natural_gas'], axis=1, inplace=True)
sum_by_zip.drop([0], inplace=True)

### Which non-gas vehicle is most popular — the 'winner' — in each zip?

In [89]:
sum_by_zip['winner'] = \
sum_by_zip[['hybrid_gasoline','battery_electric','diesel_and_diesel_hybrid',\
                   'flex_fuel','plug_in_hybrid', 'other']].idxmax(axis=1)

In [90]:
sum_by_zip.sort_values(by='total', ascending=False).head(10)

fuel,zip,battery_electric,diesel_and_diesel_hybrid,flex_fuel,gasoline,hybrid_gasoline,other,plug_in_hybrid,total,winner
4771,OOS,3706,47674,62755,447534,18479,1327,2167,583642,flex_fuel
45,90045,573,580,9879,104244,4717,180,468,120641,flex_fuel
2393,94080,538,1366,5482,96815,3912,39,329,108481,flex_fuel
1269,92336,370,2061,2448,48045,1884,43,555,55406,flex_fuel
1112,92154,173,4660,2134,42547,1366,8,175,51063,diesel_and_diesel_hybrid
792,91709,931,1170,1672,42517,2516,75,908,49789,hybrid_gasoline
329,90650,211,1129,2005,42688,1470,43,329,47875,flex_fuel
793,91710,415,2493,2617,38839,1628,907,539,47438,flex_fuel
1548,92683,524,697,1281,41328,2291,58,587,46766,hybrid_gasoline
619,91342,220,1799,2205,40115,1913,18,368,46638,flex_fuel


### De-slugify the 'winner' category

In [91]:
sum_by_zip['winner'] = sum_by_zip['winner'].str.replace('_', ' ', regex=False).str.capitalize()

In [92]:
sum_by_zip['altshare'] = (((sum_by_zip['total']-sum_by_zip['gasoline'])/sum_by_zip['total'])*100).round()

In [93]:
sum_by_zip['batteryshare'] = ((sum_by_zip['battery_electric']/sum_by_zip['total'])*100)

In [94]:
sum_by_zip.sort_values(by='batteryshare', ascending=False).head(10)

fuel,zip,battery_electric,diesel_and_diesel_hybrid,flex_fuel,gasoline,hybrid_gasoline,other,plug_in_hybrid,total,winner,altshare,batteryshare
2332,94007,1,0,0,0,0,0,0,1,Battery electric,100.0,100.0
4504,98456,1,0,0,0,0,0,0,1,Battery electric,100.0,100.0
100,90106,1,0,0,0,0,0,0,1,Battery electric,100.0,100.0
2321,93965,1,0,0,0,0,0,0,1,Battery electric,100.0,100.0
119,90203,1,0,0,0,0,0,0,1,Battery electric,100.0,100.0
3006,95217,1,0,0,1,0,0,0,2,Battery electric,50.0,50.0
547,91217,1,0,0,1,0,0,0,2,Battery electric,50.0,50.0
2353,94031,1,0,0,1,0,0,0,2,Battery electric,50.0,50.0
2693,94656,1,0,0,1,0,0,0,2,Battery electric,50.0,50.0
2800,94944,1,0,0,1,0,0,0,2,Battery electric,50.0,50.0


### Use the "jenks" method to set fair breaks for total field

In [95]:
breaks \
= jenkspy.jenks_breaks(sum_by_zip.batteryshare, nb_class=7)

In [96]:
breaks

[0.0,
 1.0714285714285714,
 3.6193029490616624,
 9.129231156411224,
 20.0,
 33.33333333333333,
 50.0,
 100.0]

---

### Export merged polygon geodataframe as GeoJSON

In [97]:
zips_poly.to_file('../../../data/gis/zips_poly.geojson', driver='GeoJSON')

In [98]:
zips_poly_merged = zips_poly.merge(sum_by_zip, on='zip')

In [99]:
zips_poly_merged.columns

Index(['objectid', 'zip', 'po_name', 'pop2012', 'white', 'black', 'ameri_es',
       'asian', 'hawn_pi', 'hispanic', 'other_x', 'mult_race', 'med_age',
       'ave_hh_sz', 'marhh_chd', 'families', 'ave_fam_sz', 'hse_units', 'sqmi',
       'geometry', 'battery_electric', 'diesel_and_diesel_hybrid', 'flex_fuel',
       'gasoline', 'hybrid_gasoline', 'other_y', 'plug_in_hybrid', 'total',
       'winner', 'altshare', 'batteryshare'],
      dtype='object')

In [100]:
# zips_poly_merged_drop = ['objectid', 'zip', 'po_name', 'sqmi', 
#                         'battery_electric', 'diesel_and_diesel_hybrid', 'flex_fuel',
#                          'gasoline', 'hybrid_gasoline', 'other_y', 'plug_in_hybrid', 'total',
#                          'winner', 'altshare']
# zips_poly_merged.drop(zips_poly_merged_drop, inplace=True, axis=1)

In [101]:
zips_poly_merged.to_file('/Users/mhustiles/data/data/GIS/zips_poly_merged.geojson', driver='GeoJSON')

DriverError: Failed to create GeoJSON datasource: /Users/mhustiles/data/data/GIS/zips_poly_merged.geojson: /Users/mhustiles/data/data/GIS/zips_poly_merged.geojson: No such file or directory

### Export with Tesla totals

In [None]:
tesla_zips_poly_merged = zips_poly.merge(tesla_zips_slim, on='zip')

In [None]:
tesla_zips_poly_merged.to_file('/Users/mhustiles/data/github/notebooks/\
california-fuel-vehicles/output/tesla_zips.geojson', driver='GeoJSON')

In [None]:
tesla_zips_poly_merged.head()

### Merge ZIP points with Tesla & electric totals

In [None]:
teslas_poimt_merged = ca_zips_point.merge(tesla_zips_slim, on='zip')

In [None]:
teslas_poimt_merged.sort_values(by='tesla_rate_1k', ascending=False).head(10)

In [None]:
teslas_poimt_merged.to_file('output/teslas_point_merged.geojson', driver='GeoJSON')

### Merge ZIP points with all vehicle totals

In [None]:
ca_zips_point_merged = ca_zips_point.merge(sum_by_zip, on='zip')

In [None]:
ca_zips_point_merged.plot()

### Export merged points geodataframe as GeoJSON

In [None]:
ca_zips_point_merged.head(5)

In [None]:
ca_zips_point_merged.to_file('/Users/mhustiles/data/data/fuel/ca_zips_point_merged.geojson', driver='GeoJSON')

### Convert polygons to mbtiles for Mapbox. Export.

In [None]:
!tippecanoe --force -r1 -pk -pf -Z5 -z13 -o \
/Users/mhustiles/data/github/notebooks/california-fuel-vehicles/output/ca_zips_poly_merged.mbtiles \
/Users/mhustiles/data/github/notebooks/california-fuel-vehicles/output/ca_zips_poly_merged.geojson

### Convert points to mbtiles for Mapbox. Export.

In [None]:
!tippecanoe --force -r1 -pk -pf -Z5 -z13 -o \
/Users/mhustiles/data/github/notebooks/california-fuel-vehicles/output/ca_zips_point_merged.mbtiles \
/Users/mhustiles/data/github/notebooks/california-fuel-vehicles/output/ca_zips_point_merged.geojson

### Convert Tesla polygons to mbtiles for Mapbox. Export.

In [None]:
!tippecanoe --force -r1 -pk -pf -Z5 -z13 -o \
/Users/mhustiles/data/github/notebooks/california-fuel-vehicles/output/teslas_poly_merged.mbtiles \
/Users/mhustiles/data/github/notebooks/california-fuel-vehicles/output/teslas_poly_merged.geojson

---

## Waffle plot

In [None]:
fuel.head(9)

In [None]:
fuel.vehicles.sum()

In [None]:
fuel.iloc[0,1] + fuel.iloc[5,1] + fuel.iloc[6,1] + fuel.iloc[7,1]

In [None]:
fuel['per_1000'] =  ((fuel['vehicles'] / 18911128) * 1000).round(2)

In [None]:
fuel

In [None]:
fig = plt.figure(
    FigureClass=Waffle, 
    rows=21,
    values=fuel.per_1000,
    labels=list(fuel.fuel),
    figsize=(30,20),
    icons='car-side',
    colors=['#ec8431','#e6e6e6','#e6e6e6','#e6e6e6','#e6e6e6','#e6e6e6','#e6e6e6', '#e6e6e6', '#e6e6e6'],
    legend={
        'loc': 'lower left',
        'bbox_to_anchor': (0, -0.4),
        'ncol': len(fuel),
        'framealpha': 0,
        'fontsize': 0
    }
)

See related [Twitter thread](https://twitter.com/stiles/status/1193416749116358656)