# Driving Towards a Brighter Future: 
## EV Adaption Rates in California and its Effect on PM 2.5 Emissions



In [2]:
# install pip if not already downloaded
pip install openpyxl

SyntaxError: invalid syntax (373123575.py, line 2)

In [3]:
# import
import pandas as pd 

### Data Source 1: Cleaning and Manipulation 
#### Our primary datasource was the California Vehicle Population dataset, which contains information on number of vehicles registered in each count or zip code by make, model, and fuel type. This file was uploaded to Github and stored in vehicle_data folder. Manually inspecting the vehicle data reveals there are three work sheets, where 'County' shows the registered cars per each county in California. Since we are studying the effects by county, this is the dataset we are interested in. 

In [4]:
# Let's create df from csv file 
countyvehicle_df = pd.read_excel('vehicle_data/california_vehicle.xlsx',sheet_name="County")

In [5]:
## We've isolated the worksheet, now let's take a look at the shape ##
countyvehicle_df.shape

(33542, 7)

In [6]:
## We see there are 33542 rows and 7 columns, now let's see what that looks like ##
countyvehicle_df.head()

Unnamed: 0,Data Year,County,Dashboard Fuel Type Group,Fuel Type,Make,Model,Number of Vehicles
0,2010,Alameda,Battery Electric (BEV),Battery Electric (BEV),Ford,Ranger,3
1,2010,Alameda,Battery Electric (BEV),Battery Electric (BEV),Tesla,Roadster,17
2,2010,Alameda,Diesel,Diesel,,,10939
3,2010,Alameda,Gasoline,Flex Fuel,,,10974
4,2010,Alameda,Gasoline,Gasoline,,,840577


In [7]:
## We're not interested in the Make or Model of the car, so let's drop those values ##
countyvehicle_df = countyvehicle_df.drop(columns=['Make','Model'])
countyvehicle_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33542 entries, 0 to 33541
Data columns (total 5 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   Data Year                  33542 non-null  int64 
 1   County                     33542 non-null  object
 2   Dashboard Fuel Type Group  33542 non-null  object
 3   Fuel Type                  33542 non-null  object
 4   Number of Vehicles         33542 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 1.3+ MB


In [8]:
# Let's see how many unique Counties there are in California
countyvehicle_df['County'].unique()

array(['Alameda', 'Alpine', 'Amador', 'Butte', 'Calaveras', 'Colusa',
       'Contra Costa', 'Del Norte', 'El Dorado', 'Fresno', 'Glenn',
       'Humboldt', 'Imperial', 'Inyo', 'Kern', 'Kings', 'Lake', 'Lassen',
       'Los Angeles', 'Madera', 'Marin', 'Mariposa', 'Mendocino',
       'Merced', 'Modoc', 'Mono', 'Monterey', 'Napa', 'Nevada', 'Orange',
       'Out Of State', 'Placer', 'Plumas', 'Riverside', 'Sacramento',
       'San Benito', 'San Bernardino', 'San Diego', 'San Francisco',
       'San Joaquin', 'San Luis Obispo', 'San Mateo', 'Santa Barbara',
       'Santa Clara', 'Santa Cruz', 'Shasta', 'Sierra', 'Siskiyou',
       'Solano', 'Sonoma', 'Stanislaus', 'Sutter', 'Tehama', 'Trinity',
       'Tulare', 'Tuolumne', 'Ventura', 'Yolo', 'Yuba', 'Out of State'],
      dtype=object)

In [9]:
# There are vehicles present that are from out of state. Let's see what years have vehicles from out of state 
for year in countyvehicle_df['Data Year'].unique():
    out_of_state_2023 = countyvehicle_df[(countyvehicle_df['Data Year'] == year) & 
                                         (countyvehicle_df['County'] == 'Out of State')]
    
    total_out_of_state_2023 = out_of_state_2023['Number of Vehicles'].sum()
    if total_out_of_state_2023 != 0:
        print(f"The total number of vehicles for 'Out of State' in {year} is: {total_out_of_state_2023}")

The total number of vehicles for 'Out of State' in 2021 is: 403531
The total number of vehicles for 'Out of State' in 2022 is: 409644


In [10]:
# Since out of state is only present for two years (2021-2022), we'll drop those values and note our assumption that they have a marginal impact. 
calcountyvehicle_df = countyvehicle_df[~countyvehicle_df['County'].isin(['Out of State'])]

In [11]:
# Next, let's take a look at the fuel types that are used. We're interested in impact of EVs and non-fossil fuel based cars. 
# There are two columns that cover the fuel type, 'Dashboard Fuel Type Group' and 'Fuel Type' that are fully populated (length 33542). 
# Let's take a look at the difference between the two.
fuel_types_1 = calcountyvehicle_df['Dashboard Fuel Type Group'].unique()
fuel_types_2 = calcountyvehicle_df['Fuel Type'].unique()

fuel_types_df = pd.DataFrame({
    'Fuel Type': pd.Series(fuel_types_2),
    'Dashboard Fuel Type Group': pd.Series(fuel_types_1)
})

fuel_types_df


Unnamed: 0,Fuel Type,Dashboard Fuel Type Group
0,Battery Electric (BEV),Battery Electric (BEV)
1,Diesel,Diesel
2,Flex Fuel,Gasoline
3,Gasoline,Gasoline Hybrid
4,Gasoline Hybrid,Other
5,Natural Gas,Fuel Cell (FCEV)
6,Propane,Plug-in Hybrid (PHEV)
7,Fuel Cell (FCEV),
8,Plug-in Hybrid (PHEV),


In [12]:
# It appears Dashboard Fuel Type Group goes ahead and clusters them together. 
# We're going to make our own the assumption and group together vehicles based on fuel type: 
# Diesel, Flex Fuel, Gasoline, Gasoline Hybrid, Natural Gas, and Propane will be treated as Fossil Fuels, while
# Battery Electric, Fuel Cell, and Plug-in Hybrid will be treated as EVs. 
fossil_fuels = ['Diesel', 'Flex Fuel', 'Gasoline', 'Gasoline Hybrid', 'Natural Gas', 'Propane']
evs = ['Battery Electric (BEV)', 'Fuel Cell (FCEV)', 'Plug-in Hybrid (PHEV)']
def categorize_fuel(fuel_type):
    if fuel_type in fossil_fuels:
        return 'Fossil Fuel'
    elif fuel_type in evs:
        return 'EV'
    else:
        return 'Other'
calcountyvehicle_df.loc[:, 'Fuel Category'] = calcountyvehicle_df['Fuel Type'].apply(categorize_fuel)
# Print the DataFrame to check the new column
calcountyvehicle_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  calcountyvehicle_df.loc[:, 'Fuel Category'] = calcountyvehicle_df['Fuel Type'].apply(categorize_fuel)


Unnamed: 0,Data Year,County,Dashboard Fuel Type Group,Fuel Type,Number of Vehicles,Fuel Category
0,2010,Alameda,Battery Electric (BEV),Battery Electric (BEV),3,EV
1,2010,Alameda,Battery Electric (BEV),Battery Electric (BEV),17,EV
2,2010,Alameda,Diesel,Diesel,10939,Fossil Fuel
3,2010,Alameda,Gasoline,Flex Fuel,10974,Fossil Fuel
4,2010,Alameda,Gasoline,Gasoline,840577,Fossil Fuel
