In [1]:
from pathlib import Path
import pandas as pd

In [2]:
# Read the processed per-state, per-year table of vehicle registrations
# (one column per fuel type) together with state population.
states_csv_path = Path("../../../../data/processed_data/states_analysis.csv")
df = pd.read_csv(states_csv_path)

# Report the dimensions first, then let the notebook render the frame itself
print("Shape:", df.shape)
df

Shape: (357, 15)


Unnamed: 0,year,state,population,electric,phev,hev,biodiesel,ethanol,cng,propane,hydrogen,methanol,gasoline,diesel,unknown
0,2016,Alabama,4863525.0,500,900,29100,0,428300,20100,0,0,0,3777300,126500,53900
1,2016,Alaska,741456.0,200,200,5000,0,55700,4900,0,0,0,525900,44800,19400
2,2016,Arizona,6941072.0,4700,4400,89600,0,427300,17500,0,0,100,4805000,179500,112800
3,2016,Arkansas,2989918.0,200,500,19100,0,320500,12600,0,0,0,2097800,96800,22200
4,2016,California,39167117.0,141500,116700,966700,0,1322600,80600,0,1300,400,27241000,710400,115500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352,2022,Virginia,8679099.0,56600,21700,198400,40000,496200,300,0,0,0,6643300,153700,31900
353,2022,Washington,7784477.0,104100,31400,270200,67500,348300,100,100,0,0,5650700,277400,52700
354,2022,West Virginia,1774035.0,1900,1400,18300,15600,127500,100,0,0,0,1267500,45700,10900
355,2022,Wisconsin,5890543.0,15700,10000,105200,46500,549700,300,0,0,0,4577400,144500,26900


### Calculate EV and Gasoline Adoption Rates

In [3]:
# Every registration-count column that contributes to a state's yearly total
fuel_columns = [
    'electric', 'phev', 'hev', 'biodiesel', 'ethanol', 'cng',
    'propane', 'hydrogen', 'methanol', 'gasoline', 'diesel', 'unknown',
]

# Total registered vehicles per row (one row = one state in one year)
df['total_vehicles'] = df[fuel_columns].sum(axis=1)

# Share of the row's total that is battery-electric (a fraction, not a percentage)
df["ev_adoption_rate"] = df["electric"].div(df["total_vehicles"])

# Share of the row's total that is gasoline-powered (a fraction, not a percentage)
df["gas_adoption_rate"] = df["gasoline"].div(df["total_vehicles"])

print("Shape:", df.shape)
df

Shape: (357, 18)


Unnamed: 0,year,state,population,electric,phev,hev,biodiesel,ethanol,cng,propane,hydrogen,methanol,gasoline,diesel,unknown,total_vehicles,ev_adoption_rate,gas_adoption_rate
0,2016,Alabama,4863525.0,500,900,29100,0,428300,20100,0,0,0,3777300,126500,53900,4436600,0.000113,0.851395
1,2016,Alaska,741456.0,200,200,5000,0,55700,4900,0,0,0,525900,44800,19400,656100,0.000305,0.801555
2,2016,Arizona,6941072.0,4700,4400,89600,0,427300,17500,0,0,100,4805000,179500,112800,5640900,0.000833,0.851814
3,2016,Arkansas,2989918.0,200,500,19100,0,320500,12600,0,0,0,2097800,96800,22200,2569700,0.000078,0.816360
4,2016,California,39167117.0,141500,116700,966700,0,1322600,80600,0,1300,400,27241000,710400,115500,30696700,0.004610,0.887424
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352,2022,Virginia,8679099.0,56600,21700,198400,40000,496200,300,0,0,0,6643300,153700,31900,7642100,0.007406,0.869303
353,2022,Washington,7784477.0,104100,31400,270200,67500,348300,100,100,0,0,5650700,277400,52700,6802500,0.015303,0.830680
354,2022,West Virginia,1774035.0,1900,1400,18300,15600,127500,100,0,0,0,1267500,45700,10900,1488900,0.001276,0.851300
355,2022,Wisconsin,5890543.0,15700,10000,105200,46500,549700,300,0,0,0,4577400,144500,26900,5476200,0.002867,0.835872


In [4]:
# Inspect the data types: `info()` prints the index range plus, for every
# column, its non-null count and dtype, which makes columns with missing
# values easy to spot before the frame is saved.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 357 entries, 0 to 356
Data columns (total 18 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   year               357 non-null    int64  
 1   state              357 non-null    object 
 2   population         350 non-null    float64
 3   electric           357 non-null    int64  
 4   phev               357 non-null    int64  
 5   hev                357 non-null    int64  
 6   biodiesel          357 non-null    int64  
 7   ethanol            357 non-null    int64  
 8   cng                357 non-null    int64  
 9   propane            357 non-null    int64  
 10  hydrogen           357 non-null    int64  
 11  methanol           357 non-null    int64  
 12  gasoline           357 non-null    int64  
 13  diesel             357 non-null    int64  
 14  unknown            357 non-null    int64  
 15  total_vehicles     357 non-null    int64  
 16  ev_adoption_rate   357 non

In [5]:
# Save the DataFrame to a CSV file
def save_csv_file(df, file_path):
    """ 
    Save a DataFrame to a CSV file at the specified file path.

    The data is first written to a temporary sibling file
    (`<name>.tmp`) and only moved over the target once the write has
    succeeded, so an existing file is never lost if the write fails.

    Parameters:
    - df: DataFrame to save (written without its index)
    - file_path: Path (or path string) to save the CSV file

    Returns:
    - None. If the parent directory does not exist, an error message is
      printed and nothing is written.

    Raises:
    - Any exception raised while writing or replacing the file is
      propagated; the previous file (if any) is left untouched.
    """

    # Accept plain strings as well as Path objects
    file_path = Path(file_path)

    # Check if the parent directory exists
    if not file_path.parent.exists():
        print(f"Error: The directory `{file_path.parent}` does not exist.")
        return

    if file_path.exists():
        print(f"File `{file_path.name}` already exists. Overwriting file.")

    # Write next to the target so the final rename stays on one filesystem
    temp_path = file_path.with_name(f"{file_path.name}.tmp")
    try:
        df.to_csv(temp_path, index=False)
        # Swap the finished file into place; replaces any existing target
        temp_path.replace(file_path)
    finally:
        # Only present if the write or the swap failed part-way
        if temp_path.exists():
            temp_path.unlink()

    print(f"File saved as `{file_path.name}`")

In [6]:
# Persist the frame, now carrying the total and adoption-rate columns,
# alongside the other processed data sets.
file_name = "states_adoption_rates.csv"
file_path = Path(f"../../../../data/processed_data/{file_name}")
save_csv_file(df=df, file_path=file_path)

File `states_adoption_rates.csv` already exists. Overwriting file.
File saved as `states_adoption_rates.csv`
