In [1]:
from pathlib import Path
import datetime as dt
import pandas as pd

In [2]:
# Create a function to save the DataFrames to CSV files
def save_csv_file(df, file_path):
    """
    Save a DataFrame to a CSV file at the specified file path.

    The DataFrame index is not written. If the parent directory of
    `file_path` does not exist, an error message is printed and nothing is
    written. If a file already exists at `file_path`, it is overwritten.

    Parameters:
    - df: DataFrame to save
    - file_path: Path (a `pathlib.Path` or a path string) to save the CSV file

    Returns:
    - None
    """

    # Accept plain path strings as well as `Path` objects
    file_path = Path(file_path)

    # Check if the parent directory exists
    if not file_path.parent.exists():
        print(f"Error: The directory `{file_path.parent}` does not exist.")
        return

    # `to_csv` opens the target in write mode and replaces its contents, so
    # the existing file does not have to be deleted first
    if file_path.exists():
        print(f"File `{file_path.name}` already exists. Overwriting file.")

    # Save the DataFrame to the specified file path
    df.to_csv(file_path, index=False)
    print(f"File saved as `{file_path.name}`")

### Florida Main Dataset

In [3]:
# Read the main dataset (the DataFrame the station data will be merged into)
# and order its rows by year, state, and county
fl_df = (
    pd.read_csv(Path("../../../../../data/processed_data/fl_ev_main_dataset.csv"))
    .sort_values(by=["year", "state", "county"])
)

print("Shape:", fl_df.shape)
fl_df

Shape: (268, 7)


Unnamed: 0,year,state,county,zip_codes,population,ev_registrations,median_income
0,2018,FL,Alachua County,"[32601, 32602, 32603, 32604, 32605, 32606, 326...",268851.0,766.0,49078
1,2018,FL,Baker County,"[32040, 32063, 32072, 32087]",28353.0,5.0,61769
2,2018,FL,Bay County,"[32401, 32402, 32403, 32404, 32405, 32406, 324...",186240.0,300.0,51829
3,2018,FL,Bradford County,"[32042, 32044, 32058, 32091, 32622]",27752.0,22.0,46197
4,2018,FL,Brevard County,"[32754, 32775, 32780, 32781, 32782, 32783, 327...",595203.0,1881.0,54359
...,...,...,...,...,...,...,...
263,2021,FL,Volusia County,"[32105, 32114, 32115, 32116, 32117, 32118, 321...",566368.0,1458.0,56786
264,2021,FL,Wakulla County,"[32326, 32327, 32346, 32355, 32358]",34270.0,38.0,72941
265,2021,FL,Walton County,"[32422, 32433, 32434, 32435, 32439, 32454, 324...",80021.0,314.0,68111
266,2021,FL,Washington County,"[32427, 32428, 32437, 32462, 32463]",24867.0,14.0,41806


### Florida EV Station Dataset

In [4]:
# Read the Florida EV station dataset (station counts by year and county)
# and add a constant `state` column so its keys line up with the main DataFrame
fl_stations_df = (
    pd.read_csv(Path("../../../../../data/processed_data/fl_evstation_progression.csv"))
    .assign(state="FL")
)

print("Shape:", fl_stations_df.shape)
fl_stations_df

Shape: (418, 5)


Unnamed: 0,year,county,ev_station_count,cumulative_ev_stations,state
0,2011,Broward County,3,3,FL
1,2011,Citrus County,1,1,FL
2,2011,Columbia County,1,1,FL
3,2011,Duval County,3,3,FL
4,2011,Hillsborough County,5,5,FL
...,...,...,...,...,...
413,2024,Sumter County,1,7,FL
414,2024,Suwannee County,1,2,FL
415,2024,Taylor County,1,3,FL
416,2024,Volusia County,20,103,FL


### Merge the two Datasets

In [5]:
# Key columns shared by both DataFrames
merge_keys = ["year", "state", "county"]

# Station columns that the merge will add to the main DataFrame. If the main
# DataFrame already carries any of them (e.g. it was loaded from a file that
# an earlier run of this notebook wrote), drop them first so the merge does
# not produce `_x`/`_y` suffixed duplicates.
station_columns = fl_stations_df.columns.difference(merge_keys)

# Left merge: keep every row of the main DataFrame; rows with no matching
# station record get NaN in the station columns
fl_df_clean = fl_df.drop(columns=station_columns, errors="ignore").merge(
    fl_stations_df,
    how="left",
    on=merge_keys,
)

# Duplicate keys in the station data would multiply rows; fail loudly if so
assert len(fl_df_clean) == len(fl_df), "Merge changed the number of rows"

print("Shape:", fl_df_clean.shape)
fl_df_clean

Shape: (268, 9)


Unnamed: 0,year,state,county,zip_codes,population,ev_registrations,median_income,ev_station_count,cumulative_ev_stations
0,2018,FL,Alachua County,"[32601, 32602, 32603, 32604, 32605, 32606, 326...",268851.0,766.0,49078,4.0,12.0
1,2018,FL,Baker County,"[32040, 32063, 32072, 32087]",28353.0,5.0,61769,,
2,2018,FL,Bay County,"[32401, 32402, 32403, 32404, 32405, 32406, 324...",186240.0,300.0,51829,,
3,2018,FL,Bradford County,"[32042, 32044, 32058, 32091, 32622]",27752.0,22.0,46197,,
4,2018,FL,Brevard County,"[32754, 32775, 32780, 32781, 32782, 32783, 327...",595203.0,1881.0,54359,4.0,21.0
...,...,...,...,...,...,...,...,...,...
263,2021,FL,Volusia County,"[32105, 32114, 32115, 32116, 32117, 32118, 321...",566368.0,1458.0,56786,9.0,37.0
264,2021,FL,Wakulla County,"[32326, 32327, 32346, 32355, 32358]",34270.0,38.0,72941,,
265,2021,FL,Walton County,"[32422, 32433, 32434, 32435, 32439, 32454, 324...",80021.0,314.0,68111,1.0,10.0
266,2021,FL,Washington County,"[32427, 32428, 32437, 32462, 32463]",24867.0,14.0,41806,,


In [6]:
# Drop the `ev_station_count` column; only `cumulative_ev_stations` is kept
fl_df_clean = fl_df_clean.drop(columns="ev_station_count")

print("Shape:", fl_df_clean.shape)
fl_df_clean

Shape: (268, 8)


Unnamed: 0,year,state,county,zip_codes,population,ev_registrations,median_income,cumulative_ev_stations
0,2018,FL,Alachua County,"[32601, 32602, 32603, 32604, 32605, 32606, 326...",268851.0,766.0,49078,12.0
1,2018,FL,Baker County,"[32040, 32063, 32072, 32087]",28353.0,5.0,61769,
2,2018,FL,Bay County,"[32401, 32402, 32403, 32404, 32405, 32406, 324...",186240.0,300.0,51829,
3,2018,FL,Bradford County,"[32042, 32044, 32058, 32091, 32622]",27752.0,22.0,46197,
4,2018,FL,Brevard County,"[32754, 32775, 32780, 32781, 32782, 32783, 327...",595203.0,1881.0,54359,21.0
...,...,...,...,...,...,...,...,...
263,2021,FL,Volusia County,"[32105, 32114, 32115, 32116, 32117, 32118, 321...",566368.0,1458.0,56786,37.0
264,2021,FL,Wakulla County,"[32326, 32327, 32346, 32355, 32358]",34270.0,38.0,72941,
265,2021,FL,Walton County,"[32422, 32433, 32434, 32435, 32439, 32454, 324...",80021.0,314.0,68111,10.0
266,2021,FL,Washington County,"[32427, 32428, 32437, 32462, 32463]",24867.0,14.0,41806,


In [7]:
# Count the missing values (NaN/None) in each column
fl_df_clean.isna().sum()

year                        0
state                       0
county                      4
zip_codes                   4
population                  4
ev_registrations            0
median_income               4
cumulative_ev_stations    133
dtype: int64

In [8]:
# Replace the NaNs in the `cumulative_ev_stations` column with 0.
# Rows without a matching station record come out of the left merge as NaN.
# The recorded outputs show this column holding float values, so `fillna(0)`
# alone is enough; no dtype inference step is needed afterwards.
fl_df_clean["cumulative_ev_stations"] = fl_df_clean["cumulative_ev_stations"].fillna(0)

# Fail loudly if any NaN is left in the `cumulative_ev_stations` column
assert fl_df_clean["cumulative_ev_stations"].notna().all()

# Show the missing values that remain in each column
fl_df_clean.isnull().sum()

year                      0
state                     0
county                    4
zip_codes                 4
population                4
ev_registrations          0
median_income             4
cumulative_ev_stations    0
dtype: int64

In [9]:
# Inspect the data types and non-null counts of each column.
# NOTE(review): the recorded output lists `median_income` as `object` rather
# than a numeric dtype — confirm whether it should be converted.
fl_df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 268 entries, 0 to 267
Data columns (total 8 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   year                    268 non-null    int64  
 1   state                   268 non-null    object 
 2   county                  264 non-null    object 
 3   zip_codes               264 non-null    object 
 4   population              264 non-null    float64
 5   ev_registrations        268 non-null    float64
 6   median_income           264 non-null    object 
 7   cumulative_ev_stations  268 non-null    float64
dtypes: float64(3), int64(1), object(4)
memory usage: 16.9+ KB


In [10]:
# Cast the two count columns (`cumulative_ev_stations`, `ev_registrations`)
# to integers
integer_columns = ["cumulative_ev_stations", "ev_registrations"]
fl_df_clean = fl_df_clean.astype(dict.fromkeys(integer_columns, int))

fl_df_clean

Unnamed: 0,year,state,county,zip_codes,population,ev_registrations,median_income,cumulative_ev_stations
0,2018,FL,Alachua County,"[32601, 32602, 32603, 32604, 32605, 32606, 326...",268851.0,766,49078,12
1,2018,FL,Baker County,"[32040, 32063, 32072, 32087]",28353.0,5,61769,0
2,2018,FL,Bay County,"[32401, 32402, 32403, 32404, 32405, 32406, 324...",186240.0,300,51829,0
3,2018,FL,Bradford County,"[32042, 32044, 32058, 32091, 32622]",27752.0,22,46197,0
4,2018,FL,Brevard County,"[32754, 32775, 32780, 32781, 32782, 32783, 327...",595203.0,1881,54359,21
...,...,...,...,...,...,...,...,...
263,2021,FL,Volusia County,"[32105, 32114, 32115, 32116, 32117, 32118, 321...",566368.0,1458,56786,37
264,2021,FL,Wakulla County,"[32326, 32327, 32346, 32355, 32358]",34270.0,38,72941,0
265,2021,FL,Walton County,"[32422, 32433, 32434, 32435, 32439, 32454, 324...",80021.0,314,68111,10
266,2021,FL,Washington County,"[32427, 32428, 32437, 32462, 32463]",24867.0,14,41806,0


In [11]:
# Put the columns in their final order, with `cumulative_ev_stations`
# placed ahead of `median_income`
columns_order = [
    "year",
    "state",
    "county",
    "zip_codes",
    "population",
    "ev_registrations",
    "cumulative_ev_stations",
    "median_income",
]

fl_df_clean = fl_df_clean.loc[:, columns_order]

print("Shape:", fl_df_clean.shape)
fl_df_clean

Shape: (268, 8)


Unnamed: 0,year,state,county,zip_codes,population,ev_registrations,cumulative_ev_stations,median_income
0,2018,FL,Alachua County,"[32601, 32602, 32603, 32604, 32605, 32606, 326...",268851.0,766,12,49078
1,2018,FL,Baker County,"[32040, 32063, 32072, 32087]",28353.0,5,0,61769
2,2018,FL,Bay County,"[32401, 32402, 32403, 32404, 32405, 32406, 324...",186240.0,300,0,51829
3,2018,FL,Bradford County,"[32042, 32044, 32058, 32091, 32622]",27752.0,22,0,46197
4,2018,FL,Brevard County,"[32754, 32775, 32780, 32781, 32782, 32783, 327...",595203.0,1881,21,54359
...,...,...,...,...,...,...,...,...
263,2021,FL,Volusia County,"[32105, 32114, 32115, 32116, 32117, 32118, 321...",566368.0,1458,37,56786
264,2021,FL,Wakulla County,"[32326, 32327, 32346, 32355, 32358]",34270.0,38,0,72941
265,2021,FL,Walton County,"[32422, 32433, 32434, 32435, 32439, 32454, 324...",80021.0,314,10,68111
266,2021,FL,Washington County,"[32427, 32428, 32437, 32462, 32463]",24867.0,14,0,41806


In [13]:
# Write the merged DataFrame to the processed-data directory as a CSV file.
# NOTE(review): this path is the same file the main dataset is loaded from
# at the top of the notebook, so running this cell overwrites the notebook's
# own input — confirm that is intended.
file_name = "fl_ev_main_dataset.csv"
file_path = Path(f"../../../../../data/processed_data/{file_name}")
save_csv_file(df=fl_df_clean, file_path=file_path)

File saved as `fl_main_dataset.csv`
