In [1]:
from pathlib import Path
import datetime as dt
import pandas as pd

In [2]:
# Create a function to save the DataFrames to CSV files
def save_csv_file(df, file_path):
    """ 
    Save a DataFrame to a CSV file at the specified file path.

    Parameters:
    - df: DataFrame to save
    - file_path: Path to save the CSV file
    """
    
    # Check if the parent directory exists
    if not file_path.parent.exists():
        print(f"Error: The directory `{file_path.parent}` does not exist.")
        return
    
    if file_path.exists():
        print(f"File `{file_path.name}` already exists. Overwriting file.")
        file_path.unlink()
    
    # Save the DataFrame to the specified file path
    df.to_csv(file_path, index=False)
    print(f"File saved as `{file_path.name}`")

### Texas EV Registrations DataFrame (Clean)

In [3]:
#  Load the EV registrations data for Florida
tx_ev_df = pd.read_csv(
    Path("../../../../../data/processed_data/tx_ev_registrations.csv"),
    parse_dates=["registration_date"],
)

#  Sort the data by registration date
tx_ev_df.sort_values("registration_date", inplace=True)

print("Shape:", tx_ev_df.shape)
tx_ev_df

Shape: (2274866, 7)


Unnamed: 0,registration_date,state,county,zip_code,make,model,year
619961,2017-07-01,TX,Travis County,78721,NISSAN,LEAF,2013
345572,2017-07-01,TX,Harris County,77008,CHEVROLET,VOLT,2012
1936284,2017-07-01,TX,Comal County,78130,NISSAN,LEAF,2015
2094853,2017-07-01,TX,Dallas County,75254,CHEVROLET,VOLT,2017
847936,2017-07-01,TX,Hidalgo County,78501,TESLA,MODEL X,2017
...,...,...,...,...,...,...,...
118603,2024-07-01,TX,Travis County,78739,TESLA,MODEL 3,2019
1411725,2024-07-01,TX,Denton County,75007,TESLA,MODEL Y,2023
115202,2024-07-01,TX,Denton County,76226,TESLA,MODEL 3,2023
922701,2024-07-01,TX,Nueces County,78405,FORD,FOCUS,2017


In [4]:
select_columns = ["registration_date", "state", "county", "zip_code"]

tx_ev_df = tx_ev_df[select_columns]

print("Shape:", tx_ev_df.shape)
tx_ev_df

Shape: (2274866, 4)


Unnamed: 0,registration_date,state,county,zip_code
619961,2017-07-01,TX,Travis County,78721
345572,2017-07-01,TX,Harris County,77008
1936284,2017-07-01,TX,Comal County,78130
2094853,2017-07-01,TX,Dallas County,75254
847936,2017-07-01,TX,Hidalgo County,78501
...,...,...,...,...
118603,2024-07-01,TX,Travis County,78739
1411725,2024-07-01,TX,Denton County,75007
115202,2024-07-01,TX,Denton County,76226
922701,2024-07-01,TX,Nueces County,78405


In [5]:
# Inspect the data types of the EV registrations DataFrame
tx_ev_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2274866 entries, 619961 to 74877
Data columns (total 4 columns):
 #   Column             Dtype         
---  ------             -----         
 0   registration_date  datetime64[ns]
 1   state              object        
 2   county             object        
 3   zip_code           object        
dtypes: datetime64[ns](1), object(3)
memory usage: 86.8+ MB


In [6]:
# Check for missing values 
# `zip_codes` has missing values
# `county` has missing values marked as "Unknown" so it is not seen as missing
tx_ev_df.isnull().sum()

registration_date       0
state                   0
county               4686
zip_code                0
dtype: int64

In [7]:
filter_rows = tx_ev_df["county"].isnull()

tx_ev_df.loc[filter_rows]

Unnamed: 0,registration_date,state,county,zip_code
2269181,2017-07-01,TX,,Error
1626947,2017-07-01,TX,,Error
474055,2017-08-01,TX,,Error
1944968,2017-08-01,TX,,Error
1964946,2017-08-01,TX,,Error
...,...,...,...,...
555978,2024-06-01,TX,,Error
1840462,2024-06-01,TX,,Error
338853,2024-06-01,TX,,Error
1498072,2024-06-01,TX,,Error


### Texas Population DataFrame (Clean)

In [8]:
# Load the population data for Florida
tx_pop_df = pd.read_csv(
    Path("../../../../../data/processed_data/tx_population.csv"),
    parse_dates=["date"],
)

# Sort the data by date
tx_pop_df.sort_values("date", inplace=True)

# Rename the `date` column to `year`
tx_pop_df.rename(columns={"date": "year"}, inplace=True)

print("Shape:", tx_pop_df.shape)
tx_pop_df

Shape: (3811, 5)


Unnamed: 0,year,state,county,zip_codes,population
0,2010-01-01,TX,Anderson County,"[75763, 75779, 75801, 75802, 75803, 75832, 758...",58493
161,2010-01-01,TX,McMullen County,"[78007, 78072]",711
162,2010-01-01,TX,Medina County,"[78009, 78016, 78039, 78056, 78059, 78066, 788...",46114
163,2010-01-01,TX,Menard County,"[76841, 76848, 76859]",2230
164,2010-01-01,TX,Midland County,"[79701, 79702, 79703, 79704, 79705, 79706, 797...",136974
...,...,...,...,...,...
3646,2024-01-01,TX,Grayson County,"[75092, 75090, 75020, 76273, 75495, 75021, 750...",141272
3647,2024-01-01,TX,Gregg County,"[75605, 75604, 75662, 75601, 75647, 75603, 75693]",130580
3648,2024-01-01,TX,Grimes County,"[77868, 77363, 77831, 77861, 77830, 77876]",29742
3682,2024-01-01,TX,Jones County,"[79601, 79553, 79525, 79501, 79520, 79503]",37863


In [9]:
# Inspect the data types of the population DataFrame
tx_pop_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3811 entries, 0 to 3810
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   year        3811 non-null   datetime64[ns]
 1   state       3811 non-null   object        
 2   county      3811 non-null   object        
 3   zip_codes   3811 non-null   object        
 4   population  3811 non-null   int64         
dtypes: datetime64[ns](1), int64(1), object(3)
memory usage: 178.6+ KB


In [10]:
# Inspect the missing values in the population DataFrame
tx_pop_df.isnull().sum()

year          0
state         0
county        0
zip_codes     0
population    0
dtype: int64

### Compare values we will merge with on both DataFrames and ensure it matches

In [11]:
# Inspect both County Values in both DataFrames
# To ensure that the county names are the same in both DataFrames, we will compare the unique county values in each DataFrame.
main_df_counties = set(tx_ev_df['county'])
population_df_counties = set(tx_pop_df['county']) 

# Check for counties in the main_df but not in the population_df
print("Counties in main_df but not in population_df:", main_df_counties.difference(population_df_counties))
print("Counties in population_df but not in main_df:", population_df_counties.difference(main_df_counties))

Counties in main_df but not in population_df: {nan, 'Mclennan County'}
Counties in population_df but not in main_df: {'Hartley County', 'Reagan County', 'Crane County', 'Terrell County', 'Kenedy County', 'Kent County', 'McMullen County', 'Otero County', 'Glasscock County', 'Motley County', 'Schleicher County', 'Cottle County'}


#### Inspect `county` column value of `Mclennan County` on both DataFrames

In [12]:
# Check if the county names are the same in both DataFrames

# Inspect first the value `McLennan County` in the `tx_ev_df` DataFrame
main_county_name = "Mclennan County"
filter_rows = tx_ev_df["county"] == main_county_name

# Display the rows where the county is 'Mclennan County'
tx_ev_df.loc[filter_rows].head(2)

Unnamed: 0,registration_date,state,county,zip_code
2273851,2023-08-01,TX,Mclennan County,76702
2273854,2023-08-01,TX,Mclennan County,76702


In [13]:
# Inspect second the value `McLennan County` in the `tx_pop_df` DataFrame
# Value has a capital `L` in the `tx_pop_df` DataFrame
pop_county_name = "McLennan County"
filter_rows = tx_ev_df["county"] == pop_county_name

tx_pop_df.loc[filter_rows].head(2)

Unnamed: 0,year,state,county,zip_codes,population
180,2010-01-01,TX,Orange County,"[77611, 77626, 77630, 77631, 77632, 77639, 776...",82012
232,2010-01-01,TX,Val Verde County,"[78837, 78840, 78841, 78842, 78843, 78847, 78871]",48971


In [14]:
# Change the value of 'Mclennan County' to 'McLennan County' in the main DataFrame using .loc
tx_ev_df.loc[tx_ev_df["county"] == main_county_name, "county"] = pop_county_name

# Confirm changes have been made to the main DataFrame
tx_ev_df.loc[tx_ev_df["county"] == pop_county_name].head(2)

Unnamed: 0,registration_date,state,county,zip_code
1103269,2017-07-01,TX,McLennan County,76712
474034,2017-07-01,TX,McLennan County,76710


In [15]:
# Create a new `year` column in the `tx_ev_df` DataFrame
tx_ev_df = tx_ev_df.copy()
tx_ev_df["year"] = tx_ev_df.loc[:, "registration_date"].dt.year

print("Shape:", tx_ev_df.shape)
tx_ev_df

Shape: (2274866, 5)


Unnamed: 0,registration_date,state,county,zip_code,year
619961,2017-07-01,TX,Travis County,78721,2017
345572,2017-07-01,TX,Harris County,77008,2017
1936284,2017-07-01,TX,Comal County,78130,2017
2094853,2017-07-01,TX,Dallas County,75254,2017
847936,2017-07-01,TX,Hidalgo County,78501,2017
...,...,...,...,...,...
118603,2024-07-01,TX,Travis County,78739,2024
1411725,2024-07-01,TX,Denton County,75007,2024
115202,2024-07-01,TX,Denton County,76226,2024
922701,2024-07-01,TX,Nueces County,78405,2024


### Prepare EV Registration DataFrame for Merge

In [16]:
# Group the EV registrations DataFrame by `year`, `state`, `county`, and `registration_date`...
# to get the total number of registrations per specific dates grouped by year, state, and county
# Create column for the total number of registrations called `registrations`
tx_ev_df_group = tx_ev_df.groupby(['year', 'state', 'county', "registration_date"], dropna=False).size().reset_index(name='ev_registrations')

print("Shape:", tx_ev_df_group.shape)
tx_ev_df_group.sort_values(by=["county", "registration_date", "year"])

Shape: (10620, 5)


Unnamed: 0,year,state,county,registration_date,ev_registrations
0,2017,TX,Anderson County,2017-07-01,1
391,2018,TX,Anderson County,2018-03-01,1
392,2018,TX,Anderson County,2018-05-01,1
393,2018,TX,Anderson County,2018-07-01,4
394,2018,TX,Anderson County,2018-09-01,3
...,...,...,...,...,...
10615,2024,TX,,2024-02-01,82
10616,2024,TX,,2024-03-01,112
10617,2024,TX,,2024-04-01,75
10618,2024,TX,,2024-05-01,53


In [17]:
# Check for missing values in the grouped DataFrame
tx_ev_df_group.isnull().sum()

year                  0
state                 0
county               84
registration_date     0
ev_registrations      0
dtype: int64

In [18]:
# Inspect to see if Unknown values are kept after grouping
# Important to keep as they have valuable information on registration counts
tx_ev_df_group.loc[tx_ev_df_group["county"].isna(), :]

Unnamed: 0,year,state,county,registration_date,ev_registrations
385,2017,TX,,2017-07-01,2
386,2017,TX,,2017-08-01,3
387,2017,TX,,2017-09-01,7
388,2017,TX,,2017-10-01,5
389,2017,TX,,2017-11-01,1
...,...,...,...,...,...
10615,2024,TX,,2024-02-01,82
10616,2024,TX,,2024-03-01,112
10617,2024,TX,,2024-04-01,75
10618,2024,TX,,2024-05-01,53


### Prepare Population DataFrame for Merge

In [19]:
# Change the value of the year from DateTime to int ready for merging
tx_pop_df['year'] = tx_pop_df['year'].dt.year

print("Shape:", tx_pop_df.shape)
tx_pop_df

Shape: (3811, 5)


Unnamed: 0,year,state,county,zip_codes,population
0,2010,TX,Anderson County,"[75763, 75779, 75801, 75802, 75803, 75832, 758...",58493
161,2010,TX,McMullen County,"[78007, 78072]",711
162,2010,TX,Medina County,"[78009, 78016, 78039, 78056, 78059, 78066, 788...",46114
163,2010,TX,Menard County,"[76841, 76848, 76859]",2230
164,2010,TX,Midland County,"[79701, 79702, 79703, 79704, 79705, 79706, 797...",136974
...,...,...,...,...,...
3646,2024,TX,Grayson County,"[75092, 75090, 75020, 76273, 75495, 75021, 750...",141272
3647,2024,TX,Gregg County,"[75605, 75604, 75662, 75601, 75647, 75603, 75693]",130580
3648,2024,TX,Grimes County,"[77868, 77363, 77831, 77861, 77830, 77876]",29742
3682,2024,TX,Jones County,"[79601, 79553, 79525, 79501, 79520, 79503]",37863


In [20]:
# Create a list of unique years in the EV registrations DataFrame
# This will be used to filter the population DataFrame
# and keep only the years that are present in the EV registrations DataFrame

select_years = tx_ev_df_group['year'].unique()
select_years

array([2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024])

In [21]:
# Filter the population DataFrame to keep only the years present in the EV registrations DataFrame
filter_years = tx_pop_df['year'].isin(select_years)

# Create a new DataFrame with the filtered population data
tx_pop_filtered = tx_pop_df.loc[filter_years, :]

# Reset the index of the filtered population DataFrame
tx_pop_filtered.reset_index(drop=True, inplace=True)

tx_pop_filtered

Unnamed: 0,year,state,county,zip_codes,population
0,2017,TX,Nacogdoches County,"[75760, 75788, 75937, 75943, 75944, 75946, 759...",65317
1,2017,TX,Motley County,"[79234, 79244, 79256]",1226
2,2017,TX,Morris County,"[75568, 75571, 75636, 75638, 75668]",12375
3,2017,TX,Moore County,"[79013, 79029, 79086, 79105]",21604
4,2017,TX,Montgomery County,"[77301, 77302, 77303, 77304, 77305, 77306, 773...",571615
...,...,...,...,...,...
2028,2024,TX,Grayson County,"[75092, 75090, 75020, 76273, 75495, 75021, 750...",141272
2029,2024,TX,Gregg County,"[75605, 75604, 75662, 75601, 75647, 75603, 75693]",130580
2030,2024,TX,Grimes County,"[77868, 77363, 77831, 77861, 77830, 77876]",29742
2031,2024,TX,Jones County,"[79601, 79553, 79525, 79501, 79520, 79503]",37863


### Merge the Population DataFrame with EV Registration DataFrame

In [22]:
# Merge the filtered population DataFrame with the EV registrations DataFrame
# This will allow us to see the adoption rate of EVs per county in Florida
# We will see the number of registrations per county and the population per county
tx_ev_adoption_df = pd.merge(tx_pop_filtered, tx_ev_df_group, how='outer', on=['year', 'state', 'county'])

tx_ev_adoption_df.sort_values(['year', 'state', 'county'], inplace=True)

print("Shape:", tx_ev_adoption_df.shape)
tx_ev_adoption_df

Shape: (11031, 7)


Unnamed: 0,year,state,county,zip_codes,population,registration_date,ev_registrations
0,2017,TX,Anderson County,"[75763, 75779, 75801, 75802, 75803, 75832, 758...",58175.0,2017-07-01,1.0
1,2017,TX,Andrews County,[79714],17603.0,2017-12-01,1.0
2,2017,TX,Angelina County,"[75901, 75902, 75903, 75904, 75915, 75941, 759...",87572.0,2017-08-01,1.0
3,2017,TX,Angelina County,"[75901, 75902, 75903, 75904, 75915, 75941, 759...",87572.0,2017-09-01,1.0
4,2017,TX,Aransas County,"[78358, 78381, 78382]",25392.0,2017-08-01,1.0
...,...,...,...,...,...,...,...
11026,2024,TX,,,,2024-02-01,82.0
11027,2024,TX,,,,2024-03-01,112.0
11028,2024,TX,,,,2024-04-01,75.0
11029,2024,TX,,,,2024-05-01,53.0


In [23]:
# Check null values after merging to keep the necessary `nan` values
# and remove the unnecessary `nan` values
tx_ev_adoption_df.isnull().sum()

year                   0
state                  0
county                84
zip_codes             84
population            84
registration_date    411
ev_registrations     411
dtype: int64

In [24]:
tx_ev_adoption_df.loc[tx_ev_adoption_df["ev_registrations"].isnull(), :]

Unnamed: 0,year,state,county,zip_codes,population,registration_date,ev_registrations
7,2017,TX,Armstrong County,"[79019, 79094]",1867.0,NaT,
13,2017,TX,Bailey County,"[79320, 79324, 79344, 79347]",7066.0,NaT,
21,2017,TX,Baylor County,[76380],3555.0,NaT,
36,2017,TX,Borden County,[79738],670.0,NaT,
37,2017,TX,Bosque County,"[76634, 76637, 76644, 76649, 76652, 76665, 766...",18295.0,NaT,
...,...,...,...,...,...,...,...
10816,2024,TX,Schleicher County,"[76936, 76935, 76841]",4381.0,NaT,
10822,2024,TX,Shackelford County,"[76430, 79533, 76464]",3111.0,NaT,
10852,2024,TX,Sterling County,[76951],1392.0,NaT,
10874,2024,TX,Terrell County,"[79848, 79781, 78851]",927.0,NaT,


In [25]:
tx_ev_adoption_df.loc[tx_ev_adoption_df["zip_codes"].isnull()]

Unnamed: 0,year,state,county,zip_codes,population,registration_date,ev_registrations
508,2017,TX,,,,2017-07-01,2.0
509,2017,TX,,,,2017-08-01,3.0
510,2017,TX,,,,2017-09-01,7.0
511,2017,TX,,,,2017-10-01,5.0
512,2017,TX,,,,2017-11-01,1.0
...,...,...,...,...,...,...,...
11026,2024,TX,,,,2024-02-01,82.0
11027,2024,TX,,,,2024-03-01,112.0
11028,2024,TX,,,,2024-04-01,75.0
11029,2024,TX,,,,2024-05-01,53.0


In [26]:
# Remove the unnecessary rows that have `nan` values 
# present in the `registrations` column...
tx_ev_adoption_df.dropna(subset=['ev_registrations'], inplace=True)

# Confirm the removal of the unnecessary rows
tx_ev_adoption_df.isna().sum()

year                  0
state                 0
county               84
zip_codes            84
population           84
registration_date     0
ev_registrations      0
dtype: int64

In [27]:
# Check the missing values in `zip_codes` column to see if they are necessary
# Confirmed that the missing values are necessary due to containing important numerical data for registrations
tx_ev_adoption_df.loc[tx_ev_adoption_df["zip_codes"].isnull(), :]

Unnamed: 0,year,state,county,zip_codes,population,registration_date,ev_registrations
508,2017,TX,,,,2017-07-01,2.0
509,2017,TX,,,,2017-08-01,3.0
510,2017,TX,,,,2017-09-01,7.0
511,2017,TX,,,,2017-10-01,5.0
512,2017,TX,,,,2017-11-01,1.0
...,...,...,...,...,...,...,...
11026,2024,TX,,,,2024-02-01,82.0
11027,2024,TX,,,,2024-03-01,112.0
11028,2024,TX,,,,2024-04-01,75.0
11029,2024,TX,,,,2024-05-01,53.0


In [28]:
# Sort the DataFrame by year, state, and county
tx_ev_adoption_df.sort_values(['year', 'state', 'county', 'registration_date'], inplace=True)

print("Shape:", tx_ev_adoption_df.shape)
tx_ev_adoption_df

Shape: (10620, 7)


Unnamed: 0,year,state,county,zip_codes,population,registration_date,ev_registrations
0,2017,TX,Anderson County,"[75763, 75779, 75801, 75802, 75803, 75832, 758...",58175.0,2017-07-01,1.0
1,2017,TX,Andrews County,[79714],17603.0,2017-12-01,1.0
2,2017,TX,Angelina County,"[75901, 75902, 75903, 75904, 75915, 75941, 759...",87572.0,2017-08-01,1.0
3,2017,TX,Angelina County,"[75901, 75902, 75903, 75904, 75915, 75941, 759...",87572.0,2017-09-01,1.0
4,2017,TX,Aransas County,"[78358, 78381, 78382]",25392.0,2017-08-01,1.0
...,...,...,...,...,...,...,...
11026,2024,TX,,,,2024-02-01,82.0
11027,2024,TX,,,,2024-03-01,112.0
11028,2024,TX,,,,2024-04-01,75.0
11029,2024,TX,,,,2024-05-01,53.0


In [29]:
# Inspect the data types of the DataFrame
tx_ev_adoption_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10620 entries, 0 to 11030
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   year               10620 non-null  int32         
 1   state              10620 non-null  object        
 2   county             10536 non-null  object        
 3   zip_codes          10536 non-null  object        
 4   population         10536 non-null  float64       
 5   registration_date  10620 non-null  datetime64[ns]
 6   ev_registrations   10620 non-null  float64       
dtypes: datetime64[ns](1), float64(2), int32(1), object(3)
memory usage: 622.3+ KB


### Group EV Adoption by Year

In [30]:
# Create a new DataFrame to calculate the EV adoption rate per year per county in Florida
# Do not want to overwrite the original DataFrame
tx_ev_adopt_year = tx_ev_adoption_df.copy()

# Group the DataFrame by `year` and `county` to get the total number of registrations per year per county
tx_ev_adopt_year['ev_registrations'] = tx_ev_adopt_year.groupby(['year', 'county'], dropna=False)['ev_registrations'].transform('sum')

# Remove duplicates in `year` and `county` to get the unique values after grouping
tx_ev_adopt_year.drop_duplicates(subset=['year', 'county'], inplace=True)

# Drop the unnecessary columns
tx_ev_adopt_year.drop(columns=['registration_date'], inplace=True)

print("Shape:", tx_ev_adopt_year.shape)
tx_ev_adopt_year

Shape: (1630, 6)


Unnamed: 0,year,state,county,zip_codes,population,ev_registrations
0,2017,TX,Anderson County,"[75763, 75779, 75801, 75802, 75803, 75832, 758...",58175.0,1.0
1,2017,TX,Andrews County,[79714],17603.0,1.0
2,2017,TX,Angelina County,"[75901, 75902, 75903, 75904, 75915, 75941, 759...",87572.0,2.0
4,2017,TX,Aransas County,"[78358, 78381, 78382]",25392.0,1.0
5,2017,TX,Archer County,"[76351, 76366, 76370, 76379, 76389]",8783.0,2.0
...,...,...,...,...,...,...
11014,2024,TX,Yoakum County,"[79355, 79376]",1685.0,6.0
11016,2024,TX,Young County,"[76450, 76374, 76460, 76481]",17309.0,43.0
11022,2024,TX,Zapata County,"[78076, 78067, 78564]",13896.0,6.0
11024,2024,TX,Zavala County,"[78839, 78872, 78829]",9700.0,3.0


In [31]:
tx_ev_adopt_year.isnull().sum()

year                0
state               0
county              8
zip_codes           8
population          8
ev_registrations    0
dtype: int64

In [32]:
tx_ev_adopt_year.loc[tx_ev_adopt_year["county"].isnull(), :]

Unnamed: 0,year,state,county,zip_codes,population,ev_registrations
508,2017,TX,,,,22.0
1526,2018,TX,,,,58.0
2769,2019,TX,,,,109.0
4209,2020,TX,,,,169.0
5963,2021,TX,,,,348.0
7848,2022,TX,,,,928.0
9918,2023,TX,,,,2553.0
11025,2024,TX,,,,499.0


In [33]:
# Sort the DataFrame by year and county
tx_ev_adopt_year.sort_values(['year', 'county'], inplace=True)

print("Shape:", tx_ev_adopt_year.shape)
tx_ev_adopt_year

Shape: (1630, 6)


Unnamed: 0,year,state,county,zip_codes,population,ev_registrations
0,2017,TX,Anderson County,"[75763, 75779, 75801, 75802, 75803, 75832, 758...",58175.0,1.0
1,2017,TX,Andrews County,[79714],17603.0,1.0
2,2017,TX,Angelina County,"[75901, 75902, 75903, 75904, 75915, 75941, 759...",87572.0,2.0
4,2017,TX,Aransas County,"[78358, 78381, 78382]",25392.0,1.0
5,2017,TX,Archer County,"[76351, 76366, 76370, 76379, 76389]",8783.0,2.0
...,...,...,...,...,...,...
11014,2024,TX,Yoakum County,"[79355, 79376]",1685.0,6.0
11016,2024,TX,Young County,"[76450, 76374, 76460, 76481]",17309.0,43.0
11022,2024,TX,Zapata County,"[78076, 78067, 78564]",13896.0,6.0
11024,2024,TX,Zavala County,"[78839, 78872, 78829]",9700.0,3.0


In [34]:
# Save the DataFrame to a CSV file
file_name = "tx_ev_registration_population.csv"
file_path = Path(f"../../../../../data/processed_data/{file_name}")
save_csv_file(tx_ev_adopt_year, file_path)

File `tx_ev_registration_population.csv` already exists. Overwriting file.
File saved as `tx_ev_registration_population.csv`
