In [1]:
from pathlib import Path
import datetime as dt
import pandas as pd

In [2]:
def process_state_data(
    population_file,
    registration_file,
    data_dir="../../../../../data/processed_data",
):
    """Join one state's yearly population totals onto its vehicle registrations.

    Parameters
    ----------
    population_file : str
        Name of the population CSV located directly inside ``data_dir``.
        The file must provide a ``date`` column (parsed as datetimes) and a
        ``population`` column.
    registration_file : str
        Name of the registration CSV located inside
        ``data_dir / "veh_registrations"``. The file must provide ``year``
        and ``state`` columns.
    data_dir : str or pathlib.Path, optional
        Root folder of the processed data. The default is the location this
        function has always read from, so two-argument calls behave as
        before.

    Returns
    -------
    pandas.DataFrame
        The registration rows (left join, so every registration row is
        kept) with a ``population`` column added. Columns are ordered
        ``year``, ``state``, ``population`` followed by the remaining
        columns in their original order. Years without a population match
        get a missing ``population`` value.
    """
    data_root = Path(data_dir)

    # Load the datasets
    population_df = pd.read_csv(
        data_root / population_file,
        parse_dates=["date"],
    )
    registration_df = pd.read_csv(
        data_root / "veh_registrations" / registration_file
    )

    # Sum the population over all rows that share the same date, then reduce
    # each date to its calendar year so it can be matched against `year`.
    # NOTE(review): the grouping key is the full date, not the year. If a
    # population file ever holds several distinct dates within one year, the
    # merge below will repeat registration rows for that year -- confirm the
    # inputs carry a single date per year.
    population_df = (
        population_df.groupby("date").agg({"population": "sum"}).reset_index()
    )
    population_df["date"] = population_df["date"].dt.year

    # Left join keeps every registration row; the helper `date` key coming
    # from the population side is redundant with `year` and is dropped
    # without mutating the merged frame in place.
    merged = pd.merge(
        registration_df,
        population_df,
        left_on="year",
        right_on="date",
        how="left",
    ).drop(columns=["date"])

    # Put the identifying columns first, keep the rest in their original order
    leading = ["year", "state", "population"]
    trailing = [col for col in merged.columns if col not in leading]

    return merged[leading + trailing]

In [3]:
# (population file, registration file) pairs, one per state
files = [
    ("fl_population.csv", "florida_veh_registrations.csv"),
    ("ca_population.csv", "california_veh_registrations.csv"),
    ("tx_population.csv", "texas_veh_registrations.csv")
]

# Every per-state result is written into this one folder.
output_dir = Path("../../../../../data/processed_data")

# Validate the destination once, before any processing. The check used to run
# inside the loop after `process_state_data` had already read its inputs from
# this same folder, so a missing folder surfaced as a read error and this
# message could never be reached.
if not output_dir.exists():
    print(f"Error: The directory `{output_dir}` does not exist.")
else:
    # Process each state's data
    for population_file, registration_file in files:
        state_df = process_state_data(population_file, registration_file)

        # The text before the first underscore of the population file name
        # (e.g. "fl") becomes the prefix of the output file name.
        prefix = population_file.split("_")[0]
        file_name = f"{prefix}_state_analysis.csv"
        file_path = output_dir / file_name

        # Tell the reader when a previous result is being replaced, and
        # remove it so the write below always creates a fresh file.
        if file_path.exists():
            print(f"File `{file_path.name}` already exists. Overwriting file.")
            file_path.unlink()

        state_df.to_csv(file_path, index=False)
        print(f"File saved as `{file_path.name}`")

        display(state_df)

File `fl_state_analysis.csv` already exists. Overwriting file.
File saved as `fl_state_analysis.csv`


Unnamed: 0,year,state,population,electric,phev,hev,biodiesel,ethanol,cng,propane,hydrogen,methanol,gasoline,diesel,unknown
0,2016,Florida,20613477,11600,10100,207100,0,1168900,18000,0,0,0,13929200,353300,133600
1,2017,Florida,20963613,15900,13400,219700,0,1318500,16800,0,0,0,14267800,369900,131600
2,2018,Florida,21244317,27400,17400,226900,0,1423900,15500,0,0,0,14541500,386500,135600
3,2019,Florida,21477737,40300,20400,235300,0,1486600,14200,0,0,0,14726700,408500,139200
4,2020,Florida,21591299,58200,22400,248500,0,1513000,13100,0,0,0,14925500,433700,127300
5,2021,Florida,21830708,95600,32200,287000,129300,1154600,600,100,0,0,15595900,336900,97400
6,2022,Florida,22245521,168000,45800,338700,150900,1137200,400,100,0,0,15846500,343500,97200


File `ca_state_analysis.csv` already exists. Overwriting file.
File saved as `ca_state_analysis.csv`


Unnamed: 0,year,state,population,electric,phev,hev,biodiesel,ethanol,cng,propane,hydrogen,methanol,gasoline,diesel,unknown
0,2016,California,39167117,141500,116700,966700,0,1322600,80600,0,1300,400,27241000,710400,115500
1,2017,California,39358497,189700,159600,1039300,0,1495800,79300,0,3200,400,28171500,738600,122200
2,2018,California,39461588,273500,215000,1085300,0,1615200,77700,0,9400,300,28646700,761600,129800
3,2019,California,39512223,349700,247300,1154200,0,1698700,76200,0,12700,300,29210100,785800,136900
4,2020,California,39503200,425300,265500,1228700,0,1745100,75300,0,14000,300,29642700,825900,116700
5,2021,California,39145060,563100,315300,1355900,163600,1343200,12600,1500,11800,0,30512600,710500,10400
6,2022,California,39040616,903600,361100,1514000,183900,1338000,10300,1500,14900,0,31059000,725300,8200


File `tx_state_analysis.csv` already exists. Overwriting file.
File saved as `tx_state_analysis.csv`


Unnamed: 0,year,state,population,electric,phev,hev,biodiesel,ethanol,cng,propane,hydrogen,methanol,gasoline,diesel,unknown
0,2016,Texas,27914410,11900,8000,205800,0,2596600,64700,0,0,0,18245400,961200,213100
1,2017,Texas,28295273,16100,10900,217100,0,2813400,56900,0,0,0,18252800,982900,194300
2,2018,Texas,28628666,24500,14700,227700,0,3005900,51300,0,0,0,18453800,1019300,175300
3,2019,Texas,28995881,38400,18100,244600,0,3200700,47300,0,100,0,19293900,1070200,178000
4,2020,Texas,29234361,52200,20400,262300,0,3250000,43600,0,100,0,19609700,1107000,163600
5,2021,Texas,29561286,80900,30600,304700,376300,2422300,2200,1600,0,0,20599100,765100,131100
6,2022,Texas,30029848,149000,42800,361800,420800,2398800,1400,1400,0,0,21065800,771000,133200
