# In [None]:
import pandas as pd
import os
from urllib.request import urlretrieve
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole
# session; pandas' default for this option is 'warn'.
pd.options.mode.chained_assignment = None

def categorize_fuel(fuel_type):
    """
    Map a raw fuel type label onto one of three broad categories.

    The comparison is an exact, case-sensitive match against the known labels.

    Args:
        fuel_type (str): The fuel type label to classify.

    Returns:
        str: 'Fossil Fuel' or 'EV' for a recognised label, otherwise 'Other'.
    """
    # Checked in order; tuples (not sets) so any value, hashable or not, can be tested.
    category_members = (
        (
            'Fossil Fuel',
            ('Diesel', 'Flex Fuel', 'Gasoline', 'Gasoline Hybrid', 'Natural Gas', 'Propane'),
        ),
        (
            'EV',
            ('Battery Electric (BEV)', 'Fuel Cell (FCEV)', 'Plug-in Hybrid (PHEV)'),
        ),
    )

    for category, members in category_members:
        if fuel_type in members:
            return category

    # Anything unrecognised falls into the catch-all bucket.
    return 'Other'


def download_file(url, local_filename):
    """
    Download a file from a URL to a local filename.

    The destination directory is created when it does not exist yet. The
    payload is first written to '<local_filename>.part' and only moved to
    its final name once the transfer has completed, so a failed download
    never leaves a truncated file at local_filename.

    Args:
        url (str): The URL of the file to download.
        local_filename (str): The local file path to save the downloaded file.

    Raises:
        Exception: If the directory cannot be created or the download fails.
            The original error is attached as the exception's cause.
    """
    partial_filename = f"{local_filename}.part"
    target_dir = os.path.dirname(local_filename)

    try:
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        urlretrieve(url, partial_filename)
        # Replace in one step so readers never observe a half-written file.
        os.replace(partial_filename, local_filename)
    except Exception as e:
        # Best-effort removal of the incomplete download; the original error
        # is what matters to the caller.
        try:
            os.remove(partial_filename)
        except OSError:
            pass
        raise Exception(f"An error occurred while downloading the file: {e}") from e

    print(f"File downloaded successfully and saved as {local_filename}.")


def fueltype_per_countyyear_totals_df(excel_file):
    """
    Generate a DataFrame summarizing the number of vehicles by year, county, and fuel category.

    The path is resolved against the current working directory, and the
    workbook is downloaded first when it is not present locally. Rows whose
    county is 'Out of State' are excluded. Every (year, county) combination
    in the result has an 'EV' and a 'Fossil Fuel' row (with 0 vehicles when
    the data has none); 'Other' rows appear only where the data contains them.

    Args:
        excel_file (str): Path to the Excel file containing vehicle data.

    Returns:
        pd.DataFrame: Columns 'Data Year', 'County', 'Fuel Category' and
            'Number of Vehicles', sorted by the first three.

    Raises:
        FileNotFoundError: If the Excel file is not found when it is read.
        ValueError: If reading the workbook fails with a ValueError, e.g. it
            has no sheet named 'County'.
        KeyError: If the 'Make' / 'Model' columns are missing from the sheet.
        Exception: If the download fails or another unexpected error occurs
            while reading the Excel file.
    """
    excel_file = os.path.join(os.getcwd(), excel_file)

    if not os.path.exists(excel_file):
        print(f"File {excel_file} not found locally. Attempting to download...")
        url = 'https://www.energy.ca.gov/filebrowser/download/6311?fid=6311#block-symsoft-page-title'
        download_file(url, excel_file)

    ## Pull County sheet from the Excel file ##
    try:
        countyvehicle_df = pd.read_excel(excel_file, sheet_name="County")
    except FileNotFoundError:
        raise FileNotFoundError(f"The file {excel_file} was not found.")
    except ValueError as e:
        raise ValueError("An error occurred while reading the Excel file. Ensure the file contains a sheet named 'County'.") from e
    except Exception as e:
        raise Exception("An unexpected error occurred while reading the Excel file.") from e

    ## Drop irrelevant columns ##
    try:
        countyvehicle_df = countyvehicle_df.drop(columns=['Make', 'Model'])
    except KeyError as e:
        raise KeyError("The specified columns to drop do not exist in the DataFrame.") from e

    ## Remove unwanted county information ##
    # .copy() gives an independent frame, so the column assignment below never
    # writes into a view of countyvehicle_df.
    in_state = ~countyvehicle_df['County'].isin(['Out of State'])
    cal_countyvehicle_df = countyvehicle_df[in_state].copy()

    ## Simplify fuel types into categories ##
    cal_countyvehicle_df['Fuel Category'] = cal_countyvehicle_df['Fuel Type'].apply(categorize_fuel)

    ## Total the vehicles per year, county and fuel category ##
    group_keys = ['Data Year', 'County', 'Fuel Category']
    totals = cal_countyvehicle_df.groupby(group_keys)['Number of Vehicles'].sum()

    ## Guarantee an 'EV' and a 'Fossil Fuel' entry for every (year, county) pair ##
    # A single reindex over the required combinations replaces a Python-level
    # loop that concatenated one row at a time.
    required = pd.MultiIndex.from_product(
        [
            totals.index.get_level_values('Data Year').unique(),
            totals.index.get_level_values('County').unique(),
            ['EV', 'Fossil Fuel'],
        ],
        names=group_keys,
    )
    # The union keeps existing entries (including 'Other'); new ones get 0.
    totals = totals.reindex(totals.index.union(required), fill_value=0)

    cartype_df = totals.reset_index()

    # Sort by year, county and fuel category for readability.
    cartype_df = cartype_df.sort_values(by=group_keys).reset_index(drop=True)
    return cartype_df


# Build the per-year, per-county fuel-category totals from the workbook at this
# path (relative to the current working directory); the function attempts a
# download when the file is not present. The returned DataFrame is not stored.
fueltype_per_countyyear_totals_df('vehicle_data/california_vehicle.xlsx') 