In [1]:
import pandas as pd
from pathlib import Path
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
import sys, os
import plotly.graph_objects as go


In [64]:
clean_data = os.path.abspath('/Users/macpro/Documents/GitHub/fuel-electric-hybrid-vehicle-ml/data/predicted-data')


In [65]:
filename_2022_1995 = 'vehicle_data_with_clusters.csv'
master_df = pd.read_csv(Path(clean_data,f'{filename_2022_1995}'))


In [73]:
# Count the unique models for each make
total_models_df = master_df.groupby('make_')['model.1_'].nunique().reset_index()

# Rename the columns to make them more descriptive
total_models_df.columns = ['make_', 'total_models']



In [84]:
total_models_df

Unnamed: 0,make_,total_models
0,acura,31
1,alfa romeo,11
2,aston martin,22
3,audi,155
4,bentley,21
5,bmw,265
6,bugatti,5
7,buick,28
8,cadillac,52
9,chevrolet,128


In [108]:
master_df

Unnamed: 0,vehicle_id,vehicleclass_,make_,model.1_,model_year,cylinders_,fuelconsumption_city(l/100km),fuelconsumption_hwy(l/100km),fuelconsumption_comb(l/100km),co2emissions_(g/km),...,consumption_city(kwh/100km),fuelconsumption_hwy(kwh/100km),fuelconsumption_comb(kwh/100km),fuelconsumption_city(le/100km),fuelconsumption_hwy(le/100km),fuelconsumption_comb(le/100km),range_(km),hybrid_in_fuel,hybrid_in_electric,aggregate_levels
0,fuel-only_1,full-size,acura,integra,2023,4.0,7.9,6.3,7.2,167,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0
1,fuel-only_2,full-size,acura,integra a-spec,2023,4.0,8.1,6.5,7.4,172,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0
2,fuel-only_3,full-size,acura,integra a-spec,2023,4.0,8.9,6.5,7.8,181,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0
3,fuel-only_4,suv - small,acura,mdx sh-awd,2023,6.0,12.6,9.4,11.2,263,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0
4,fuel-only_5,suv - standard,acura,mdx sh-awd type s,2023,6.0,13.8,11.2,12.4,291,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13091,electric_420,suv - small,volkswagen,id.4,2023,0.0,0.0,0.0,0.0,0,...,18.2,21.2,19.6,2.0,2.4,2.2,336.0,1,0,0
13092,electric_421,suv - small,volkswagen,id.4 pro,2023,0.0,0.0,0.0,0.0,0,...,18.2,21.3,19.6,2.0,2.4,2.2,443.0,1,0,0
13093,electric_422,suv - small,volkswagen,id.4 awd pro,2023,0.0,0.0,0.0,0.0,0,...,20.1,22.6,21.2,2.3,2.5,2.4,410.0,1,0,0
13094,electric_423,suv - small,volvo,c40 recharge twin,2023,0.0,0.0,0.0,0.0,0,...,22.2,26.1,23.9,2.5,2.9,2.7,364.0,1,0,0


In [140]:
vehicle_type_count = master_df['vehicle_type'].value_counts().reset_index()
vehicle_type_count.columns = ['vehicle_type', 'count']



vehicle_type_count

px.bar(vehicle_type_count, x='vehicle_type', y='count', title='Vehicle Type Count')

In [117]:
# count total number of electric vehicles
electric_vehicles = master_df[master_df['fuel_type'] == 'Electricity']
electric_vehicles_count = electric_vehicles['fuel_type'].count()

electric_vehicles_count

0

In [89]:
make_total_avg_score_u = pd.DataFrame(master_df.groupby(["make_"])['predicted_co2_rating'].mean()).reset_index().rename(columns={'predicted_co2_rating':'avg_predicted_co2_rating_by_make'})

make_total_avg_score_u.sort_values(by='avg_predicted_co2_rating_by_make', ascending=False, inplace=True)


In [68]:
import numpy as np

# Merge this with your current dataframe
master_df_d = pd.merge(master_df, total_models_df, on='make_', how='left')



In [85]:
# Then you can use numpy's average function to compute the weighted average
make_total_avg_score = master_df_d.groupby('make_').apply(lambda x: np.average(x['predicted_co2_rating'], 
                                                                               weights=x['total_models'])).reset_index().rename(columns={0:'weighted_avg_predicted_co2_rating_by_make'})
make_total_avg_score.sort_values(by='weighted_avg_predicted_co2_rating_by_make', ascending=False, inplace=True)



In [103]:
weighted_avg_df = pd.merge(total_models_df, make_total_avg_score, on='make_', how='left')

# sort the dataframe by the weighted average
weighted_avg_df.sort_values(by='weighted_avg_predicted_co2_rating_by_make', ascending=False, inplace=True)

df = pd.merge(weighted_avg_df, vehicle_type_count, on='make_', how='left').sort_values(by='weighted_avg_predicted_co2_rating_by_make', ascending=False)

In [107]:
vehicle_type_count

vehicle_type,make_,electric,fuel-only,hybrid
0,acura,0.0,121.0,0.0
1,alfa romeo,0.0,49.0,0.0
2,aston martin,0.0,80.0,0.0
3,audi,18.0,495.0,11.0
4,bentley,0.0,85.0,5.0
5,bmw,36.0,842.0,53.0
6,bugatti,0.0,13.0,0.0
7,buick,0.0,166.0,0.0
8,cadillac,2.0,286.0,5.0
9,chevrolet,12.0,1019.0,8.0


In [106]:
make = 'bmw'
colors = {
    'background': '#003f5c',
    'text': 'white'
}

df_filtered = df[df['make_']==make]

fig = go.Figure(data=[
        go.Bar(name='Electric', x=df_filtered['make_'], y=df_filtered['electric'], marker_color='rgb(26, 118, 255)'),
        go.Bar(name='Fuel-only', x=df_filtered['make_'], y=df_filtered['fuel-only'], marker_color='rgb(55, 83, 109)'),
        go.Bar(name='Hybrid', x=df_filtered['make_'], y=df_filtered['hybrid'], marker_color='rgb(26, 188, 156)')
    ])

# Change the bar mode
fig.update_layout(
    barmode='stack',
    title=f'Number of Vehicle Types for {make.upper()}',
    title_x=0.5,
    xaxis=dict(title='Make'),
    yaxis=dict(title='Number of Vehicles'),
    plot_bgcolor=colors['background'],
    paper_bgcolor=colors['background'],
    font_color=colors['text'],
    xaxis_showgrid=False,
    yaxis_showgrid=False
)

In [98]:
px.bar(df, x='make_', y='weighted_avg_predicted_co2_rating_by_make', 
       color='make_', title='Weighted Average Predicted CO2 Rating by Make', 
       text='weighted_avg_predicted_co2_rating_by_make', color_discrete_sequence=px.colors.qualitative.Pastel1)

In [23]:
MAKES = master_df['make_'].unique()

MAKES

array(['acura', 'alfa romeo', 'aston martin', 'audi', 'bentley', 'bmw',
       'bugatti', 'buick', 'cadillac', 'chevrolet', 'chrysler', 'dodge',
       'fiat', 'ford', 'genesis', 'gmc', 'honda', 'hyundai', 'infiniti',
       'jaguar', 'jeep', 'kia', 'lamborghini', 'land rover', 'lexus',
       'lincoln', 'maserati', 'mazda', 'mercedes-benz', 'mini',
       'mitsubishi', 'nissan', 'porsche', 'ram', 'rolls-royce', 'subaru',
       'toyota', 'volkswagen', 'volvo', 'smart', 'scion', 'suzuki', 'srt',
       'karma', 'polestar', 'tesla', 'smart eq', 'lucid', 'rivian'],
      dtype=object)

In [49]:
value = 'acura'
def show_avg_predicted_co2_rating_by_make(value):
    filtered_df = master_df[master_df['make_'] == value]


    viz_table = pd.DataFrame(filtered_df.groupby(["make_",'model_year'])['predicted_co2_rating'].mean()).reset_index().rename(columns={'predicted_co2_rating':'avg_predicted_co2_rating_by_make'})

    fig = px.line(viz_table, x='model_year', y='avg_predicted_co2_rating_by_make', title=f'Average Predicted CO2 Rating by Make ({value.upper()})')

    return fig

In [50]:
def show_predicted_co2_rating_by_model(value):
    filtered_df = master_df[master_df['make_'] == value]

    # create line chart
    line_fig = px.scatter(filtered_df, 
                        x='model_year', 
                        y='predicted_co2_rating', 
                        title=f'Predicted CO2 ratings over time by make {value.upper()} and model (hover for model name)',
                        labels={'model_year':'Model Year', 'co2emissions_(g/km)':'CO2 Emissions (g/km)'}, 
                        hover_name='model.1_',
                        color='vehicle_type')

    return line_fig