In [1]:
import os
import sys
from pathlib import Path

import dash
import dash_bootstrap_components as dbc
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import dcc, html
from dash.dependencies import Input, Output


In [64]:
# Directory that holds the prediction output CSVs.
# The original machine-specific location stays as the default so existing runs
# behave the same; set the VEHICLE_DATA_DIR environment variable to point the
# notebook at the data on another machine without editing this cell.
clean_data = os.path.abspath(
    os.environ.get(
        'VEHICLE_DATA_DIR',
        '/Users/macpro/Documents/GitHub/fuel-electric-hybrid-vehicle-ml/data/predicted-data',
    )
)


In [65]:
# CSV file name, joined onto the data directory and read into master_df
filename_2022_1995 = 'vehicle_data_with_clusters.csv'
master_df = pd.read_csv(Path(clean_data) / filename_2022_1995)


In [73]:
# Number of distinct model names under each make, returned as a two-column
# frame: 'make_' and 'total_models'
total_models_df = (
    master_df
    .groupby('make_')['model.1_']
    .nunique()
    .rename('total_models')
    .reset_index()
)



In [84]:
# Render the frame of per-make distinct-model counts as the cell output
total_models_df

Unnamed: 0,make_,total_models
0,acura,31
1,alfa romeo,11
2,aston martin,22
3,audi,155
4,bentley,21
5,bmw,265
6,bugatti,5
7,buick,28
8,cadillac,52
9,chevrolet,128


In [77]:
# Tally rows per (make, vehicle type), pivot the vehicle types into columns,
# and put 0 where a make has no rows of a given type
vehicle_type_count = (
    master_df
    .groupby('make_')['vehicle_type']
    .value_counts()
    .unstack()
    .reset_index()
    .fillna(0)
)

# Show the resulting table
vehicle_type_count


vehicle_type,make_,electric,fuel-only,hybrid
0,acura,0.0,121.0,0.0
1,alfa romeo,0.0,49.0,0.0
2,aston martin,0.0,80.0,0.0
3,audi,18.0,495.0,11.0
4,bentley,0.0,85.0,5.0
5,bmw,36.0,842.0,53.0
6,bugatti,0.0,13.0,0.0
7,buick,0.0,166.0,0.0
8,cadillac,2.0,286.0,5.0
9,chevrolet,12.0,1019.0,8.0


In [89]:
# Unweighted mean of predicted_co2_rating per make, ordered from the highest
# average down to the lowest
make_total_avg_score_u = (
    master_df
    .groupby(["make_"])['predicted_co2_rating']
    .mean()
    .rename('avg_predicted_co2_rating_by_make')
    .reset_index()
    .sort_values(by='avg_predicted_co2_rating_by_make', ascending=False)
)


In [68]:
# Attach each make's distinct-model count (total_models) to every vehicle row.
# numpy (`np`, used by later cells) is imported in the notebook's top import cell
# rather than here, so all dependencies are declared in one place.
# validate='many_to_one' makes the merge raise if total_models_df ever lists a
# make more than once, which would otherwise silently duplicate vehicle rows.
master_df_d = master_df.merge(
    total_models_df,
    on='make_',
    how='left',
    validate='many_to_one',
)



In [85]:
# Then you can use numpy's average function to compute the weighted average
make_total_avg_score = master_df_d.groupby('make_').apply(lambda x: np.average(x['predicted_co2_rating'], 
                                                                               weights=x['total_models'])).reset_index().rename(columns={0:'weighted_avg_predicted_co2_rating_by_make'})
make_total_avg_score.sort_values(by='weighted_avg_predicted_co2_rating_by_make', ascending=False, inplace=True)



In [88]:
# Pair each make's model count with its weighted average rating, then order
# the makes from the highest weighted average to the lowest
weighted_avg_df = (
    total_models_df
    .merge(make_total_avg_score, on='make_', how='left')
    .sort_values(by='weighted_avg_predicted_co2_rating_by_make', ascending=False)
)

# Show the resulting table
weighted_avg_df

Unnamed: 0,make_,total_models,weighted_avg_predicted_co2_rating_by_make
37,rivian,10,10.0
41,smart eq,2,10.0
27,lucid,15,10.0
34,polestar,6,10.0
21,karma,5,10.0
45,tesla,61,10.0
40,smart,4,9.238095
12,fiat,13,6.627907
16,honda,44,6.560831
31,mini,35,6.451429


In [23]:
# Distinct values of the make_ column, in order of first appearance
MAKES = pd.unique(master_df['make_'])

MAKES

array(['acura', 'alfa romeo', 'aston martin', 'audi', 'bentley', 'bmw',
       'bugatti', 'buick', 'cadillac', 'chevrolet', 'chrysler', 'dodge',
       'fiat', 'ford', 'genesis', 'gmc', 'honda', 'hyundai', 'infiniti',
       'jaguar', 'jeep', 'kia', 'lamborghini', 'land rover', 'lexus',
       'lincoln', 'maserati', 'mazda', 'mercedes-benz', 'mini',
       'mitsubishi', 'nissan', 'porsche', 'ram', 'rolls-royce', 'subaru',
       'toyota', 'volkswagen', 'volvo', 'smart', 'scion', 'suzuki', 'srt',
       'karma', 'polestar', 'tesla', 'smart eq', 'lucid', 'rivian'],
      dtype=object)

In [49]:
# Module-level `value`; the function's own `value` parameter shadows it inside the body
value = 'acura'
def show_avg_predicted_co2_rating_by_make(value):
    """Plot the mean predicted CO2 rating per model year for one make.

    Parameters
    ----------
    value : str
        Make to plot; rows are kept where ``master_df['make_'] == value``.
        It is also upper-cased for the chart title.

    Returns
    -------
    The figure built by ``px.line`` with ``model_year`` on the x axis and the
    per-year mean of ``predicted_co2_rating`` on the y axis.
    """
    make_rows = master_df.loc[master_df['make_'] == value]

    # One row per (make, model year) holding that year's mean rating
    yearly_avg = (
        make_rows
        .groupby(["make_", 'model_year'])['predicted_co2_rating']
        .mean()
        .rename('avg_predicted_co2_rating_by_make')
        .reset_index()
    )

    return px.line(
        yearly_avg,
        x='model_year',
        y='avg_predicted_co2_rating_by_make',
        title=f'Average Predicted CO2 Rating by Make ({value.upper()})',
    )

In [50]:
def show_predicted_co2_rating_by_model(value):
    """Scatter every row of one make: model year against predicted CO2 rating.

    Parameters
    ----------
    value : str
        Make to plot; rows are kept where ``master_df['make_'] == value``.
        It is also upper-cased for the chart title.

    Returns
    -------
    The figure built by ``px.scatter``: one marker per row, coloured by
    ``vehicle_type``, with the ``model.1_`` value shown as the hover name.
    """
    make_rows = master_df[master_df['make_'] == value]

    # Scatter plot (not a line chart): each row of the make is its own marker
    scatter_fig = px.scatter(
        make_rows,
        x='model_year',
        y='predicted_co2_rating',
        title=f'Predicted CO2 ratings over time by make {value.upper()} and model (hover for model name)',
        # Label keys must name columns used in the figure. The y column is
        # 'predicted_co2_rating'; the previous 'co2emissions_(g/km)' key matched
        # no plotted column, so the y axis was left showing the raw column name.
        labels={'model_year': 'Model Year', 'predicted_co2_rating': 'Predicted CO2 Rating'},
        hover_name='model.1_',
        color='vehicle_type',
    )

    return scatter_fig