In [None]:
# Install dash
!pip install dash

# Required imports for Dash and visualization
from dash import Dash, dcc, html, Input, Output
import plotly.graph_objects as go

# Data handling
import pandas as pd
import numpy as np

# Plotly express (optional but useful for simpler plots)
import plotly.express as px

# Scikit-learn components for modeling
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OrdinalEncoder

# Runtime switches for the Dash server
use_reloader = False  # Avoids reloading issues in Jupyter
debug = True  # Set to False in production

# Import necessary modules for file upload
import io
from google.colab import files

# Upload the file directly from your computer
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; upload the IEA Global EV Data CSV to continue.")

# Read the bytes of the file that was just uploaded instead of a hard-coded
# filename: Colab renames repeated uploads (e.g. "... (7).csv"), so a fixed
# name can silently load a stale copy from an earlier upload.
uploaded_name, uploaded_bytes = next(iter(uploaded.items()))
data = pd.read_csv(io.BytesIO(uploaded_bytes))



Saving IEA Global EV Data 2024.csv to IEA Global EV Data 2024 (7).csv


In [None]:
# Snapshot the freshly loaded frame so it can be restored without re-uploading
data_backup = data.copy(deep=True)

In [None]:
# Restore the working frame from the snapshot
data = data_backup.copy(deep=True)

# Take a fresh snapshot of the restored working frame
data_backup = data.copy(deep=True)

In [None]:
# Show the column labels of the loaded dataset
data.columns

Index(['region', 'category', 'parameter', 'mode', 'powertrain', 'year', 'unit',
       'value'],
      dtype='object')

In [None]:
# Inspect the distinct values present in the 'mode' column
data['mode'].unique()

array(['Cars', 'EV', 'Buses', 'Vans', 'Trucks'], dtype=object)

In [None]:
# Keep only 'Cars' rows whose category is not 'Projection-APS'.
# .copy() makes the result an independent DataFrame, so the prediction columns
# assigned to `data` later do not trigger pandas' SettingWithCopyWarning.
not_aps = data['category'] != 'Projection-APS'
is_car = data['mode'] == 'Cars'
data = data[not_aps & is_car].copy()

In [None]:
# Check which categories remain in `data`
data['category'].unique()

array(['Historical', 'Projection-STEPS'], dtype=object)

In [None]:
# Check which modes remain in `data`
data['mode'].unique()

array(['Cars'], dtype=object)

In [None]:
# List the distinct parameters available in `data`
data['parameter'].unique()

array(['EV stock share', 'EV sales share', 'EV sales', 'EV stock',
       'Electricity demand', 'Oil displacement Mbd',
       'Oil displacement, million lge'], dtype=object)

In [None]:
# Rows in the 'Historical' category; used below only to decide which parameters get models
historical_data = data[data['category'] == 'Historical']

# One-hot encode 'region' and 'powertrain' for the entire dataset
data_encoded = pd.get_dummies(data, columns=['region', 'powertrain'], drop_first=True)

# Feature set: the year plus every dummy column created above
encoded_columns = [
    col for col in data_encoded.columns
    if col.startswith(('region_', 'powertrain_'))
]
feature_columns = ['year'] + encoded_columns

# Start each prediction column empty; it is filled parameter by parameter below
for prediction_column in ('Linear_Prediction', 'RF_Prediction', 'XGB_Prediction'):
    data[prediction_column] = np.nan

# Fit one set of models per parameter
for parameter in historical_data['parameter'].unique():
    # NOTE(review): the rows come from data_encoded, which still holds every
    # category left in `data` (not only 'Historical') - confirm that training
    # on projection rows as well is intended.
    parameter_rows = data['parameter'] == parameter
    X = data_encoded.loc[parameter_rows, feature_columns]
    y = data_encoded.loc[parameter_rows, 'value']

    # Hold out 20% of the rows for the MSE report
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Prediction column -> estimator that fills it
    models = {
        'Linear_Prediction': LinearRegression(),
        'RF_Prediction': RandomForestRegressor(n_estimators=100, random_state=42),
        'XGB_Prediction': XGBRegressor(n_estimators=100, random_state=42),
    }

    test_mse = {}
    for prediction_column, model in models.items():
        model.fit(X_train, y_train)
        test_mse[prediction_column] = mean_squared_error(y_test, model.predict(X_test))

        # Predict every row of this parameter (training rows included) and
        # store the result in the original dataset
        data.loc[parameter_rows, prediction_column] = model.predict(X)

    # Print MSE results
    print(
        f"{parameter}: Linear MSE={test_mse['Linear_Prediction']}, "
        f"RF MSE={test_mse['RF_Prediction']}, XGB MSE={test_mse['XGB_Prediction']}"
    )

EV stock share: Linear MSE=6.841515606910906, RF MSE=1.147830750873096, XGB MSE=0.5714888701893339
EV sales share: Linear MSE=60.73317360874346, RF MSE=16.035423688135864, XGB MSE=11.815457709420597
EV sales: Linear MSE=3915500910608.366, RF MSE=5203166707893.389, XGB MSE=7680529561625.534
EV stock: Linear MSE=42669873307781.75, RF MSE=193477700564022.47, XGB MSE=465764537530717.8
Electricity demand: Linear MSE=13233070880.258316, RF MSE=19327104582.187046, XGB MSE=16927975383.219526
Oil displacement Mbd: Linear MSE=0.1641345290319283, RF MSE=0.05208952379983013, XGB MSE=0.056943659039588895
Oil displacement, million lge: Linear MSE=538711713.2340933, RF MSE=149445065.9852895, XGB MSE=185768186.55550134


In [None]:
# Create the Dash app
app = Dash(__name__)

# Values offered by the two dropdowns, in order of first appearance in `data`
parameter_choices = data['parameter'].unique()
region_choices = data['region'].unique()

# Checklist for Category (several categories can be ticked at once)
category_selector = dcc.Checklist(
    id='category-checklist',
    options=[
        {'label': 'Historical', 'value': 'Historical'},
        {'label': 'Projection-STEPS', 'value': 'Projection-STEPS'}
    ],
    value=['Historical'],  # Default selection
    inline=True
)

# Dropdown for Parameter, defaulting to the first available parameter
parameter_selector = dcc.Dropdown(
    id='parameter-dropdown',
    options=[{'label': param, 'value': param} for param in parameter_choices],
    value=parameter_choices[0]
)

# Dropdown for Region, defaulting to the first available region
region_selector = dcc.Dropdown(
    id='region-dropdown',
    options=[{'label': region, 'value': region} for region in region_choices],
    value=region_choices[0]
)

# Checklist for Model Predictions; each value is a prediction column of `data`
model_selector = dcc.Checklist(
    id='model-checklist',
    options=[
        {'label': 'Linear Regression', 'value': 'Linear_Prediction'},
        {'label': 'Random Forest', 'value': 'RF_Prediction'},
        {'label': 'XGBoost', 'value': 'XGB_Prediction'}
    ],
    value=['Linear_Prediction'],  # Default selection
    inline=True
)

# Layout of the app: title, the four controls with their labels, then the graph
app.layout = html.Div([
    html.H1("Interactive EV Data Visualization with Predictions"),
    html.Label("Select Category:"),
    category_selector,
    html.Label("Select Parameter:"),
    parameter_selector,
    html.Label("Select Region:"),
    region_selector,
    html.Label("Select Predictions to Display:"),
    model_selector,
    dcc.Graph(id='line-plot')
])

# Callback to update the graph based on dropdown inputs and checklist
@app.callback(
    Output('line-plot', 'figure'),
    Input('category-checklist', 'value'),
    Input('parameter-dropdown', 'value'),
    Input('region-dropdown', 'value'),
    Input('model-checklist', 'value')
)
def update_graph(selected_categories, selected_parameter, selected_region, selected_models):
    """Build the line plot for the current control selections.

    Args:
        selected_categories: Values ticked in 'category-checklist'.
        selected_parameter: Value chosen in 'parameter-dropdown'.
        selected_region: Value chosen in 'region-dropdown'.
        selected_models: Prediction column names ticked in 'model-checklist'.

    Returns:
        A plotly ``go.Figure`` with one "Actual" trace per selected category
        and one "Prediction" trace per selected model. When the selection
        contains more than one powertrain, each powertrain gets its own trace.
    """
    # Treat a missing checklist value as "nothing selected"
    selected_categories = selected_categories or []
    selected_models = selected_models or []

    # Filter the data based on dropdown and checklist selections; sorting by
    # year makes every line run left to right regardless of row order in `data`
    filtered_data = data[
        (data['category'].isin(selected_categories)) &
        (data['parameter'] == selected_parameter) &
        (data['region'] == selected_region)
    ].sort_values('year')

    # The same year can appear once per powertrain. Drawing them as a single
    # trace joins unrelated points into a zigzag, so each powertrain is plotted
    # separately; the label is only extended when there is more than one.
    label_powertrain = filtered_data['powertrain'].nunique(dropna=False) > 1

    def trace_name(base, powertrain):
        return f'{base} - {powertrain}' if label_powertrain else base

    # Create the figure
    fig = go.Figure()

    # Add actual values to the graph
    for category in selected_categories:
        category_data = filtered_data[filtered_data['category'] == category]
        for powertrain, rows in category_data.groupby('powertrain', sort=False, dropna=False):
            fig.add_trace(go.Scatter(
                x=rows['year'],
                y=rows['value'],
                mode='lines+markers',
                name=trace_name(f'Actual ({category})', powertrain)
            ))

    # Add predictions for selected models
    for model in selected_models:
        if model not in filtered_data.columns:
            continue
        for powertrain, rows in filtered_data.groupby('powertrain', sort=False, dropna=False):
            # Rows from different categories can repeat the same (year, prediction) pair
            prediction_data = rows[['year', model]].drop_duplicates()
            fig.add_trace(go.Scatter(
                x=prediction_data['year'],
                y=prediction_data[model],
                mode='lines',
                name=trace_name(f'Prediction ({model.split("_")[0]})', powertrain)
            ))

    # Update layout so the x-axis always spans every year in `data`,
    # not only the years of the current selection
    fig.update_layout(
        title=f"{selected_parameter} over Year in {selected_region}",
        xaxis=dict(
            title='Year',
            tickmode='linear',
            range=[data['year'].min(), data['year'].max()]
        ),
        yaxis_title='Value',
        legend_title='Legend'
    )

    return fig

# Run the app in Colab
from google.colab.output import eval_js

# Print the Colab proxy URL that forwards to port 8050
print(eval_js("google.colab.kernel.proxyPort(8050)"))

# app.run is the supported entry point (app.run_server is deprecated in favour
# of it); the debug/use_reloader flags defined at the top of the notebook are
# honoured instead of hard-coding debug=True.
app.run(host='0.0.0.0', port=8050, debug=debug, use_reloader=use_reloader)

https://prsl3al8cgh-496ff2e9c6d22116-8050-colab.googleusercontent.com/


<IPython.core.display.Javascript object>

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


def _per_variance(metric, variance):
    """Return metric / variance, or None when the variance is zero."""
    return metric / variance if variance != 0 else None


# One result row per (parameter, model) pair
results = []

# Prediction columns to score; columns missing from `data` are skipped
scored_models = [
    model for model in ('Linear_Prediction', 'RF_Prediction', 'XGB_Prediction')
    if model in data.columns
]

for parameter in data['parameter'].unique():
    param_data = data[data['parameter'] == parameter]
    actual = param_data['value']

    # Variance of the actual values for this parameter
    variance = np.var(actual)

    for model in scored_models:
        predictions = param_data[model]

        # Error metrics of the stored predictions against the actual values
        mse = mean_squared_error(actual, predictions)
        rmse = np.sqrt(mse)
        r2 = r2_score(actual, predictions)

        results.append({
            'Parameter': parameter,
            'Model': model,
            'Variance': variance,
            'MSE': mse,
            'RMSE': rmse,
            'R2': r2,
            # Each metric divided by the variance (None if the variance is 0)
            'MSE/Variance': _per_variance(mse, variance),
            'RMSE/Variance': _per_variance(rmse, variance),
            'R2/Variance': _per_variance(r2, variance)
        })

# Convert results to a DataFrame for display
results_df = pd.DataFrame(results)

# For every Projection-STEPS row that has a Historical row with the same
# year, region and powertrain, record which model's prediction on that
# Historical row lies closest to the Projection-STEPS value.
projection_steps_comparison = []

# Prediction columns that can take part in the comparison
candidate_models = [
    model for model in ('Linear_Prediction', 'RF_Prediction', 'XGB_Prediction')
    if model in data.columns
]

for parameter in data['parameter'].unique():
    of_parameter = data['parameter'] == parameter
    steps_rows = data[of_parameter & (data['category'] == 'Projection-STEPS')]
    historical_rows = data[of_parameter & (data['category'] == 'Historical')]

    # Nothing to compare unless both categories are present
    if steps_rows.empty or historical_rows.empty:
        continue

    for _, step_row in steps_rows.iterrows():
        step_value = step_row['value']

        # Historical rows sharing year, region and powertrain with this row
        same_key = (
            (historical_rows['year'] == step_row['year']) &
            (historical_rows['region'] == step_row['region']) &
            (historical_rows['powertrain'] == step_row['powertrain'])
        )
        matches = historical_rows[same_key]
        if matches.empty:
            continue

        # Keep the first model with the strictly smallest absolute difference;
        # only the first matching Historical row is consulted
        best_model, best_gap = None, float('inf')
        for model in candidate_models:
            gap = abs(matches[model].values[0] - step_value)
            if gap < best_gap:
                best_model, best_gap = model, gap

        projection_steps_comparison.append({
            'Parameter': parameter,
            'Year': step_row['year'],
            'Region': step_row['region'],
            'Powertrain': step_row['powertrain'],
            'Projection-STEPS Value': step_value,
            'Closest Model': best_model,
            'Closest Difference': best_gap
        })

# Convert Projection-STEPS comparison to a DataFrame
projection_steps_df = pd.DataFrame(projection_steps_comparison)

# Display the simplified comparison to Projection-STEPS
print("\nProjection-STEPS Simplified Comparison:")

# Show every row and column without wrapping, for this display only.
# option_context restores the previous option values on exit (even if
# display raises) instead of resetting them to the library defaults.
with pd.option_context(
    'display.max_rows', None,      # Show all rows
    'display.max_columns', None,   # Show all columns
    'display.width', 1000,         # Prevent column wrapping
):
    display(projection_steps_df)


Projection-STEPS Simplified Comparison:


Unnamed: 0,Parameter,Year,Region,Powertrain,Projection-STEPS Value,Closest Model,Closest Difference
0,EV stock share,2020,China,EV,1.9,XGB_Prediction,0.004292965
1,EV stock share,2021,China,EV,3.1,XGB_Prediction,0.1150174
2,EV stock share,2022,China,EV,5.1,XGB_Prediction,0.0001072884
3,EV stock share,2023,China,EV,7.6,XGB_Prediction,0.09789515
4,EV stock share,2020,Europe,EV,1.1,XGB_Prediction,0.01029325
5,EV stock share,2021,Europe,EV,1.8,XGB_Prediction,0.01097989
6,EV stock share,2022,Europe,EV,2.7,XGB_Prediction,0.003088474
7,EV stock share,2023,Europe,EV,3.8,RF_Prediction,0.04899999
8,EV stock share,2020,India,EV,0.029,RF_Prediction,0.02292
9,EV stock share,2021,India,EV,0.054,RF_Prediction,0.02272
