In [None]:
# Importing libraries
import pandas as pd
import altair as alt
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gpd
import plotly.graph_objects as go
import folium
from folium import IFrame
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from shapely.geometry import Point
import plotly.express as px
import re
alt.data_transformers.enable("vegafusion")
alt.data_transformers.disable_max_rows()

In [None]:
# Loading the EV population dataset from disk into a DataFrame
ev_csv_path = "../data/Electric_Vehicle_Population_Data.csv"
df = pd.read_csv(ev_csv_path)

In [None]:
# Displaying the first 5 rows of df as a quick preview
df.head(5)

In [None]:
# Keeping only the rows whose 'State' column is 'WA'.
# The explicit .copy() makes the result an independent DataFrame, so the column
# assignments done in later cells ('Latitude', 'Longitude', 'Model Year') write to
# a real frame instead of a filtered slice and do not raise SettingWithCopyWarning.
df = df.loc[df['State'] == 'WA'].copy()

## Visualization 1 - Map Chart

In [None]:
# Data extraction function
def extract_coordinates(point_str):
    """Parse a 'POINT (<x> <y>)' location string into a (latitude, longitude) pair.

    The first number in the text is treated as the longitude and the second as the
    latitude, so the returned tuple swaps them into (latitude, longitude) order.

    Parameters
    ----------
    point_str : object
        Location text such as 'POINT (-122.30839 47.610365)'. Non-string values
        (e.g. NaN for a missing location) are accepted.

    Returns
    -------
    tuple
        (latitude, longitude) as floats, or (None, None) when the value is not a
        string or does not contain two parseable numbers.
    """
    if not isinstance(point_str, str):
        return None, None

    # Removing 'POINT (' and ')' and splitting the remainder on whitespace
    coords = point_str.replace('POINT (', '').replace(')', '').split()

    # Empty or truncated text cannot yield a coordinate pair
    if len(coords) < 2:
        return None, None

    try:
        longitude = float(coords[0])
        latitude = float(coords[1])
    except ValueError:
        # Non-numeric tokens (e.g. 'POINT EMPTY') are reported as missing so one
        # malformed row does not abort processing of the whole column
        return None, None

    return latitude, longitude

# Parsing every 'Vehicle Location' value into a coordinate pair, then unzipping
# the pairs into the new 'Latitude' and 'Longitude' columns
coordinate_pairs = df['Vehicle Location'].apply(extract_coordinates)
df['Latitude'], df['Longitude'] = zip(*coordinate_pairs)

# Discarding the rows for which no coordinates were produced
df = df.dropna(subset=['Latitude', 'Longitude'])

# Bucketing the rows by their exact coordinate pair so each unique point is handled once
grouped = df.groupby(['Latitude', 'Longitude'])

# Creating a new Folium map centered on the mean latitude/longitude of the data
map_center = [df['Latitude'].mean(), df['Longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=6)

# Adding an HTML title to the page that hosts the map
title_html = '''
             <h3 align="center" style="font-size:20px"><b>Geographic Distribution of EV Registrations in WA</b></h3>
             '''
title_element = folium.Element(title_html)
m.get_root().html.add_child(title_element)

# Creating a MarkerCluster layer and attaching it to the map
marker_cluster = MarkerCluster()
marker_cluster.add_to(m)

# Adding one summarized marker per unique coordinate pair to the cluster
for (lat, lon), group in grouped:
    # Getting the most common county name in the group. mode() returns an empty
    # Series when every 'County' value in the group is missing, so fall back to a
    # placeholder instead of failing on the [0] lookup.
    county_modes = group['County'].mode()
    county_name = county_modes.iloc[0] if not county_modes.empty else 'Unknown'

    # Getting the top 5 (Make, Model) combinations by number of vehicles
    top_makes_models = group.groupby(['Make', 'Model']).size().nlargest(5).reset_index(name='Count')

    # Summary string for the popup: one "<b>Make Model:</b> count" entry per
    # combination. Zipping the columns avoids building a Series per row as
    # iterrows() does.
    summary_str = "<br>".join(
        f"<b>{make} {model}:</b> {count}"
        for make, model, count in zip(
            top_makes_models['Make'],
            top_makes_models['Model'],
            top_makes_models['Count'],
        )
    )

    # Content for the popup (scrollable box with county, total and top models)
    popup_content = f"""<div style="width:300px; max-height:200px; overflow:auto;">
                            <b>County:</b> {county_name}<br>
                            <b>Total cars:</b> {len(group)}<br><br>
                            <b>Top Makes and Models:</b><br><br>
                            {summary_str}
                        </div>
                    """

    # Creating and adding the marker to the cluster
    folium.Marker(
        location=[lat, lon],
        popup=popup_content
    ).add_to(marker_cluster)

# Outputting chart
m

In [None]:
# Writing the map out to an HTML file
map_output_file = 'map_chart.html'
m.save(map_output_file)

## Visualization 2 - Line Chart

In [None]:
# Casting 'Model Year' to text so it is handled as discrete labels rather than a continuous quantity
df['Model Year'] = df['Model Year'].astype(str)

# Counting registrations for every (model year, EV type) combination
year_type_groups = df.groupby(['Model Year', 'Electric Vehicle Type'])
agg_data = year_type_groups.size().reset_index(name='Registrations')

# Building the line chart: one line with point markers per EV type
line_chart = (
    alt.Chart(agg_data)
    .mark_line(point=True)
    .encode(
        x=alt.X('Model Year:O', axis=alt.Axis(title='Model Year')),
        y=alt.Y('Registrations:Q', axis=alt.Axis(title='Number of Registrations')),
        color='Electric Vehicle Type:N',
        tooltip=['Model Year', 'Electric Vehicle Type', 'Registrations'],
    )
    .interactive()
    .properties(
        title='Trend of Electric Vehicle Registrations by Type',
        width=400,
        height=300,
    )
)

# Outputting chart
line_chart

In [None]:
# Writing the line chart out to an HTML file
line_chart_output_file = 'line_chart.html'
line_chart.save(line_chart_output_file)

## Visualization 3 - Stacked Bar Chart

In [None]:
# Counting vehicles for every (make, EV type) combination
make_counts = (
    df.groupby(['Make', 'Electric Vehicle Type'])
    .size()
    .reset_index(name='Count')
)

# Summing those counts per make, regardless of the vehicle type
total_counts = make_counts.groupby('Make')['Count'].sum().reset_index()

# Names of the 10 makes with the largest total count
top_makes = total_counts.nlargest(10, 'Count')['Make']

# Restricting the per-type counts to those top 10 makes
is_top_make = make_counts['Make'].isin(top_makes)
top_make_counts = make_counts[is_top_make]

# Tooltip fields shown when hovering over a bar segment
segment_tooltips = [
    alt.Tooltip('Make:N', title='Make'),
    alt.Tooltip('Electric Vehicle Type:N', title='Vehicle Type'),
    alt.Tooltip('sum(Count):Q', title='Number of Vehicles'),
]

# Building the stacked bar chart with 'Electric Vehicle Type' on the x-axis
stacked_bar_chart = (
    alt.Chart(top_make_counts)
    .mark_bar()
    .encode(
        x=alt.X('Electric Vehicle Type:N', title='Electric Vehicle Type'),
        y=alt.Y('sum(Count):Q', title='Number of Vehicles', stack='zero'),
        # 20-color scheme so the individual makes stay distinguishable
        color=alt.Color('Make:N', title='Make', scale=alt.Scale(scheme='tableau20')),
        # Ordering the stacked segments by their size
        order=alt.Order('sum(Count):Q', sort='descending'),
        tooltip=segment_tooltips,
    )
    .properties(
        width=600,
        height=400,
        title='Market Share by Make within Electric Vehicle Types',
    )
)

# Outputting chart
stacked_bar_chart

In [None]:
# Writing the stacked bar chart out to an HTML file
stacked_chart_output_file = 'stacked_bar_chart.html'
stacked_bar_chart.save(stacked_chart_output_file)

## Visualization 4 - Bar Chart

In [None]:
# Counting vehicles per electric utility, labelling the two resulting columns,
# then keeping the 10 largest counts in descending order
counts = (
    df['Electric Utility']
    .value_counts()
    .rename_axis('Electric Utility')
    .reset_index(name='Count')
    .sort_values(by='Count', ascending=False)
    .head(10)
)

# Building the bar chart (bars sorted by height, colored by count) with a tooltip
bar_chart = (
    alt.Chart(counts)
    .mark_bar()
    .encode(
        x=alt.X('Electric Utility:N', sort='-y'),
        y=alt.Y('Count:Q'),
        color=alt.Color('Count:Q'),
        tooltip=['Electric Utility', 'Count'],
    )
    .properties(
        width=800,
        height=400,
        title='Top 10 Electric Utility Firms Used in WA',
    )
)

# Outputting chart
bar_chart

In [None]:
# Writing the bar chart out as a PNG image
bar_chart_output_file = 'bar_chart.png'
bar_chart.save(bar_chart_output_file)