In [None]:
# Importing Required Libraries
import pandas as pd
import plotly.express as px
import numpy as np

## Loading the Data

In [None]:
# Location of the input CSV, relative to the notebook's working directory
PATH = 'dataset.csv'
df = pd.read_csv(PATH)
# Preview the first rows to confirm the file was parsed as expected
df.head()

In [None]:
# Dimensions of the loaded data as (rows, columns)
df.shape

In [None]:
# Number of missing (null) values in each column
df.isnull().sum()

In [None]:
# Impute missing values with each column's most frequent value (mode).
# The literal string 'NaN' is converted to a real missing value *before* the
# mode is computed; otherwise a placeholder 'NaN' string could itself be the
# most frequent value and be written back as the "fill".
MODE_IMPUTED_COLUMNS = ['Vehicle Location', 'Electric Utility', 'Legislative District', 'Model']
for column in MODE_IMPUTED_COLUMNS:
    cleaned = df[column].replace('NaN', np.nan)
    modes = cleaned.mode()  # missing values are ignored by default
    # A column with no observed values has no mode: leave it as-is instead of
    # failing with an IndexError on modes[0].
    df[column] = cleaned.fillna(modes.iloc[0]) if not modes.empty else cleaned


In [None]:
# Re-check the per-column missing-value counts after the mode imputation
df.isnull().sum()

In [None]:
# Shape after handling missing values.
# NOTE(review): the cells shown here only fill values (no rows are dropped),
# so the row count is expected to match the earlier df.shape output.
df.shape

In [None]:
# Checking for Duplicates
# `dup` is a boolean Series: True where a row repeats an earlier row in full
dup = df.duplicated()
# Tally of unique (False) versus duplicated (True) rows
dup.value_counts()

# TASK-1: Exploratory Data Analysis using plotly.express library

In [None]:
# Installing Plotly
!pip install plotly

In [None]:
# Columns to summarise with box plots
num = ['Model Year','Electric Range', 'Legislative District']


def viz_univariate_box_plots(df, num):
    """Show one box plot for every numeric column named in ``num``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot.
    num : iterable of str
        Column names to consider. Non-numeric (and boolean) columns are
        skipped without raising.

    Notes
    -----
    Any numeric dtype qualifies (int32, float32, nullable ``Int64``, ...),
    not only the exact ``int64``/``float64`` dtypes.
    """
    for j in num:
        column = df[j]
        is_plottable = (
            pd.api.types.is_numeric_dtype(column)
            and not pd.api.types.is_bool_dtype(column)
        )
        if is_plottable:
            fig_box = px.box(df, y=j, title=f'Box Plot of {j}')
            fig_box.show()
            
# Draw the box plots for the columns listed in `num`
viz_univariate_box_plots(df, num)

In [None]:
# Row count per Clean Alternative Fuel Vehicle (CAFV) eligibility category.
# The category is placed on the y-axis, so the bars are drawn horizontally.
fig_histogram = px.histogram(df, y='Clean Alternative Fuel Vehicle (CAFV) Eligibility')
fig_histogram.show()

- The "Clean Alternative Fuel Vehicle Eligible" category contains the most vehicles.

In [None]:
# Row count per county
fig_histogram = px.histogram(df, x='County')
fig_histogram.show()

- King County has the largest number of vehicles.

- Battery electric vehicles (BEVs) far outnumber plug-in hybrid electric vehicles (PHEVs).

In [None]:
# Rows per manufacturer, most common first.
# Counting the non-null 'City' entries of each 'Make' gives a row count
# (presumably one row per registered vehicle), NOT a number of distinct
# cities, so the 'City' column of `company_counts` holds vehicle counts.
# Selecting 'City' before counting avoids counting every other column too.
company_counts = (
    df.groupby('Make')['City']
    .count()
    .sort_values(ascending=False)
    .reset_index()
)
top_10 = company_counts[:10]

# Create the bar chart
fig = px.bar(top_10, x='Make', y='City', labels={'Make': 'Companies', 'City': 'Count'},
             title='Top 10 Electric Vehicle Companies by Number of Vehicles', color='City',
             color_continuous_scale='Viridis')

# Show the plot
fig.show()

In [None]:
# Rows per manufacturer, most common first. Computed once in this cell so the
# cell does not rely on variables created by other cells.
make_counts = df.groupby('Make')['City'].count().sort_values(ascending=False)

# Kept as separate names because they are reused for a pie chart further down
Companies = make_counts.index
values = make_counts.values

top_n = 10
# reset_index() on the Series yields exactly two columns: 'Make' and 'City' (the count)
top_companies = make_counts.head(top_n).reset_index()

fig = px.bar(top_companies, x='Make', y='City', labels={'Make': 'Companies', 'City': 'Count'},
             title='Top Companies Producing Electric Vehicles', color='City',
             color_continuous_scale='Viridis')
# Slant the manufacturer names so they do not overlap
fig.update_layout(xaxis_tickangle=-45)

fig.show()



In [None]:
# Non-null VIN entries per model year, as a two-column frame: year, num_cars
year_wise_cars = (
    df.groupby('Model Year')['VIN (1-10)']
    .count()
    .rename_axis('year')
    .reset_index(name='num_cars')
)
fig = px.line(
    year_wise_cars,
    x="year",
    y="num_cars",
    markers=True,
    title='Year Wise Number of Cars',
)
fig.show()

In [None]:
# Row count for each CAFV eligibility category
df['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].value_counts()

In [None]:
# Overlaid histograms of electric range, one trace per electric vehicle type
fig = px.histogram(df, x='Electric Range', color='Electric Vehicle Type',
                   nbins=30, barmode='overlay', histfunc='count', 
                   labels={'Electric Range': 'Electric Range', 'Electric Vehicle Type': 'Vehicle Type'},
                   title='Electric Vehicle Range Distribution by Vehicle Type')

# Show the plot
fig.show()

In [None]:
# Ten states with the most rows in the dataset
car_counts_St = df['State'].value_counts().nlargest(10)

# Tidy two-column frame so the chart addresses its axes by explicit column
# names. Passing the raw index/values arrays makes the axis labels depend on
# how plotly auto-names them (which varies with the Series/index names), so a
# labels mapping keyed on 'x'/'y' may silently not apply.
top_states = car_counts_St.rename_axis('State').reset_index(name='Count')

fig = px.bar(top_states, x='State', y='Count',
             labels={'State': 'State', 'Count': 'Number of Cars (log scale)'},
             title='Top 10 Count of Cars per State',
             template='plotly_dark')

# Log scale keeps the small states visible next to the dominant one
fig.update_layout(yaxis_type='log')

fig.update_traces(marker_color='steelblue')

fig.show()

# Same counts as a colour-graded table (last expression, so it is displayed)
car_counts_St_df = car_counts_St.to_frame()
car_counts_St_df.style.background_gradient(cmap='Blues')

In [None]:
# Pie chart over the CAFV eligibility categories.
# NOTE(review): values='Electric Range' makes each slice proportional to the
# summed electric range of its category, not to the number of vehicles in it —
# confirm this is the intended measure.
fig_pie = px.pie(df, names='Clean Alternative Fuel Vehicle (CAFV) Eligibility', values='Electric Range')
fig_pie.show()

In [None]:
# Pie chart over the electric vehicle types.
# NOTE(review): values='Electric Range' makes each slice proportional to the
# summed electric range of its type, not to the number of vehicles of that
# type — confirm this is the intended measure.
fig_pie = px.pie(df, names='Electric Vehicle Type', values='Electric Range')
fig_pie.show()

In [None]:
# Share of rows held by the ten most common makes.
# The counts are computed inside this cell instead of being read from the
# notebook-level `Companies`/`values` variables: `values` is re-bound to
# per-state counts by a later cell, so re-running this cell afterwards would
# pair make names with state counts.
top_make_counts = df.groupby('Make')['City'].count().sort_values(ascending=False).head(10)
px.pie(names=list(top_make_counts.index), values=top_make_counts.values, width=500, height=400)

In [None]:
import re
# One row per distinct 'Vehicle Location' string with its number of non-null
# 'County' entries, exposed as the columns 'Locations' and 'Count'.
Location_data = (
    df.groupby('Vehicle Location')['County']
    .count()
    .rename_axis('Locations')
    .reset_index(name='Count')
)

#  Extract latitude and longitude from 'Locations'
def extract_latitude(location):
    """Return the first number found after the last '(' in ``location``.

    Parameters
    ----------
    location : str
        Text such as ``"POINT (a b)"``.

    Returns
    -------
    float or None
        The first number as a float, or ``None`` when ``location`` is not a
        string or contains no number after its last ``'('``.

    Notes
    -----
    The sign is kept for integer as well as decimal values (``"-122"`` is
    parsed as ``-122.0``).

    NOTE(review): this returns the *first* coordinate. The scatter plot in
    this notebook puts this value on an axis ranged [-130, -60], which suggests
    the first coordinate is really a longitude — confirm against the raw data.
    """
    try:
        # The optional sign applies to both alternatives (decimal or integer)
        numbers = re.findall(r'[-+]?(?:\d*\.\d+|\d+)', location.split('(')[-1])
        return float(numbers[0])
    except (AttributeError, TypeError, IndexError, ValueError):
        # Non-string input (e.g. NaN/None) or no number present
        return None

def extract_longitude(location):
    """Return the second number found after the last '(' in ``location``.

    Parameters
    ----------
    location : str
        Text such as ``"POINT (a b)"``.

    Returns
    -------
    float or None
        The second number as a float, or ``None`` when ``location`` is not a
        string or contains fewer than two numbers after its last ``'('``.

    Notes
    -----
    The sign is kept for integer as well as decimal values (``"-47"`` is
    parsed as ``-47.0``).

    NOTE(review): this returns the *second* coordinate. The scatter plot in
    this notebook puts this value on an axis ranged [20, 60], which suggests
    the second coordinate is really a latitude — confirm against the raw data.
    """
    try:
        # The optional sign applies to both alternatives (decimal or integer)
        numbers = re.findall(r'[-+]?(?:\d*\.\d+|\d+)', location.split('(')[-1])
        return float(numbers[1])
    except (AttributeError, TypeError, IndexError, ValueError):
        # Non-string input (e.g. NaN/None) or fewer than two numbers present
        return None

# Attach the two parsed coordinates as columns, then keep only the rows where
# both could be parsed.
Location_data = Location_data.assign(
    Latitude=Location_data['Locations'].apply(extract_latitude),
    Longitude=Location_data['Locations'].apply(extract_longitude),
).dropna(subset=['Latitude', 'Longitude'])

# TASK-2: Choropleth to display the number of EV vehicles based on location

In [None]:
# Rows per state, most common first. Computed once and reused, instead of
# repeating the same all-column groupby for every derived variable.
cars_per_state = df.groupby('State')['City'].count().sort_values(ascending=False)

states = list(cars_per_state.index)
values = cars_per_state.values

# Two-column frame for plotting
data = cars_per_state.reset_index()
data.columns = ['State', 'Count']

# locationmode="USA-states" matches on state abbreviations
# (assumes 'State' holds two-letter codes — TODO confirm)
fig = px.choropleth(data,
                    locations='State',
                    locationmode="USA-states",
                    color='Count',
                    color_continuous_scale="blues",
                    scope="usa",
                    title='Number of Electric Vehicles per State')

fig.show()

In [None]:
# In Location_data, 'Latitude' holds the FIRST coordinate of each location
# string and 'Longitude' the SECOND. The axis ranges used below
# (x in [-130, -60], y in [20, 60]) only make sense if the first coordinate is
# a longitude and the second a latitude, so the two columns are renamed on a
# plotting copy to give the axes and hover text the right names.
# NOTE(review): confirm the coordinate order against the raw 'Vehicle Location' values.
location_points = Location_data.rename(columns={'Latitude': 'Longitude', 'Longitude': 'Latitude'})

fig = px.scatter(location_points, x='Longitude', y='Latitude', size='Count', color='Count',
                 labels={'Latitude': 'Latitude', 'Longitude': 'Longitude', 'Count': 'Count'},
                 title='Vehicle Locations and Counts',
                 hover_data=['Locations', 'Count'])

# Window roughly covering the contiguous United States
fig.update_layout(xaxis_range=[-130, -60], yaxis_range=[20, 60])

fig.show()

In [None]:
# 'Vehicle Location' is read as "<tag> (<first> <second>)": whitespace-split
# token 1 is "(<first>" and token 2 is "<second>)".
# The first coordinate is stored as Longitude and the second as Latitude: the
# location scatter plot in this notebook ranges the first coordinate over
# [-130, -60], which cannot be a latitude.
# NOTE(review): confirm the coordinate order against the raw data.
location_tokens = df['Vehicle Location'].str.split(expand=True)
df['Longitude'] = location_tokens[1].str[1:].astype(float)   # strip leading "("
df['Latitude'] = location_tokens[2].str[:-1].astype(float)   # strip trailing ")"


In [None]:
# NOTE(review): `wget` is not used by any cell visible in this notebook —
# confirm this install is still needed.
!pip install wget

In [None]:
# Number of rows (vehicles) recorded for each city
temp_df = df.groupby(['City'], as_index=False)['Electric Vehicle Type'].count()

# 'Vehicle Location' is read as "<tag> (<first> <second>)". The first
# coordinate is treated as longitude and the second as latitude.
# NOTE(review): confirm the coordinate order against the raw data.
location_tokens = df['Vehicle Location'].str.split(expand=True)
city_coords = pd.DataFrame({
    'City': df['City'],
    'Longitude': location_tokens[1].str[1:].astype(float),   # strip leading "("
    'Latitude': location_tokens[2].str[:-1].astype(float),   # strip trailing ")"
})

# One row per city: mean position of its vehicles, joined to the per-city
# count on the 'City' key. (Assigning temp_df's column directly would align on
# the row index and attach the counts to unrelated vehicle rows.)
data_for_geojson = (
    city_coords.groupby('City', as_index=False)[['Latitude', 'Longitude']]
    .mean()
    .merge(temp_df, on='City', how='inner')
    .dropna(subset=['Latitude', 'Longitude'])
)

# GeoJSON point collection with one feature per city ([lon, lat] order).
# Not consumed by the figure below; kept available for export/inspection.
features = [
    {"type": "Feature", "properties": {"name": city},
     "geometry": {"type": "Point", "coordinates": [longitude, latitude]}}
    for city, latitude, longitude in zip(data_for_geojson['City'],
                                         data_for_geojson['Latitude'],
                                         data_for_geojson['Longitude'])
]

geojson_data = {"type": "FeatureCollection", "features": features}

# Cities are points, not regions, so they are drawn as markers sized and
# coloured by vehicle count. A choropleth needs polygon features keyed to the
# `locations` values, and 'USA-states' matching cannot resolve city names.
fig = px.scatter_geo(data_for_geojson,
                     lat='Latitude',
                     lon='Longitude',
                     color='Electric Vehicle Type',
                     size='Electric Vehicle Type',
                     size_max=30,
                     hover_name='City',
                     color_continuous_scale='Viridis',
                     range_color=(0, data_for_geojson['Electric Vehicle Type'].max()),
                     scope='usa',
                     hover_data={'Electric Vehicle Type': True, 'Latitude': False, 'Longitude': False},
                     labels={'Electric Vehicle Type': 'Number of EV Vehicles'}
                     )

fig.update_layout(title='EV Vehicles by City',
                  geo=dict(showcoastlines=True, coastlinecolor="RebeccaPurple",
                           showland=True, landcolor="LightGreen",
                           showocean=True, oceancolor="LightBlue", showlakes=True,
                           lakecolor="Blue"))

# Leave room at the top so the title stays visible
fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
# No fixed dtick: one tick per unit is unreadable once counts reach the thousands
fig.update_coloraxes(colorbar=dict(thicknessmode="pixels", thickness=10, lenmode="pixels", len=150,
                                    yanchor="top", y=0.8, ticks="outside", ticksuffix=""))

fig.show()

# TASK-3: Racing Bar Plot animating the count of each EV Make by year

In [None]:
#!pip install bar-chart-race
# !pip install pandas matplotlib bar_chart_race


In [None]:
import bar_chart_race as bcr
import matplotlib.pyplot as plt

In [None]:
# Number of rows for every ('Model Year', 'Make') pair that occurs in the data
observed_counts = df.groupby(['Model Year', 'Make']).size()

# Every year x make combination, in sorted order. The animation plays its
# frames in the order the years appear in the data, so sorting the years here
# is what makes the race run chronologically.
all_model_years = sorted(df['Model Year'].dropna().unique())
all_makes = sorted(df['Make'].dropna().unique())
all_combinations = pd.MultiIndex.from_product([all_model_years, all_makes], names=['Model Year', 'Make'])

# Combinations that never occur get an explicit zero. Reindexing replaces the
# merge + chained `fillna(..., inplace=True)`, which does not modify the frame
# under pandas copy-on-write.
ev_make_count_by_year = observed_counts.reindex(all_combinations, fill_value=0).reset_index(name='Count')

# Create the Racing Bar Plot using Plotly
fig = px.bar(ev_make_count_by_year,
             x='Count',
             y='Make',
             orientation='h',
             animation_frame='Model Year',
             color='Make',
             # Fixed x range: otherwise the axis keeps the range of the first
             # frame and later, larger bars run off the chart
             range_x=[0, ev_make_count_by_year['Count'].max() * 1.05],
             labels={'Make': 'EV Make', 'Count': 'Count'},
             title='EV Maker and Count Each Year'
            )

# Customize the layout
fig.update_layout(
    xaxis_title='Count',
    yaxis_title='EV Make',
    yaxis={'categoryorder': 'total ascending'}
)

fig.show()

### Conclusions:
- From the racing bar plot above, it is clear that the number of electric vehicles is increasing rapidly.

- The electric vehicle market is likely to continue growing in the coming years as demand for electric vehicles increases.

- Tesla is the leading electric vehicle manufacturer.

- The majority of the vehicles are Battery Electric Vehicles (BEVs), and Tesla produces BEVs.

- Most of the electric vehicles are in Washington State (USA-WA).

- There are more vehicles in King County than in any other county.
- BEVs are the most numerous vehicle type.