# Import the necessary libraries 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Import the data as EV_clean

In [None]:
# Location of the cleaned electric-vehicle CSV inside the Kaggle input directory.
DATA_PATH = "/kaggle/input/ev-clean/ElectricCarData_Clean.csv"
# Load the raw table; this frame is kept as the untouched reference copy.
EV_clean = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first five rows of the freshly loaded data.
EV_clean.head(n=5)

In [None]:
# Show the dtype pandas inferred for each column of the raw data.
column_types = EV_clean.dtypes
print(column_types)

# Copy the data to EV_clean_1 to work with, so that the original EV_clean is not changed

In [None]:
# Work on an explicit, independent copy. A plain slice (EV_clean[:]) is not a
# guaranteed deep copy, so later edits could trigger SettingWithCopyWarning or
# leak back into the original frame.
EV_clean_1 = EV_clean.copy()

# Replace with dummy variables for categorical columns

In [None]:
# pd.get_dummies returns a whole DataFrame (one indicator column per category,
# plus a NaN indicator because dummy_na=True). Assigning that frame to a single
# column fails on current pandas with "Columns must be same length as key".
# Keep exactly one indicator per feature: the first one, i.e. 1 where the row
# belongs to the first category in sorted order and 0 otherwise.
# NOTE(review): older pandas releases appear to have silently kept this same
# first column, so results should be unchanged; for features with more than
# two levels this collapses the data to "first level vs. everything else".
for column in ("RapidCharge", "PowerTrain", "PlugType", "BodyStyle", "Segment"):
    indicators = pd.get_dummies(EV_clean_1[column], dummy_na=True)
    # astype(int) gives plain 0/1 values regardless of whether this pandas
    # version produces bool or uint8 indicators.
    EV_clean_1[column] = indicators.iloc[:, 0].astype(int)

# Review the data

In [None]:
# Preview the first five rows after the categorical columns were encoded.
EV_clean_1.head(n=5)

In [None]:
# Review the dtypes of the transformed working copy (EV_clean_1), not of the
# untouched original, so the effect of the encoding step is actually visible.
print(EV_clean_1.dtypes)

# Check for na values or missing values

In [None]:
# Number of missing entries in each column of the working copy.
count_nan_in_df = EV_clean_1.isna().sum(axis=0)
print(count_nan_in_df)

# Use **.astype('category')** to convert object based variable to categorical variable

In [None]:
# Convert each encoded feature to pandas' categorical dtype. The original cell
# converted PowerTrain twice; every column is now handled exactly once.
for column in ("RapidCharge", "PowerTrain", "PlugType", "BodyStyle", "Segment"):
    EV_clean_1[column] = EV_clean_1[column].astype('category')

# Force FastCharge_KmH to numeric; errors='coerce' turns any value that cannot
# be parsed as a number into NaN instead of raising.
EV_clean_1['FastCharge_KmH'] = pd.to_numeric(EV_clean_1['FastCharge_KmH'], errors='coerce')

In [None]:
# Preview the first five rows after the dtype conversions.
EV_clean_1.head(n=5)

# Check for outliers by visualizing the data with box plots, and treat them if there are many. Outliers are displayed as dots outside the box plot

In [None]:
# Draw one box per numeric measurement on a shared axis; showmeans=True adds a
# marker for the mean of each column.
numeric_columns = [
    'AccelSec',
    'TopSpeed_KmH',
    'Range_Km',
    'Efficiency_WhKm',
    'FastCharge_KmH',
    'PriceEuro',
]
ax = EV_clean_1[numeric_columns].plot.box(figsize=(10, 10), title='boxplot', showmeans=True)
plt.show()

#Analyse the median to know the basic performance of the data

In [None]:
# Median of every numeric column. numeric_only=True is required on pandas >= 2.0,
# where the default no longer silently skips text columns and the call would
# otherwise raise a TypeError.
EV_clean_1.median(numeric_only=True)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the working copy.
EV_clean_1.describe()

#Replace the outliers with median

In [None]:
# Replacement value used for price outliers (hard-coded, not recomputed here).
PriceEuro_median = 45000
# Flag every price at or above 120000 and overwrite only those rows with the
# replacement value; all other prices are kept as they are.
is_price_outlier = EV_clean_1['PriceEuro'] >= 120000
EV_clean_1['PriceEuro'] = EV_clean_1['PriceEuro'].where(~is_price_outlier, PriceEuro_median)

In [None]:
# Re-draw the price box plot to check the effect of the outlier replacement.
price_only = EV_clean_1[['PriceEuro']]
ax = price_only.plot.box(figsize=(10, 10), title='boxplot', showmeans=True)
plt.show()

# Which vehicle has the fastest 0-100 acceleration?  (Method 1)

In [None]:
# Wide canvas so that every brand label on the x axis stays readable.
plt.figure(figsize=(50, 20))
# One bar per brand showing its AccelSec values aggregated by seaborn.
ax2 = sns.barplot(data=EV_clean_1, x='Brand', y='AccelSec')

# Which vehicle has the fastest 0-100 acceleration? (Method 2)

In [None]:
# Summary statistics again, used below to read off the extreme values.
EV_clean_1.describe()

In [None]:
# AccelSec is the 0-100 time in seconds, so the fastest vehicle has the
# SMALLEST value. The previous filter used the maximum (22.4), which selects the
# slowest car. The extreme is computed from the data instead of being hard-coded.
EV_clean_1[EV_clean_1['AccelSec'] == EV_clean_1['AccelSec'].min()]

# Which vehicle has the highest efficiency?

In [None]:
# Efficiency_WhKm is energy used per kilometre (Wh/km), so the most efficient
# vehicle has the LOWEST value. The previous filter used the maximum (273), which
# selects the highest consumption. The extreme is computed from the data instead
# of being hard-coded.
EV_clean_1[EV_clean_1['Efficiency_WhKm'] == EV_clean_1['Efficiency_WhKm'].min()]

# Does a difference in power train affect the range, top speed, or efficiency?

In [None]:
# sns.catplot is a figure-level function that creates its own figure, so the
# empty plt.subplots(1, 2) figure (and the plt.close(1) that removed it again)
# was dead weight and has been dropped.
# The original frame EV_clean is used on purpose: it still holds the readable
# PowerTrain labels, whereas EV_clean_1 holds the encoded values.
for metric in ("Range_Km", "TopSpeed_KmH", "Efficiency_WhKm"):
    sns.catplot(x="PowerTrain", y=metric, data=EV_clean)

# Which manufacturer has the largest number of vehicles? (Method 1)

In [None]:
# For text columns, describe() reports count, number of unique values, the most
# frequent value ("top") and how often it occurs ("freq").
brand_and_model = EV_clean[["Brand", "Model"]]
brand_and_model.describe()

# Which manufacturer has the largest number of vehicles? (Method 2)

In [None]:
# Per-brand count of non-null entries in every column.
Automobile = EV_clean.groupby(by='Brand').count()
# Rank the brands by how many models they have, largest first.
models_per_brand = Automobile['Model'].sort_values(ascending=False)
print(models_per_brand)

# How does price relate to rapid charging?

In [None]:
# Violin plot of the price distribution for each RapidCharge value.
ax2 = sns.catplot(
    data=EV_clean_1,
    x="RapidCharge",
    y="PriceEuro",
    kind="violin",
)

# Other analysis 

In [None]:
# Pairwise correlation of the numeric columns. numeric_only=True is required on
# pandas >= 2.0, where text columns are no longer silently dropped and the call
# would otherwise raise a ValueError.
EV_clean_1.corr(numeric_only=True)

# Final conclusions:
# Renault has the highest 0-100 time (AccelSec = 22.4 s), i.e. the slowest acceleration; the fastest vehicle is the one with the lowest AccelSec.
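# Mercedes has the highest Efficiency_WhKm value (273 Wh/km), i.e. the highest energy consumption per km; the most efficient vehicle is the one with the lowest value.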
# Out of the three parameters (range, top speed, efficiency), the power train affects the range the most.
# Tesla has the maximum number of vehicles having a count of 13.
# After converting the categorical Rapid charge column to ones and zeros, 0 represents Yes and 1 represents No.
# Referring to the catplot, price depends heavily on rapid charging.

# Other analysis 

In [None]:
# Pairwise correlation of the numeric columns. numeric_only=True is required on
# pandas >= 2.0, where text columns are no longer silently dropped and the call
# would otherwise raise a ValueError.
EV_clean_1.corr(numeric_only=True)