In [3]:
%matplotlib inline

In [4]:
import pandas as pd
import numpy as np
import sympy as sp
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.impute import SimpleImputer

from sklearn.linear_model import LogisticRegression, LinearRegression

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV

from sklearn.metrics import classification_report

import mlflow


#  Data Set
- https://www.kaggle.com/datasets/rajkumarpandey02/electric-vehicle-population-data/data


# Workflow To Be Followed

- Step 1: Loading The Dataset
- Step 2: Performing EDA
- Step 3: Feature Engineering
- Step 4: Model Training & Evaluation 

In [6]:
experiment = mlflow.create_experiment(name = "Initial Tetsing")

In [7]:
run = mlflow.start_run(experiment_id = experiment, run_name="First Run")

## Step 1: Loading The Dataset

In [73]:
# Load the data set
electric_vehicles = pd.read_csv("data/Electric_Vehicle_Population_Data.csv")

In [74]:
#>>> le = LabelEncoder()
#>>> le.fit([1, 2, 2, 6])
le = LabelEncoder()
models = le.fit_transform(electric_vehicles.Model.values)
states = le.fit_transform(electric_vehicles.State)
city = le.fit_transform(electric_vehicles.City)
county = le.fit_transform(electric_vehicles.County)

In [75]:
electric_vehicles.Model = models
electric_vehicles.State = states
electric_vehicles.City = city
electric_vehicles.County = county

In [76]:
electric_vehicles

Unnamed: 0,VIN (1-10),County,City,State,Postal Code,Model Year,Make,Model,Electric Vehicle Type,Clean Alternative Fuel Vehicle (CAFV) Eligibility,Electric Range,Base MSRP,Legislative District,DOL Vehicle ID,Vehicle Location,Electric Utility,2020 Census Tract
0,5YJ3E1EA0K,155,592,44,98512.0,2019,TESLA,71,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,220.0,0.0,22.0,242565116,POINT (-122.91310169999997 47.01359260000004),PUGET SOUND ENERGY INC,5.306701e+10
1,1N4BZ1DV4N,67,107,44,98236.0,2022,NISSAN,69,Battery Electric Vehicle (BEV),Eligibility unknown as battery range has not b...,0.0,0.0,10.0,183272785,POINT (-122.35936399999997 47.97965520000008),PUGET SOUND ENERGY INC,5.302997e+10
2,5YJ3E1EA0L,142,533,44,98290.0,2020,TESLA,71,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,266.0,0.0,44.0,112552366,POINT (-122.09150499999998 47.91555500000004),PUGET SOUND ENERGY INC,5.306105e+10
3,5YJ3E1EBXL,73,510,44,98134.0,2020,TESLA,71,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,322.0,0.0,11.0,6336319,POINT (-122.32981499999994 47.579810000000066),CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA),5.303301e+10
4,5YJSA1CP0D,142,157,44,98020.0,2013,TESLA,72,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,208.0,69900.0,21.0,186212960,POINT (-122.37507 47.80807000000004),PUGET SOUND ENERGY INC,5.306105e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135033,5YJSA1E29L,73,648,44,98004.0,2020,TESLA,72,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,330.0,0.0,48.0,124776627,POINT (-122.20190499999995 47.61385000000007),PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA),5.303302e+10
135034,5YJYGDEE7M,73,67,44,98168.0,2021,TESLA,74,Battery Electric Vehicle (BEV),Eligibility unknown as battery range has not b...,0.0,0.0,33.0,142857676,POINT (-122.28645999999998 47.47613000000007),CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA),5.303303e+10
135035,5YJSA1E51N,112,208,44,98335.0,2022,TESLA,72,Battery Electric Vehicle (BEV),Eligibility unknown as battery range has not b...,0.0,0.0,26.0,220157724,POINT (-122.58354539999999 47.32344880000005),BONNEVILLE POWER ADMINISTRATION||CITY OF TACOM...,5.305307e+10
135036,KM8KMDAF3P,53,169,44,98823.0,2023,HYUNDAI,63,Battery Electric Vehicle (BEV),Eligibility unknown as battery range has not b...,0.0,0.0,13.0,223872596,POINT (-119.55512999999996 47.319360000000074),PUD NO 2 OF GRANT COUNTY,5.302501e+10


In [77]:
#Battery Electric Vehicle (BEV)
#df[(df == 'banana').any(axis=1)]
electric_vehicle_type = electric_vehicles[( electric_vehicles['Electric Vehicle Type'] == 'Battery Electric Vehicle (BEV)')]

In [78]:
plug_in_hybrid_electric_vehicle = electric_vehicles[( electric_vehicles['Electric Vehicle Type'] == 'Plug-in Hybrid Electric Vehicle (PHEV)')]

In [79]:
electric_vehicles["Electric Vehicle Type"].value_counts()

Electric Vehicle Type
Battery Electric Vehicle (BEV)            103882
Plug-in Hybrid Electric Vehicle (PHEV)     31156
Name: count, dtype: int64

In [80]:
electric_vehicle_type_mapping = {type:idx for idx, type in enumerate(np.unique(electric_vehicles['Electric Vehicle Type']))}
#electric_vehicles

In [81]:
electric_vehicle_type_01 = electric_vehicles['Electric Vehicle Type'].map(electric_vehicle_type_mapping)

In [82]:
electric_vehicles['Electric Vehicle Type'] = electric_vehicle_type_01

In [83]:
electric_vehicles

Unnamed: 0,VIN (1-10),County,City,State,Postal Code,Model Year,Make,Model,Electric Vehicle Type,Clean Alternative Fuel Vehicle (CAFV) Eligibility,Electric Range,Base MSRP,Legislative District,DOL Vehicle ID,Vehicle Location,Electric Utility,2020 Census Tract
0,5YJ3E1EA0K,155,592,44,98512.0,2019,TESLA,71,0,Clean Alternative Fuel Vehicle Eligible,220.0,0.0,22.0,242565116,POINT (-122.91310169999997 47.01359260000004),PUGET SOUND ENERGY INC,5.306701e+10
1,1N4BZ1DV4N,67,107,44,98236.0,2022,NISSAN,69,0,Eligibility unknown as battery range has not b...,0.0,0.0,10.0,183272785,POINT (-122.35936399999997 47.97965520000008),PUGET SOUND ENERGY INC,5.302997e+10
2,5YJ3E1EA0L,142,533,44,98290.0,2020,TESLA,71,0,Clean Alternative Fuel Vehicle Eligible,266.0,0.0,44.0,112552366,POINT (-122.09150499999998 47.91555500000004),PUGET SOUND ENERGY INC,5.306105e+10
3,5YJ3E1EBXL,73,510,44,98134.0,2020,TESLA,71,0,Clean Alternative Fuel Vehicle Eligible,322.0,0.0,11.0,6336319,POINT (-122.32981499999994 47.579810000000066),CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA),5.303301e+10
4,5YJSA1CP0D,142,157,44,98020.0,2013,TESLA,72,0,Clean Alternative Fuel Vehicle Eligible,208.0,69900.0,21.0,186212960,POINT (-122.37507 47.80807000000004),PUGET SOUND ENERGY INC,5.306105e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135033,5YJSA1E29L,73,648,44,98004.0,2020,TESLA,72,0,Clean Alternative Fuel Vehicle Eligible,330.0,0.0,48.0,124776627,POINT (-122.20190499999995 47.61385000000007),PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA),5.303302e+10
135034,5YJYGDEE7M,73,67,44,98168.0,2021,TESLA,74,0,Eligibility unknown as battery range has not b...,0.0,0.0,33.0,142857676,POINT (-122.28645999999998 47.47613000000007),CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA),5.303303e+10
135035,5YJSA1E51N,112,208,44,98335.0,2022,TESLA,72,0,Eligibility unknown as battery range has not b...,0.0,0.0,26.0,220157724,POINT (-122.58354539999999 47.32344880000005),BONNEVILLE POWER ADMINISTRATION||CITY OF TACOM...,5.305307e+10
135036,KM8KMDAF3P,53,169,44,98823.0,2023,HYUNDAI,63,0,Eligibility unknown as battery range has not b...,0.0,0.0,13.0,223872596,POINT (-119.55512999999996 47.319360000000074),PUD NO 2 OF GRANT COUNTY,5.302501e+10


In [84]:
electric_vehicle_type["Electric Range"] = electric_vehicle_type["Electric Range"].mask(electric_vehicle_type["Electric Range"].eq(0.0), electric_vehicle_type["Electric Range"].mean())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  electric_vehicle_type["Electric Range"] = electric_vehicle_type["Electric Range"].mask(electric_vehicle_type["Electric Range"].eq(0.0), electric_vehicle_type["Electric Range"].mean())


In [85]:
plug_in_hybrid_electric_vehicle["Electric Range"] = plug_in_hybrid_electric_vehicle["Electric Range"].mask(plug_in_hybrid_electric_vehicle["Electric Range"].eq(0.0), plug_in_hybrid_electric_vehicle["Electric Range"].mean())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  plug_in_hybrid_electric_vehicle["Electric Range"] = plug_in_hybrid_electric_vehicle["Electric Range"].mask(plug_in_hybrid_electric_vehicle["Electric Range"].eq(0.0), plug_in_hybrid_electric_vehicle["Electric Range"].mean())


In [86]:
#new_electric_vehicles = [electric_vehicle_type, plug_in_hybrid_electric_vehicle]
#new_electric_vehicles.appned(electric_vehicle_type)plug_in_hybrid_electric_vehicle
#new_electric_vehicles.append(plug_in_hybrid_electric_vehicle)

#frames = [df1, df2, df3]

electric_vehicles = pd.concat([electric_vehicle_type, plug_in_hybrid_electric_vehicle])

In [87]:
electric_vehicles

Unnamed: 0,VIN (1-10),County,City,State,Postal Code,Model Year,Make,Model,Electric Vehicle Type,Clean Alternative Fuel Vehicle (CAFV) Eligibility,Electric Range,Base MSRP,Legislative District,DOL Vehicle ID,Vehicle Location,Electric Utility,2020 Census Tract
0,5YJ3E1EA0K,155,592,44,98512.0,2019,TESLA,71,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,220.000000,0.0,22.0,242565116,POINT (-122.91310169999997 47.01359260000004),PUGET SOUND ENERGY INC,5.306701e+10
1,1N4BZ1DV4N,67,107,44,98236.0,2022,NISSAN,69,Battery Electric Vehicle (BEV),Eligibility unknown as battery range has not b...,87.780838,0.0,10.0,183272785,POINT (-122.35936399999997 47.97965520000008),PUGET SOUND ENERGY INC,5.302997e+10
2,5YJ3E1EA0L,142,533,44,98290.0,2020,TESLA,71,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,266.000000,0.0,44.0,112552366,POINT (-122.09150499999998 47.91555500000004),PUGET SOUND ENERGY INC,5.306105e+10
3,5YJ3E1EBXL,73,510,44,98134.0,2020,TESLA,71,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,322.000000,0.0,11.0,6336319,POINT (-122.32981499999994 47.579810000000066),CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA),5.303301e+10
4,5YJSA1CP0D,142,157,44,98020.0,2013,TESLA,72,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,208.000000,69900.0,21.0,186212960,POINT (-122.37507 47.80807000000004),PUGET SOUND ENERGY INC,5.306105e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135025,1G1RA6E46D,73,464,44,98056.0,2013,CHEVROLET,117,Plug-in Hybrid Electric Vehicle (PHEV),Clean Alternative Fuel Vehicle Eligible,38.000000,0.0,11.0,231205173,POINT (-122.18050499999998 47.50005500000003),PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA),5.303303e+10
135027,JA4J24A54L,155,401,44,98502.0,2020,MITSUBISHI,79,Plug-in Hybrid Electric Vehicle (PHEV),Not eligible due to low battery range,22.000000,0.0,35.0,100639514,POINT (-122.92145 47.04593500000004),PUGET SOUND ENERGY INC,5.306701e+10
135029,1G1RD6S56H,25,606,44,98682.0,2017,CHEVROLET,117,Plug-in Hybrid Electric Vehicle (PHEV),Clean Alternative Fuel Vehicle Eligible,53.000000,0.0,17.0,140190424,POINT (-122.51464729999998 45.67862000000008),BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF C...,5.301104e+10
135030,1C4JJXN68P,73,509,44,98148.0,2023,JEEP,119,Plug-in Hybrid Electric Vehicle (PHEV),Not eligible due to low battery range,21.000000,0.0,33.0,235938776,POINT (-122.32806 47.46155),PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA),5.303303e+10


In [88]:
def create_electric_range_category(df, column='Electric Range'):
    def categorize_range(electric_range):
        if electric_range == 0:
            return 0.0
        elif electric_range < 50:
            return 1.0 #very Short
        elif 50 <= electric_range <= 100:
            return 3.0 #very Short
        elif 100 < electric_range <= 300:
            return 4.0 #"Medium"
        elif 300 < electric_range:
            return 5.0 #long
        else:
            return -1.0

    df['Electric_Range_Category'] = df[column].apply(categorize_range)
    return df

electric_vehicles = create_electric_range_category(electric_vehicles, column='Electric Range')



In [89]:
#electric_vehicles.dropna(subset=["Electric Range"])
electric_vehicles = electric_vehicles[electric_vehicles['Electric Range'].notna()]

In [90]:
electric_vehicles['Electric Range'].isna().sum()

0

In [92]:
#electric_vehicles['Model Year'] = pd.to_numeric(electric_vehicles['Model Year'], errors="ignore") 

#print(electric_vehicles.dtypes)

#assenger_attributes.drop(columns = ["PassengerId", "Name", "Cabin"])
X = electric_vehicles[['Base MSRP', 'City', 'State', 'County', 'Model', 'Electric Range']]
#X = electric_vehicles.drop(columns = ['Electric Range'])

y = electric_vehicles['Electric_Range_Category']
#scaler = MinMaxScaler()
#y = scaler.fit_transform(electric_vehicles['Electric_Range_Category'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42434)

linear_model = LogisticRegression(C = 1)
#model = LogisticRegression(C = 10e9)
linear_model.fit(X_train, y_train)

y_pred = linear_model.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [93]:
linear_model.score(X_train,y_train)

0.9214176143877281

In [94]:
linear_model.score(X_test,y_test)

0.9190116508688784

In [22]:
mlflow.log_artifact("data/Electric_Vehicle_Population_Data.csv")

In [17]:
#Parameters
mlflow.log_param("C", 1)
mlflow.log_param("train_test_split", False)

# Metics
for train_step in range(100):
    mlflow.log_metric("test_metrics", train_step **2, step = train_step) # accuracy


In [345]:
mlflow.end_run()

MlflowException: The run 018bf706d180475d87a085fce63d5db3 must be in 'active' lifecycle_stage.