In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, r2_score


In [5]:
# Load the dataset
data = pd.read_csv('data.csv')
data.head()

Unnamed: 0,Brand,Model,AccelSec,TopSpeed_KmH,Range_Km,Efficiency_WhKm,FastCharge_KmH,RapidCharge,PowerTrain,PlugType,BodyStyle,Segment,Seats,PriceEuro
0,Tesla,Model 3 Long Range Dual Motor,4.6,233,450,161,940,Yes,AWD,Type 2 CCS,Sedan,D,5,55480
1,Volkswagen,ID.3 Pure,10.0,160,270,167,250,Yes,RWD,Type 2 CCS,Hatchback,C,5,30000
2,Polestar,2,4.7,210,400,181,620,Yes,AWD,Type 2 CCS,Liftback,D,5,56440
3,BMW,iX3,6.8,180,360,206,560,Yes,RWD,Type 2 CCS,SUV,D,5,68040
4,Honda,e,9.5,145,170,168,190,Yes,RWD,Type 2 CCS,Hatchback,B,4,32997


In [6]:
print(data['Brand'].unique())
print(data['RapidCharge'].unique())
print(data['PowerTrain'].unique())
print(data['PlugType'].unique())
print(data['BodyStyle'].unique())
print(data['Segment'].unique())


['Tesla ' 'Volkswagen ' 'Polestar ' 'BMW ' 'Honda ' 'Lucid ' 'Peugeot '
 'Audi ' 'Mercedes ' 'Nissan ' 'Hyundai ' 'Porsche ' 'MG ' 'Mini ' 'Opel '
 'Skoda ' 'Volvo ' 'Kia ' 'Renault ' 'Mazda ' 'Lexus ' 'CUPRA ' 'SEAT '
 'Lightyear ' 'Aiways ' 'DS ' 'Citroen ' 'Jaguar ' 'Ford ' 'Byton '
 'Sono ' 'Smart ' 'Fiat ']
['Yes' 'No']
['AWD' 'RWD' 'FWD']
['Type 2 CCS' 'Type 2 CHAdeMO' 'Type 2' 'Type 1 CHAdeMO']
['Sedan' 'Hatchback' 'Liftback' 'SUV' 'Pickup' 'MPV' 'Cabrio' 'SPV'
 'Station']
['D' 'C' 'B' 'F' 'A' 'E' 'N' 'S']


In [7]:
# Encoding dictionaries
brand_encoding = {
    'Tesla': 0, 'Volkswagen': 1, 'Polestar': 2, 'BMW': 3, 'Honda': 4,
    'Lucid': 5, 'Peugeot': 6, 'Audi': 7, 'Mercedes': 8, 'Nissan': 9,
    'Hyundai': 10, 'Porsche': 11, 'MG': 12, 'Mini': 13, 'Opel': 14,
    'Skoda': 15, 'Volvo': 16, 'Kia': 17, 'Renault': 18, 'Mazda': 19,
    'Lexus': 20, 'CUPRA': 21, 'SEAT': 22, 'Lightyear': 23, 'Aiways': 24,
    'DS': 25, 'Citroen': 26, 'Jaguar': 27, 'Ford': 28, 'Byton': 29,
    'Sono': 30, 'Smart': 31, 'Fiat': 32
}

rapid_charge_encoding = {'Yes': 1, 'No': 0}

powertrain_encoding = {'AWD': 0, 'RWD': 1, 'FWD': 2}

plug_type_encoding = {
    'Type 2 CCS': 0, 'Type 2 CHAdeMO': 1, 'Type 2': 2, 'Type 1 CHAdeMO': 3
}

body_style_encoding = {
    'Sedan': 0, 'Hatchback': 1, 'Liftback': 2, 'SUV': 3, 'Pickup': 4,
    'MPV': 5, 'Cabrio': 6, 'SPV': 7, 'Station': 8
}

segment_encoding = {
    'D': 0, 'C': 1, 'B': 2, 'F': 3, 'A': 4, 'E': 5, 'N': 6, 'S': 7
}

# Encoding the data
data['Brand'] = data['Brand'].str.strip().map(brand_encoding)
data['RapidCharge'] = data['RapidCharge'].map(rapid_charge_encoding)
data['PowerTrain'] = data['PowerTrain'].map(powertrain_encoding)
data['PlugType'] = data['PlugType'].map(plug_type_encoding)
data['BodyStyle'] = data['BodyStyle'].map(body_style_encoding)
data['Segment'] = data['Segment'].map(segment_encoding)

# Display the encoded data
data.head()

Unnamed: 0,Brand,Model,AccelSec,TopSpeed_KmH,Range_Km,Efficiency_WhKm,FastCharge_KmH,RapidCharge,PowerTrain,PlugType,BodyStyle,Segment,Seats,PriceEuro
0,0,Model 3 Long Range Dual Motor,4.6,233,450,161,940,1,0,0,0,0,5,55480
1,1,ID.3 Pure,10.0,160,270,167,250,1,1,0,1,1,5,30000
2,2,2,4.7,210,400,181,620,1,0,0,2,0,5,56440
3,3,iX3,6.8,180,360,206,560,1,1,0,3,0,5,68040
4,4,e,9.5,145,170,168,190,1,1,0,1,2,4,32997


In [8]:
data=data.drop('Model',axis=1)

In [9]:

# Define features and target
X = data.drop('PriceEuro', axis=1)
y = data['PriceEuro']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# Initialize the Decision Tree Regressor
regressor = DecisionTreeRegressor(random_state=42)

# Train the model
regressor.fit(X_train, y_train)


In [11]:
# Make predictions
y_pred = regressor.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error: {mae}")
print(f"R² Score: {r2}")


Mean Absolute Error: 5428.142857142857
R² Score: 0.92543504982586


In [12]:
import joblib
joblib.dump(regressor, 'decision_tree_model.joblib')


['decision_tree_model.joblib']

In [13]:
model = joblib.load('decision_tree_model.joblib')
user_input = {
    'Brand': 'Tesla',
    'RapidCharge': 'Yes',
    'PowerTrain': 'AWD',
    'PlugType': 'Type 2 CCS',
    'BodyStyle': 'Sedan',
    'Segment': 'D',
    # Include other necessary features with their default or user-provided values
    'AccelSec': 3.1,            # Example value
    'TopSpeed_KmH': 250,        # Example value
    'Range_Km': 500,            # Example value
    'Efficiency_WhKm': 160,     # Example value
    'FastCharge_KmH': 800,      # Example value
    'Seats': 5                  # Example value
}

In [14]:
encoded_input = [
    brand_encoding[user_input['Brand']],
    rapid_charge_encoding[user_input['RapidCharge']],
    powertrain_encoding[user_input['PowerTrain']],
    plug_type_encoding[user_input['PlugType']],
    body_style_encoding[user_input['BodyStyle']],
    segment_encoding[user_input['Segment']],
    user_input['AccelSec'],
    user_input['TopSpeed_KmH'],
    user_input['Range_Km'],
    user_input['Efficiency_WhKm'],
    user_input['FastCharge_KmH'],
    user_input['Seats']
]

In [15]:
import numpy as np
# Reshape the input to match the model's expected input shape
encoded_input = np.array(encoded_input).reshape(1, -1)

# Predict the price
predicted_price = model.predict(encoded_input)

# Output the predicted price
print(f"Predicted Price: {predicted_price[0]:.2f} Euro")

Predicted Price: 65000.00 Euro


