<a href="https://colab.research.google.com/github/semih108/car-price-prediction/blob/main/pricePrediction_clean_train_predict3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
import os

# Step 1: Define the path where your CSV files are stored
csv_dir = '/content/drive/MyDrive/car_data/'

# Step 2: List all CSV files in the directory
csv_files = [f for f in os.listdir(csv_dir) if f.endswith('.csv')]

# Step 3: Load each CSV file into a DataFrame and append it to a list
data_frames = []
for file in csv_files:
    file_path = os.path.join(csv_dir, file)
    df = pd.read_csv(file_path)
    data_frames.append(df)

# Step 4: Concatenate all DataFrames into a single DataFrame
combined_df = pd.concat(data_frames, ignore_index=True)

# Step 5: Save the combined DataFrame to a new CSV file (optional)
combined_df.to_csv('/content/drive/MyDrive/combined_car_data.csv', index=False)


In [4]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import joblib
import json

# Load Data
data = pd.read_csv('/content/drive/MyDrive/combined_car_data.csv')

# Function to clean data
def clean_data(df):
    df['Price'] = df['Price'].str.replace(r'[^\d]', '', regex=True)
    df['Mileage'] = df['Mileage'].str.replace(r'[^\d]', '', regex=True)
    df['Fuel'] = df['Fuel'].replace({'Elektro/Benzin': 'Hybrid'})
    power = df['Power'].str.extract(r'(\d+) kW \((\d+) PS\)')
    df['Power_PS'] = power[1]
    df.drop(columns=['Power'], inplace=True)
    df['Price'] = pd.to_numeric(df['Price'], errors='coerce')
    df['Mileage'] = pd.to_numeric(df['Mileage'], errors='coerce')
    df['Power_PS'] = pd.to_numeric(df['Power_PS'], errors='coerce')
    df['Date'] = pd.to_datetime(df['Date'].apply(lambda x: x if re.match(r'\d{2}/\d{4}', x) else None), format='%m/%Y', errors='coerce')
    data['Car_Age'] = data['Date'].dt.year
    data.drop(columns=['Date'], inplace=True)
    # Extract subType using the API data
    #df['subType'] = df.apply(lambda x: find_subtype(x['Name'].lower(), x['Make'].lower(), x['Model'].lower(), all_model_details), axis=1)

    # Impute missing values for subType using a placeholder
    #df['subType'] = df['subType'].fillna('unknown')
    return df

data = clean_data(data)

# Save cleaned data to a CSV file
cleaned_data_path = '/content/drive/MyDrive/cleaned_car_data.csv'
data.to_csv(cleaned_data_path, index=False)
print(f'Cleaned data saved to {cleaned_data_path}')


Cleaned data saved to /content/drive/MyDrive/cleaned_car_data.csv


In [6]:
bmw_models = {
    '1er': ['116i', '118i', '120i', '118d', '120d', '130i', '125i', '135i', '123d',
            '116d', 'M', '114i', 'M135i', '114d', '125d', 'M140i', '128ti'],
    '2er': ['216d Gran Coupe', '220i', 'M235i', '220d', '228i', '218d', '225d',
            '218i Active Tourer', '220i Active Tourer', '225i Active Tourer',
            '216d Active Tourer', '218d Active Tourer', '220d Active Tourer',
            '218i', 'M2', '216i Active Tourer', '216i Gran Tourer', '218i Gran Tourer',
            '220i Gran Tourer', '214d Active Tourer', '214d Gran Tourer', '216d Gran Tourer',
            '218d Gran Tourer', '220d Gran Tourer', '225xe Active Tourer', '230i',
            'M240i', '218i Gran Coupe', 'M235i Gran Coupe', '220d Gran Coupe',
            '220i Gran Coupe', '218d Gran Coupe', '223i Active Tourer', '223d Active Tourer',
            '225e Active Tourer', '230e Active Tourer','M2'],
    '3er': ['325xi', '330i', '330xi', '318d', '320d', '320td', '330d', '330xd',
            '318Ci', '320Ci', '325Ci', '330Ci', 'M3', '320Cd', '330Cd', '316i', '318i',
            '320i', '325i', '316g', '316ti', '318ti', '325ti', '318td', '335i', '335d',
            '325d', '320si', '316d', '328i', 'ActiveHybrid', '320i GT', '328i GT',
            '335i GT', '318d GT', '320d GT', '325d GT', '330d GT', '335d GT', '340i',
            '330e', '330i GT', '340i GT', 'M340i', 'M340d', '320e','M3'],
    '4er': ['420i', '428i', '435i', '420d', '430d', '435d', 'M4', '420i Gran Coupe',
            '428i Gran Coupe', '435i Gran Coupe', '418d Gran Coupe', '420d Gran Coupe',
            '425d', '430d Gran Coupe', '435d Gran Coupe', '440i', '418d', '430i',
            '430i Gran Coupe', '440i Gran Coupe', '425d Gran Coupe', 'M440i', 'M440d',
            '430i Gran Coupe', '430d Gran Coupe', '420d Gran Coupe'],
    '5er': ['518', '520', '525', '520i', '528', '528i', '525i', '525e', '524td',
            '518i', '535i', 'M535i', '524d', '530i', 'M5', '525ix', '525tds', '525td',
            '540i', '518g', '523i', '530d', '520d', '525d', '545i', '535d', '550i',
            '535i GT', '550i GT', '530d GT', '535d GT', 'ActiveHybrid5', 'M550d', '520d GT',
            '518d', 'M550i', '540d', '530e', '545e', '520e','M5'],
    '6er': ['633 CSi', '635 CSi', '628 CSi', 'M635 CSi', '645 CI', '630i', '650 CI',
            'M6', '635d', '640i', '650i', '640d', '640i Gran Coupe', '650i Gran Coupe',
            '640d Gran Coupe', 'M6 Gran Coupe', '630i GT', '640i GT', '630d GT', '640d GT',
            '620d GT'],
    '7er': ['728', '730', '733i', '728i', '732i', '735i', '745i', '730i', '750i',
            '740i', '725tds', '730d', '740d', '760i', '745d', 'ActiveHybrid7', '750d',
            '740e', 'M760Li', '745e', '750e', 'M760e'],
    '8er': ['850 i', '850 Ci', '850 CSi', '840 Ci', 'M850i', '840d', '840i', 'M8',
            '840i Gran Coupe', 'M850i Gran Coupe', '840d Gran Coupe'],
    'i3': ['i3', 'i3s'],
    'i4': ['eDrive40 Gran Coupe', 'M50 Gran Coupe', 'eDrive35 Gran Coupe','i4'],
    'i7': ['60'],
    'i8': ['i8'],
    'iX': ['iX 40', 'iX 50', 'M60'],
    'iX1': ['xDrive 30','ix1'],
    'iX3': ['electric drive 210 kW','ix3'],
    'iX1': ['xDrive 30', 'ix1'],
    'iX3': ['electric drive 210 kW', 'ix3'],
    'X1': ['xDrive 28i', 'sDrive 18d', 'sDrive 20d', 'xDrive 18d', 'xDrive 20d',
           'xDrive 23d', 'sDrive 18i', 'xDrive 25i', 'sDrive 20i', 'xDrive 20i',
           'sDrive 16d', 'xDrive 25d', 'xDrive 25e', 'xDrive 23i', 'xDrive 30e'],
    'X2': ['sDrive 20i', 'xDrive 20d', 'xDrive 25d', 'sDrive 18i', 'xDrive 20i',
           'sDrive 18d', 'sDrive 20d', 'xDrive 18d', 'M35i', 'xDrive 25e'],
    'X3': ['xDrive 20d', 'xDrive 30d', 'xDrive 40i', 'xDrive 30e', 'M40i', 'M40d'],
    'X4': ['xDrive 20i', 'xDrive 30i', 'xDrive 20d', 'xDrive 30d', 'M40i'],
    'X5': ['xDrive 40i', 'xDrive 45e', 'xDrive 30d', 'xDrive 40d', 'M50d', 'M50i'],
    'X6': ['xDrive 40i', 'xDrive 30d', 'xDrive 40d', 'M50i'],
    'X7': ['xDrive 40i', 'xDrive 40d', 'M50i'],
    'XM': ['XM'],
    'Z1': ['2.5i'],
    'Z3': ['1.8i', '1.9i', '2.0i', '2.8i', '3.0i', 'M'],
    'Z4': ['sDrive 20i', 'sDrive 30i', 'M40i'],
    'Z8': ['4.9i']
}


# Example structure for all_model_details
all_model_details = {
    'audi': {
        '1.3': {},
        '1.6 Diesel': {},
        '1.6': {},
        '1.8': {},
        '1.8 S': {},
        '1.8 Turbo': {},
        '1.8 TFSI': {},
        '1.9': {},
        '1.9 E': {},
        '1.9 Diesel': {},
        '1.9 TDI': {},
        '1.9 TDIe': {},
        '2.0': {},
        '4.2': {},
        '2.0 Diesel': {},
        '2.0 FSI': {},
        '2.0 TDI': {},
        '2.0 TDI clean diesel': {},
        '2.0 TFSI': {},
        '2.0 TFSI E85': {},
        '2.0 TFSI g-tron': {},
        '2.0 TFSI Hybrid': {},
        '2.2': {},
        '2.2 Turbo': {},
        '2.2 ECE': {},
        '2.3': {},
        '2.4 Diesel': {},
        '2.5 TDI': {},
        '2.5 TFSI': {},
        '2.5 Turbodiesel': {},
        '2.6 V6': {},
        '2.7 V6': {},
        '2.7 V6 TDI': {},
        '2.8 V6': {},
        '2.9 V6 TFSI': {},
        '2.9 TFSI': {},
        '3.0 V6': {},
        '3.0 V6 TDI': {},
        '3.0 V6 TDI clean diesel': {},
        '3.0 V6 TFSI': {},
        '3.0 TDI': {},
        '3.0 TDI e-tron': {},
        '3.0 TFSI': {},
        '3.2 V6': {},
        '3.2 V6 FSI': {},
        '3.6 VR6 FSI': {},
        '4.0 TDI': {},
        '4.0 TFSI': {},
        '4.0 V8': {},
        '4.0 V8 TDI': {},
        '4.0 V8 TFSI': {},
        '4.2 S4 V8': {},
        '4.2 V8': {},
        '4.2 V8 FSI': {},
        '4.2 V8 TDI': {},
        '5.0 V10 FSI': {},
        '5.2 V10 FSI': {},
        '5.2 TFSI': {},
        '5.2 FSI': {},
        '5.2': {},
        '6.0 V12 TDI': {},
        'Cooper': {},
        'Cooper S': {},
        'Cooper SD': {},
        'John Cooper Works': {},
        'S4': {},
        'e-tron': {},
        'e-tron RS': {},
        'e-tron S': {},
        'e-tron35': {},
        'e-tron40': {},
        'e-tron45': {},
        'e-tron50': {},
        'e-tron55': {},
        'v6': {},
        'v8': {},
        'w12': {},
        '1.0 TFSI': {},
        '1.4 TDI': {},
        '1.4 TFSI': {},
        '1.4 TFSI ACT': {},
        '1.4 TFSI e-tron': {},
        '1.4 TGI': {},
        '1.4 TGI g-Tron': {},
        '1.5 TFSI': {},
        '1.5 TSI ACT': {},
        '1.6 TDI': {},
        '1.6 TDIe': {},
        '1.6 Turbodiesel': {},
        '2.0 TDI clean diesel': {},
        '25 TFSI': {},
        '30 g-tron': {},
        '30 TDI': {},
        '30 TFSI': {},
        '35 TDI': {},
        '35 TFSI': {},
        '40 e-tron': {},
        '40 g-tron': {},
        '40 TDI': {},
        '40 TFS': {},
        '40 TFSI': {},
        '40 TFSIe': {},
        '45 TDI': {},
        '45 TFSIe': {},
        '45 TFSI': {},
        '50 TDI': {},
        '50 TFSIe': {},
        '50 e-tron': {},
        '55 TFSI': {},
        '55 TFSIe': {},
        '55 e-tron': {},
        '60 TFSIe': {},
        '3.0tdi': {},
        '2.0tdi': {},
        '1.8tfsi': {},
        '30tdi': {},
        '35tdi': {},
        '40tdi': {},
        '40gtron': {},
        '50tdi': {},
        '55tfsi': {},
        '1.0tfsi': {},
        '1.4tfsi': {},
        '1.6tdi': {},
        '1.9tdi': {},
        '1.5tfsi': {},
        '2.7tdi': {},
        '2.0tfsi': {},
        '3.0tfsi': {},
        '4.2fsi': {},
        '4.2tdi': {},
        '40tfsi': {}
    },

    'volkswagen': {
        '2.0 TDI': {},
        '3.0 V6 TDI': {},
        '3.0 TDI': {},
        '1.5 TSI': {},
        '2.0 TSI': {},
        '1.4 eHybrid': {},
        '1.2 TSI': {},
        '1.2': {},
        '1.4 TSI': {},
        '1.6 TDI': {},
        '1.4': {},
        '1.6': {},
        '1.0': {},
        '1.6 FSI': {},
        '1.8': {},
        '1.8 Turbo': {},
        '2.0': {},
        '2.3 V5': {},
        '2.8 V6': {},
        '1.9 TDI': {},
        '1.9 SDI': {},
        '2.0 SDI': {},
        '2.0 EcoFuel': {},
        '1.6 BIFuel': {},
        '1.0 TSI': {},
        '1.4 TGI': {},
        '1.5 TSI ACT': {},
        '2.0 TFSI': {},
        '3.6 V6 FSI': {},
        'electric drive 100 kW': {},
        '0.9': {},
        '1.1': {},
        '1.3': {},
        '1.05': {},
        '2.9 VR6': {},
        '1.8 TSI': {},
        '1.5 TGI': {},
        'electric drive': {},
        '1.4 GTE': {},
        '2.0 16V': {},
        '2.5 TDI DPF': {},
        '2.0 TSI / TFSI': {},
        '3.6 V6': {},
        '1.4 TDI': {},
        '1.3 Diesel': {},
        '1.7 SDI': {},
        '2.4 Diesel': {},
        '2.5 Diesel': {},
        '2.8 TDI': {},
        '1.0 eTSI': {},
        '2.0 TDI clean diesel': {},
        '3.2 V6': {},
        '4.0 W8': {},
        '1.4 TSI EcoFuel': {},
        '1.4 TSI Hybrid': {},
        '4.0 TDI': {},
        '1.4 Diesel': {},
        '1.0 TGI': {},
        '1.4 TSI ACT': {},
        '1.0 TFSI': {},
        '1.4 TFSI': {},
        '1.4 TFSI ACT': {},
        '1.5 TFSI': {},
        '1.6 TDIe': {},
        '2.0 BiFuel': {},
        '1.2 TDI': {},
        '1.6 Turbodiesel': {},
        '2.2': {},
        '1.8 Turbo': {},
        '2.5 V6 TDI': {},
        'V6 TDI': {},
        'TDI': {},
        '4.2 FSI': {},
        '1.0 MPI': {},
        '1.2 MPI': {},
        '1.4 MPI': {},
        '1.6 MPI': {},
        '1.6 TDI CR': {},
        '2.0 TDI CR': {},
        '2.0 BiTDI': {},
        '2.5 FSI': {},
        '3.0 TSI': {},
        '4.2 V8': {},
        '4.9 V8': {},
        '5.0 V10 TDI': {},
        '6.0 W12': {}
    },

    'bmw': bmw_models

}

bmw_model = {
    '1': '1er',
    '2': '2er',
    '3': '3er',
    '4': '4er',
    '5': '5er',
    '6': '6er',
    '7': '7er',
    '8': '8er',
    'i3': 'i3',
    'i4': 'i4',
    'i7': 'i7',
    'i8': 'i8',
    'iX': 'iX',
    'iX1': 'iX1',
    'iX3': 'iX3',
    'X1': 'X1',
    'X2': 'X2',
    'X3': 'X3',
    'X4': 'X4',
    'X5': 'X5',
    'X6': 'X6',
    'X7': 'X7',
    'XM': 'XM',
    'Z1': 'Z1',
    'Z3': 'Z3',
    'Z4': 'Z4',
    'Z8': 'Z8'
}
mercedes_model = {
    '123': '123',
    '190': '190/190 E',
    'A': 'A-Klasse',
    'A-Limousine': 'A-Klasse Limousine',
    'AMG GT': 'AMG GT',
    'AMG GT 4-door': 'AMG GT 4-door',
    'B': 'B-Klasse',
    'C': 'C-Klasse',
    'C-All-Terrain': 'C-Klasse All-Terrain',
    'Citan': 'Citan',
    'CL': 'CL-Coupe',
    'CLA': 'CLA-Klasse',
    'CLC': 'CLC-Klasse',
    'CLK': 'CLK-Klasse',
    'CLS': 'CLS-Klasse',
    'E': 'E-Klasse',
    'E-All-Terrain': 'E-Klasse All-Terrain',
    'EQA': 'EQA',
    'EQB': 'EQB',
    'EQC': 'EQC',
    'EQE': 'EQE',
    'EQS': 'EQS',
    'EQV': 'EQV',
    'G': 'G-Klasse',
    'GL': 'GL-Klasse',
    'GLA': 'GLA-Klasse',
    'GLB': 'GLB-Klasse',
    'GLC': 'GLC-Klasse',
    'GLE': 'GLE-Klasse',
    'GLK': 'GLK-Klasse',
    'GLS': 'GLS-Klasse',
    'M': 'M-Klasse',
    'ML': 'M-Klasse',
    'MB': 'MB 100',
    'R': 'R-Klasse',
    'S': 'S-Klasse',
    'SL': 'SL-Klasse',
    'SLC': 'SLC-Klasse',
    'SLK': 'SLK-Klasse',
    'SLR': 'SLR McLaren',
    'SLS': 'SLS AMG',
    'Sprinter': 'Sprinter',
    'Strich Acht': 'Strich Acht',
    'T1': 'T1 Transporter',
    'T': 'T-Klasse',
    'V': 'V-Klasse',
    'V-Marco Polo': 'V-Klasse Marco Polo',
    'Vaneo': 'Vaneo',
    'Viano': 'Viano',
    'Vito': 'Vito',
    'Vito Tourer': 'Vito Tourer',
    'X': 'X-Klasse'
}

In [8]:
import re
import pandas as pd
import numpy as np

def find_subtype(name, make, model, all_model_details):
    name_lower = name.lower().replace(" ", "").replace(",", ".")

    if make.lower() in ['audi', 'volkswagen']:
        subtypes = all_model_details[make.lower()].keys()
        matching_subtype = None
        for subtype in subtypes:
            subtype_lower = subtype.lower().replace(" ", "").replace(",", ".")
            if subtype_lower in name_lower:
                # Keep the longest matching subtype
                if matching_subtype is None or len(subtype_lower) > len(matching_subtype):
                    matching_subtype = subtype_lower
        return matching_subtype

    elif make.lower() == 'bmw':
        subtypes = all_model_details[make.lower()].get(model, [])
        matching_subtype = None
        for subtype in subtypes:
            subtype_lower = subtype.lower().replace(" ", "").replace(",", ".")
            # Try to match the subtype as a substring
            if subtype_lower in name_lower:
                if matching_subtype is None or len(subtype_lower) > len(matching_subtype):
                    matching_subtype = subtype_lower
        # If no subtype found, fall back to using the model as the subtype
        if matching_subtype is None:
            #print(f"No subtype found for BMW {model}, falling back to model.")
            return model
        return matching_subtype
    return None


# Function to update Volkswagen Golf models based on year
def update_golf_model(row):
    if row['Make'].lower() == 'volkswagen' and 'golf' in row['Model'].lower():
        year = row['Car_Age']
        if 1974 <= year <= 1983:
            return 'Golf 1'
        elif 1983 <= year <= 1992:
            return 'Golf 2'
        elif 1991 <= year <= 1997:
            return 'Golf 3'
        elif 1997 <= year <= 2003:
            return 'Golf 4'
        elif 2003 <= year <= 2008:
            return 'Golf 5'
        elif 2008 <= year <= 2012:
            return 'Golf 6'
        elif 2012 <= year <= 2019:
            return 'Golf 7'
        elif 2019 <= year:
            return 'Golf 8'
    return row['Model']

# Load the data from the CSV file
file_path = '/content/drive/MyDrive/cleaned_car_data.csv'
data = pd.read_csv(file_path)

def determine_subtypeBmw(row):
    if row['Make'].lower() == 'bmw':
        first_char = row['Model'][0]
        return bmw_model.get(first_char, row['Model'])
    return row['Model']

def determine_subtypeMercedes(row):
    if row['Make'].lower() == 'mercedes-benz':
        first_char = row['Model'].split()[0]
        return mercedes_model.get(first_char, row['Model'])
    return row['Model']

# Update Volkswagen Golf models
data['Model'] = data.apply(update_golf_model, axis=1)
data['Model'] = data.apply(determine_subtypeBmw, axis=1)

# Apply the logic to set Detected_Subtype
data['Detected_Subtype'] = np.where(
    data['Make'].str.lower().isin(['mercedes-benz']),
    data['Model'],
    data.apply(lambda row: find_subtype(row['Name'], row['Make'], row['Model'], all_model_details), axis=1)
)
data['Model'] = data.apply(determine_subtypeMercedes, axis=1)


# Display the result
data.head(20)
data.drop(columns=['Name'], inplace=True)
data['Car_Age'] = 2024 - data['Car_Age'] # have to calculate it here because of golf model

# Save the cleaned data to a CSV file
improved_subtype = '/content/drive/MyDrive/cleaned_data_with_subtype.csv'
data.to_csv(improved_subtype, index=False)
print(f'Cleaned data saved to {improved_subtype}')


No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to model.
No subtype found for BMW 2er, falling back to 

In [9]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.impute import SimpleImputer

# Load the cleaned data into a DataFrame
data = pd.read_csv('/content/drive/MyDrive/cleaned_data_with_subtype.csv')

# Shuffle the data
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

nan_sum = data.isna().sum()
print(nan_sum)

# Handle categorical data
le_transmission = LabelEncoder()
le_fuel = LabelEncoder()
le_make = LabelEncoder()
le_model = LabelEncoder()
le_subtype = LabelEncoder()

# Apply label encoding to categorical columns
data['Transmission'] = le_transmission.fit_transform(data['Transmission'].str.lower())
data['Fuel'] = le_fuel.fit_transform(data['Fuel'].str.lower())
data['Make'] = le_make.fit_transform(data['Make'].str.lower())
data['Model'] = le_model.fit_transform(data['Model'].str.lower())
data['Detected_Subtype'] = le_subtype.fit_transform(data['Detected_Subtype'].str.lower().fillna(''))

# Prepare features and target
X = data.drop(columns=['Price'])
y = data['Price']

imputer_x = SimpleImputer(strategy='median')
X = pd.DataFrame(imputer_x.fit_transform(X), columns=X.columns)

imputer_y = SimpleImputer(strategy='median')
y = pd.Series(imputer_y.fit_transform(y.values.reshape(-1, 1)).ravel(), name='Price')

# Save the feature names
feature_names = X.columns.tolist()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate Model
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')

# Save the model and label
directory = '/content/drive/MyDrive/models'
joblib.dump(feature_names, os.path.join(directory, 'feature_names.pkl'))
joblib.dump(model, os.path.join(directory, 'car_price_predictor.pkl'))
joblib.dump(le_transmission, os.path.join(directory, 'le_transmission.pkl'))
joblib.dump(le_fuel, os.path.join(directory, 'le_fuel.pkl'))
joblib.dump(le_make, os.path.join(directory, 'le_make.pkl'))
joblib.dump(le_model, os.path.join(directory, 'le_model.pkl'))
joblib.dump(le_subtype, os.path.join(directory, 'le_subtype.pkl'))


Make                   0
Model                  0
Price                718
Mileage               75
Transmission           0
Fuel                   0
Power_PS              13
Car_Age              591
Detected_Subtype    4174
dtype: int64
Mean Absolute Error: 5086.572007981325


['/content/drive/MyDrive/models/le_subtype.pkl']

In [11]:
import numpy as np
import joblib
import pandas as pd

# Load the model, label encoders, and feature names
model = joblib.load('/content/drive/MyDrive/models/car_price_predictor.pkl')
le_transmission = joblib.load('/content/drive/MyDrive/models/le_transmission.pkl')
le_fuel = joblib.load('/content/drive/MyDrive/models/le_fuel.pkl')
le_make = joblib.load('/content/drive/MyDrive/models/le_make.pkl')
le_model = joblib.load('/content/drive/MyDrive/models/le_model.pkl')
le_subtype = joblib.load('/content/drive/MyDrive/models/le_subtype.pkl')
feature_names = joblib.load('/content/drive/MyDrive/models/feature_names.pkl')

# Function to check for unseen labels
def check_label(encoder, label):
    label_lower = label.lower()
    if label_lower not in [cls.lower() for cls in encoder.classes_]:
        raise ValueError(f"Label '{label}' not found in encoder classes: {encoder.classes_}")
    return encoder.transform([label_lower])[0]

# Function to predict car price
def predict_car_price(make, model_name, mileage, transmission, fuel, power_ps, car_age, subtype):
    # Check and transform categorical inputs
    make_encoded = check_label(le_make, make)
    model_encoded = check_label(le_model, model_name)
    transmission_encoded = check_label(le_transmission, transmission)
    fuel_encoded = check_label(le_fuel, fuel)
    subtype_encoded = check_label(le_subtype, subtype)

    # Prepare the feature array with correct column names and order
    features = pd.DataFrame([[make_encoded, model_encoded, mileage, transmission_encoded, fuel_encoded, power_ps, car_age, subtype_encoded]], columns=feature_names)

    # Debug: Print the features to check correctness
    print("Features for prediction:")
    print(features)

    # Predict the price
    predicted_price = model.predict(features)[0]

    return predicted_price

# Example usage
try:
    make = 'mercedes-benz'  # example brand
    model_name = 'c-klasse'  # example model
    mileage = 80000  # example mileage (corrected unrealistic value)
    transmission = 'schaltgetriebe'  # example transmission
    fuel = 'hybrid'  # example fuel
    power_ps = 140  # example power in PS
    car_age = 10  # example car age
    subtype = 'c200'

    predicted_car_price = predict_car_price(make, model_name, mileage, transmission, fuel, power_ps, car_age, subtype)
    print(f'The predicted price of the car is: € {predicted_car_price:.2f}')
except ValueError as e:
    print(f'Error: {e}')


Error: Label 'c200' not found in encoder classes: ['' '1.0' '1.0mpi' '1.0tfsi' '1.0tsi' '1.1' '1.2' '1.2tdi' '1.2tsi' '1.4'
 '1.4ehybrid' '1.4mpi' '1.4tdi' '1.4tfsi' '1.4tgi' '1.4tsi' '1.4tsiact'
 '1.4tsihybrid' '1.5tfsi' '1.5tsi' '1.5tsiact' '1.6' '1.6fsi' '1.6tdi'
 '1.6tdicr' '1.6tdie' '1.7sdi' '1.8' '1.8tfsi' '1.8tsi' '1.9' '1.9i'
 '1.9sdi' '1.9tdi' '1.9tdie' '114d' '114i' '116d' '116i' '118d' '118i'
 '120d' '120i' '123d' '125d' '125i' '128ti' '130i' '135i' '2.0' '2.016v'
 '2.0bitdi' '2.0ecofuel' '2.0fsi' '2.0sdi' '2.0tdi' '2.0tdicleandiesel'
 '2.0tdicr' '2.0tfsi' '2.0tfsihybrid' '2.0tsi' '2.2' '2.2turbo' '2.3'
 '2.5tdi' '2.5tdidpf' '2.5tfsi' '2.5v6tdi' '2.7tdi' '2.7v6' '2.8v6'
 '2.9tfsi' '2.9vr6' '214dactivetourer' '216dactivetourer' '216dgrancoupe'
 '216dgrantourer' '216iactivetourer' '216igrantourer' '218d'
 '218dactivetourer' '218dgrancoupe' '218dgrantourer' '218i'
 '218iactivetourer' '218igrancoupe' '218igrantourer' '220d'
 '220dactivetourer' '220dgrancoupe' '220dgrantourer' '2