In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

cars_data = pd.read_csv('Cardetails.csv')

cars_data.drop(columns=['torque'], inplace=True)
cars_data.dropna(inplace=True)
cars_data.drop_duplicates(inplace=True)

def get_brand_name(car_name):
    return car_name.split(' ')[0].strip()

def clean_data(value):
    value = value.split(' ')[0].strip()
    return float(value) if value else 0

cars_data['name'] = cars_data['name'].apply(get_brand_name)
cars_data['mileage'] = cars_data['mileage'].apply(clean_data)
cars_data['max_power'] = cars_data['max_power'].apply(clean_data)
cars_data['engine'] = cars_data['engine'].apply(clean_data)

cars_data['name'].replace(['Maruti', 'Skoda', 'Honda', 'Hyundai', 'Toyota', 'Ford', 'Renault',
                           'Mahindra', 'Tata', 'Chevrolet', 'Datsun', 'Jeep', 'Mercedes-Benz',
                           'Mitsubishi', 'Audi', 'Volkswagen', 'BMW', 'Nissan', 'Lexus',
                           'Jaguar', 'Land', 'MG', 'Volvo', 'Daewoo', 'Kia', 'Fiat', 'Force',
                           'Ambassador', 'Ashok', 'Isuzu', 'Opel'],
                          list(range(1, 32)), inplace=True)

cars_data['transmission'].replace(['Manual', 'Automatic'], [1, 2], inplace=True)
cars_data['seller_type'].replace(['Individual', 'Dealer', 'Trustmark Dealer'], [1, 2, 3], inplace=True)
cars_data['fuel'].replace(['Diesel', 'Petrol', 'LPG', 'CNG'], [1, 2, 3, 4], inplace=True)
cars_data['owner'].replace(['First Owner', 'Second Owner', 'Third Owner',
                            'Fourth & Above Owner', 'Test Drive Car'],
                           [1, 2, 3, 4, 5], inplace=True)

input_data = cars_data.drop(columns=['selling_price'])
output_data = cars_data['selling_price']

x_train, x_test, y_train, y_test = train_test_split(input_data, output_data, test_size=0.2)

model = LinearRegression()
model.fit(x_train, y_train)
predict = model.predict(x_test)
print(predict)

print(x_train.head(1))

input_data_model = pd.DataFrame(
    [[5, 2022, 12000, 1, 1, 1, 1, 12.99, 2494.0, 100.6, 5.0]],
    columns=['name', 'year', 'km_driven', 'fuel', 'seller_type', 'transmission', 'owner', 'mileage', 'engine', 'max_power', 'seats']
)

car_price = model.predict(input_data_model)
print(f"Predicted car price: {car_price[0]}")

[ -45089.03299391 1032500.50242306  747049.4903118  ...  186162.92062052
  162893.87075676  944049.01334696]
     name  year  km_driven  fuel  seller_type  transmission  owner  mileage  \
613     1  2017     100000     1            1             1      2    24.52   

     engine  max_power  seats  
613  1248.0       88.5    7.0  
Predicted car price: 1088947.5025094897


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cars_data['name'].replace(['Maruti', 'Skoda', 'Honda', 'Hyundai', 'Toyota', 'Ford', 'Renault',
  cars_data['name'].replace(['Maruti', 'Skoda', 'Honda', 'Hyundai', 'Toyota', 'Ford', 'Renault',
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cars_data['transmission'].replace(['Manu