In [None]:
## CAR PRICE PREDICTION PROJECT
# Importing necessary libraries 

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

In [None]:
# loading the dataset using pandas 

path=r'c:\Users\santh\OneDrive\Documents\car data.csv'

In [3]:
df=pd.read_csv(path)

In [4]:
df

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Driven_kms,Fuel_Type,Selling_type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.60,6.87,42450,Diesel,Dealer,Manual,0
...,...,...,...,...,...,...,...,...,...
296,city,2016,9.50,11.60,33988,Diesel,Dealer,Manual,0
297,brio,2015,4.00,5.90,60000,Petrol,Dealer,Manual,0
298,city,2009,3.35,11.00,87934,Petrol,Dealer,Manual,0
299,city,2017,11.50,12.50,9000,Diesel,Dealer,Manual,0


In [None]:
# splitting the dataset into features and target variable

x=df[['Year','Present_Price','Driven_kms','Fuel_Type','Selling_type','Owner','Transmission']]
y=df[['Selling_Price']]

In [None]:
# managing categorical varaibles through one hot encoding to convert them into numerical variables
# ONE HOT ENCODING is a process of converting categorical variables into a form that could be provided to ML algorithms to do a better job in prediction

categorical_columns = ['Year','Driven_kms','Fuel_Type','Selling_type','Owner','Transmission']

ct = ColumnTransformer(transformers=[
    ('encoder', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_columns)
], remainder='passthrough')

x_encoded = ct.fit_transform(x)

In [None]:
# testing and training the dataset through the algorithm Linear regression by using sklearn

x_train,x_test,y_train,y_test=train_test_split(x_encoded,y,test_size=0.2,random_state=42)

In [None]:
model=LinearRegression()
model.fit(x_train,y_train)

In [20]:
y_pred=model.predict(x_test)

In [27]:
# evaluating the model by using metrics like mean squared error and r2 score


print(f'predcited car selling price is {y_pred[0][0]:,.2f}')
print(f'actual car price is {y_test.values}')
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test,y_pred))

predcited car selling price is 4.63
actual car price is [[ 0.35]
 [10.11]
 [ 4.95]
 [ 0.15]
 [ 6.95]
 [ 7.45]
 [ 1.1 ]
 [ 0.5 ]
 [ 0.45]
 [ 6.  ]
 [ 9.25]
 [ 1.1 ]
 [ 7.5 ]
 [ 0.4 ]
 [ 6.15]
 [ 2.65]
 [ 0.95]
 [17.  ]
 [ 0.48]
 [ 1.7 ]
 [ 0.42]
 [ 8.35]
 [ 5.35]
 [ 3.  ]
 [ 0.6 ]
 [ 3.75]
 [ 5.  ]
 [ 2.65]
 [ 1.2 ]
 [ 1.2 ]
 [ 0.65]
 [ 8.25]
 [ 0.45]
 [ 1.95]
 [ 7.75]
 [ 3.65]
 [ 5.5 ]
 [ 3.8 ]
 [ 2.9 ]
 [ 4.75]
 [ 3.51]
 [ 1.5 ]
 [ 4.5 ]
 [ 0.42]
 [ 6.5 ]
 [ 0.6 ]
 [ 6.25]
 [ 5.25]
 [ 4.  ]
 [ 3.1 ]
 [ 5.11]
 [ 1.35]
 [23.5 ]
 [23.  ]
 [ 8.25]
 [10.9 ]
 [ 5.5 ]
 [ 9.5 ]
 [ 2.1 ]
 [ 7.4 ]
 [ 0.3 ]]
Mean Squared Error: 6.372332821098725
R² Score: 0.7233700401443759


In [None]:
# giving new data to the model to test the accuracy of the model
# steps wise because of categorical variables

# Step 1: Gather input
yearinput = int(input("Enter the year of the car: "))
presentpriceinput = float(input("Enter the present price of the car: "))
drivenkmsinput = float(input("Enter the driven kms of the car: "))
fueltypeinput = input("Enter the fuel type of the car: ")
sellingtypeinput = input("Enter the selling type of the car: ")
ownerinput = int(input("Enter the owner count of the car: "))
transmissioninput = input("Enter the transmission type of the car: ")

# Step 2: Make a DataFrame with same column names
newcar_df = pd.DataFrame([{
    'Year': yearinput,
    'Present_Price': presentpriceinput,
    'Driven_kms': drivenkmsinput,
    'Fuel_Type': fueltypeinput,
    'Selling_type': sellingtypeinput,
    'Owner': ownerinput,
    'Transmission': transmissioninput
}])

In [24]:
# Step 3: Transform new data using the fitted ColumnTransformer
newcar_encoded = ct.transform(newcar_df)



In [25]:
# Step 4: Predict the price
predicted_price = model.predict(newcar_encoded)
print(f"Predicted Selling Price: ₹ {predicted_price[0][0]:,.2f}")


Predicted Selling Price: ₹ 9.58
