# Import Necessary Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer


# Load Dataset

In [2]:
# Read the car listings from the CSV file located in the working directory.
data = pd.read_csv("car data.csv")

# Preview the first five rows to sanity-check the columns that were parsed.
print(data.head(n=5))


  Car_Name  Year  Selling_Price  Present_Price  Kms_Driven Fuel_Type  \
0     ritz  2014           3.35           5.59       27000    Petrol   
1      sx4  2013           4.75           9.54       43000    Diesel   
2     ciaz  2017           7.25           9.85        6900    Petrol   
3  wagon r  2011           2.85           4.15        5200    Petrol   
4    swift  2014           4.60           6.87       42450    Diesel   

  Seller_Type Transmission  Owner  
0      Dealer       Manual      0  
1      Dealer       Manual      0  
2      Dealer       Manual      0  
3      Dealer       Manual      0  
4      Dealer       Manual      0  


In [10]:
# Dimensions of the loaded frame as (number of rows, number of columns).
data.shape

(301, 9)

In [12]:
# Data type of every column; `object` columns hold text and must be encoded
# before they can be fed to a numeric model.
data.dtypes

Car_Name          object
Year               int64
Selling_Price    float64
Present_Price    float64
Kms_Driven         int64
Fuel_Type         object
Seller_Type       object
Transmission      object
Owner              int64
dtype: object

# Preprocess the Data

In [3]:
# Separate the predictors from the target.
# 'Car_Name' is dropped because it is likely not useful for prediction in its
# raw (free-text) form; 'Selling_Price' is dropped because it is the target.
X = data.drop(columns=['Car_Name', 'Selling_Price'])
y = data['Selling_Price']  # Target variable

# One-hot encode the text-valued columns. All remaining columns are passed
# through unchanged and placed after the encoded columns in the output.
categorical_features = ['Fuel_Type', 'Seller_Type', 'Transmission']

# handle_unknown="ignore": a category that was not seen while fitting is
# encoded as all zeros at transform time instead of raising an error, so the
# fitted transformer can safely be reused on new rows. The output produced
# for the data it was fitted on is unaffected by this setting.
one_hot = OneHotEncoder(handle_unknown="ignore")
transformer = ColumnTransformer([("one_hot",
                                   one_hot,
                                   categorical_features)],
                                 remainder="passthrough")

# NOTE: the transformer is fitted on the full feature table, before any
# train/test split. The encoder only learns the set of category labels here
# (nothing derived from the target), and the passthrough columns learn nothing.
X_transformed = transformer.fit_transform(X)


## Split the Data into Training and Test Sets

In [4]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_transformed,
    y,
    test_size=0.2,
    random_state=42,
)


## Train the Model

In [5]:
# Create a linear regression estimator with its default settings.
model = LinearRegression()

# Estimate the coefficients from the training split only.
model.fit(X=X_train, y=y_train)


## Make Predictions and Evaluate the Model

In [6]:
# Predict the target for the held-out rows.
y_pred = model.predict(X_test)

# Compare the predictions with the true held-out targets:
# mean squared error (lower is better) and R^2 (closer to 1 is better).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")


Mean Squared Error: 3.4788039706425584
R^2 Score: 0.8489813024897045


## Example: Predict the Selling Price of a New Car

In [7]:
# A single hypothetical listing, written as one record (column -> value).
# The keys are the feature columns the transformer was fitted on, i.e. every
# dataset column except 'Car_Name' and 'Selling_Price'.
new_car = pd.DataFrame([
    {
        'Year': 2015,
        'Present_Price': 6.40,
        'Kms_Driven': 20000,
        'Fuel_Type': 'Petrol',
        'Seller_Type': 'Individual',
        'Transmission': 'Manual',
        'Owner': 0,
    }
])

# Applying the same transformation to the new data: the already-fitted
# transformer is reused with transform() only (no refit), so the new row is
# encoded into the same column layout as the matrix the model was trained on.
new_car_transformed = transformer.transform(new_car)

# Show the encoded row: one-hot columns first, then the passthrough columns.
print(new_car_transformed)


[[0.000e+00 0.000e+00 1.000e+00 0.000e+00 1.000e+00 0.000e+00 1.000e+00
  2.015e+03 6.400e+00 2.000e+04 0.000e+00]]


In [8]:
# Making a prediction
# predict() returns one value per input row; the input has a single row, so
# index 0 below picks the only prediction.
predicted_price = model.predict(new_car_transformed)

# NOTE(review): nothing in this notebook establishes the unit or currency of
# 'Selling_Price'; the "k USD" label is an assumption. The car names look like
# an Indian-market dataset (prices possibly in lakhs of INR) -- TODO confirm
# against the data source and correct the label if needed.
print(f"The predicted selling price of the car is: {predicted_price[0]:.2f} k USD")


The predicted selling price of the car is: 3.49 k USD
