In [None]:
import pandas as pd

# Read the car CSV into a DataFrame; `url` is the path handed to pandas.
url = '/content/sample_data/car.csv'
data = pd.read_csv(url)

# First five rows, to eyeball column names and value formats.
print(data.head())

# Summary statistics (count/mean/std/min/quartiles/max) for the numeric columns.
print(data.describe())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare: wrapping it in print() would append a stray "None" line.
data.info()


  Car_Name  Year  Selling_Price  Present_Price  Kms_Driven Fuel_Type  \
0     ritz  2014           3.35           5.59       27000    Petrol   
1      sx4  2013           4.75           9.54       43000    Diesel   
2     ciaz  2017           7.25           9.85        6900    Petrol   
3  wagon r  2011           2.85           4.15        5200    Petrol   
4    swift  2014           4.60           6.87       42450    Diesel   

  Seller_Type Transmission  Owner  
0      Dealer       Manual      0  
1      Dealer       Manual      0  
2      Dealer       Manual      0  
3      Dealer       Manual      0  
4      Dealer       Manual      0  
              Year  Selling_Price  Present_Price     Kms_Driven       Owner
count   301.000000     301.000000     301.000000     301.000000  301.000000
mean   2013.627907       4.661296       7.628472   36947.205980    0.043189
std       2.891554       5.082812       8.644115   38886.883882    0.247915
min    2003.000000       0.100000       0.32000

In [None]:

# Number of missing values in each column.
print(data.isnull().sum())

# Derive new, stage-named frames instead of reassigning `data`.
# Overwriting `data` with the encoded frame makes this cell fail with a
# KeyError when run a second time (the categorical columns no longer exist)
# and throws away the raw frame that earlier output was produced from.
data_clean = data.dropna()

# One-hot encode the three categorical columns; every level gets its own
# indicator column (no level is dropped here).
data_encoded = pd.get_dummies(
    data_clean,
    columns=['Fuel_Type', 'Seller_Type', 'Transmission'],
)

print(data_encoded.head())


Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
Owner            0
dtype: int64
  Car_Name  Year  Selling_Price  Present_Price  Kms_Driven  Owner  \
0     ritz  2014           3.35           5.59       27000      0   
1      sx4  2013           4.75           9.54       43000      0   
2     ciaz  2017           7.25           9.85        6900      0   
3  wagon r  2011           2.85           4.15        5200      0   
4    swift  2014           4.60           6.87       42450      0   

   Fuel_Type_CNG  Fuel_Type_Diesel  Fuel_Type_Petrol  Seller_Type_Dealer  \
0          False             False              True                True   
1          False              True             False                True   
2          False             False              True                True   
3          False             False              True                True   
4          False     

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib


# --- Configuration -----------------------------------------------------------
# Every tunable value is named once here instead of being buried in the code.
url = '/content/sample_data/car.csv'
MODEL_PATH = 'car_price_prediction_model.pkl'
REFERENCE_YEAR = 2024  # Car_Age is computed as REFERENCE_YEAR - Year
CATEGORICAL_COLUMNS = ['Fuel_Type', 'Seller_Type', 'Transmission']
TEST_SIZE = 0.2        # fraction of rows held out for evaluation
SEED = 42              # shared by the split and the forest for repeatable runs
N_ESTIMATORS = 100     # number of trees in the forest
target = 'Selling_Price'

# --- Load --------------------------------------------------------------------
# The raw frame keeps its own name so it is never overwritten by later steps.
raw_data = pd.read_csv(url)

print(raw_data.head())

print(raw_data.isnull().sum())

# --- Clean and engineer features ---------------------------------------------
# Drop incomplete rows, add Car_Age (appended as the last column), then remove
# Year (now represented by Car_Age) and Car_Name.
data = (
    raw_data
    .dropna()
    .assign(Car_Age=lambda frame: REFERENCE_YEAR - frame['Year'])
    .drop(['Year', 'Car_Name'], axis=1)
)

# drop_first=True removes the first level of each categorical column, so the
# remaining indicator columns are the ones the saved feature list will name.
data = pd.get_dummies(data, columns=CATEGORICAL_COLUMNS, drop_first=True)

# --- Split into features / target --------------------------------------------
# Every column except the target is a feature; column order is preserved.
features = data.columns.drop(target).tolist()

X = data[features]
y = data[target]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# --- Fit and evaluate --------------------------------------------------------
model = RandomForestRegressor(n_estimators=N_ESTIMATORS, random_state=SEED)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

r2 = r2_score(y_test, y_pred)
print(f'R-squared Score: {r2}')

# --- Persist -----------------------------------------------------------------
# The feature list is stored with the estimator so that inference code can
# rebuild its input with the same columns in the same order.
with open(MODEL_PATH, 'wb') as f:
    joblib.dump((model, features), f)


  Car_Name  Year  Selling_Price  Present_Price  Kms_Driven Fuel_Type  \
0     ritz  2014           3.35           5.59       27000    Petrol   
1      sx4  2013           4.75           9.54       43000    Diesel   
2     ciaz  2017           7.25           9.85        6900    Petrol   
3  wagon r  2011           2.85           4.15        5200    Petrol   
4    swift  2014           4.60           6.87       42450    Diesel   

  Seller_Type Transmission  Owner  
0      Dealer       Manual      0  
1      Dealer       Manual      0  
2      Dealer       Manual      0  
3      Dealer       Manual      0  
4      Dealer       Manual      0  
Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
Owner            0
dtype: int64
Mean Squared Error: 0.9215642422950816
R-squared Score: 0.9599938850484411


In [None]:
import joblib
import pandas as pd

# Restore the (estimator, feature-name list) pair from the pickle file.
with open('car_price_prediction_model.pkl', 'rb') as model_file:
    model, features = joblib.load(model_file)

# A single car to score, written as one record (column name -> value).
new_car_record = {
    'Car_Age': 2,
    'Present_Price': 8.5,
    'Kms_Driven': 5000,
    'Owner': 1,
    'Fuel_Type_Diesel': 0,
    'Fuel_Type_Petrol': 1,
    'Seller_Type_Individual': 0,
    'Transmission_Manual': 1,
}

# Align the one-row frame to the loaded feature list in a single step:
# columns absent from the record are created and filled with 0, columns not in
# `features` are discarded, and the result follows the order of `features`.
new_car_data = pd.DataFrame([new_car_record]).reindex(
    columns=features, fill_value=0
)

predicted_price = model.predict(new_car_data)

print(f'Predicted Selling Price: {predicted_price[0]} lakhs')


Predicted Selling Price: 7.006999999999994 lakhs
