1. Import required libraries

In [15]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

2. Load the dataset

In [16]:
df=pd.read_csv('car+data.csv')
df.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


3. Check the shape and basic information of the dataset.

In [17]:
df.shape

(301, 9)

In [18]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Car_Name       301 non-null    object 
 1   Year           301 non-null    int64  
 2   Selling_Price  301 non-null    float64
 3   Present_Price  301 non-null    float64
 4   Kms_Driven     301 non-null    int64  
 5   Fuel_Type      301 non-null    object 
 6   Seller_Type    301 non-null    object 
 7   Transmission   301 non-null    object 
 8   Owner          301 non-null    int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 21.3+ KB


4. Check for the presence of the duplicate records in the dataset? If present drop them

In [19]:
len(df[df.duplicated()])

2

In [20]:
df.drop_duplicates(inplace=True)

In [21]:
len(df[df.duplicated()])

0

5. Drop the columns which you think redundant for the analysis.

In [22]:
df.columns

Index(['Car_Name', 'Year', 'Selling_Price', 'Present_Price', 'Kms_Driven',
       'Fuel_Type', 'Seller_Type', 'Transmission', 'Owner'],
      dtype='object')

In [24]:
df=df.drop( ['Seller_Type', 'Transmission', 'Owner'],axis=1 )

In [25]:
df.columns

Index(['Car_Name', 'Year', 'Selling_Price', 'Present_Price', 'Kms_Driven',
       'Fuel_Type'],
      dtype='object')

 6. Extract a new feature called 'age_of_the_car' from the feature 'year' and drop the feature year

In [27]:
from datetime import datetime

current_year = datetime.now().year
df['age_of_the_car'] = current_year - df['Year']
df = df.drop('Year', axis=1)

In [28]:
df.columns

Index(['Car_Name', 'Selling_Price', 'Present_Price', 'Kms_Driven', 'Fuel_Type',
       'age_of_the_car'],
      dtype='object')

7. Encode the categorical columns

In [33]:
df=pd.get_dummies(df,drop_first=True)
df.sample(5)

Unnamed: 0,Selling_Price,Present_Price,Kms_Driven,age_of_the_car,Car_Name_Activa 3g,Car_Name_Activa 4g,Car_Name_Bajaj ct 100,Car_Name_Bajaj Avenger 150,Car_Name_Bajaj Avenger 150 street,Car_Name_Bajaj Avenger 220,...,Car_Name_ritz,Car_Name_s cross,Car_Name_swift,Car_Name_sx4,Car_Name_verna,Car_Name_vitara brezza,Car_Name_wagon r,Car_Name_xcent,Fuel_Type_Diesel,Fuel_Type_Petrol
157,0.48,0.84,23000,9,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
296,9.5,11.6,33988,8,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
10,2.85,3.6,2135,7,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
122,1.05,1.17,6000,8,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
135,0.65,0.74,5000,9,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True


In [None]:
 8. Separate the target and independent features.

In [34]:
X = df.drop('Selling_Price',axis=1)
y = df['Selling_Price']

9. Split the data into train and test.

In [36]:

# Assuming 'X' contains the features and 'y' contains the target variable
X = df[['Selling_Price', 'Present_Price', 'Kms_Driven']]  # Replace with your actual feature columns
y = df['Selling_Price']  # Replace with your actual target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The 'test_size' parameter specifies the proportion of the dataset to include in the test split
# The 'random_state' parameter sets the random seed for reproducibility

# Print the shapes of the resulting datasets
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (239, 3)
X_test shape: (60, 3)
y_train shape: (239,)
y_test shape: (60,)


 10. Build a Random forest Regressor model and check the r2-score for train and test.

In [38]:

from sklearn.ensemble import RandomForestRegressor
# Create a Random Forest Regressor model
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)  # You can adjust the number of estimators as needed

# Train the model using the training sets
rf_regressor.fit(X_train, y_train)

# Make predictions on the training set
y_train_pred = rf_regressor.predict(X_train)

# Calculate the R2 score for the training set
r2_train = r2_score(y_train, y_train_pred)
print("R2 score for training set:", r2_train)

# Make predictions on the test set
y_test_pred = rf_regressor.predict(X_test)

# Calculate the R2 score for the test set
r2_test = r2_score(y_test, y_test_pred)
print("R2 score for test set:", r2_test)

R2 score for training set: 0.9974916978287267
R2 score for test set: 0.9755011707201641


11. Create a pickle file with an extension as .pkl

In [58]:
import pickle
# Specify the file name for the pickle file
file_name = 'random_forest_model.pkl'

# Open the file in binary write mode and serialize the model using pickle
with open(file_name, 'wb') as file:
    pickle.dump(rf_regressor, file)

print("Pickle file created: ", file_name)

Pickle file created:  random_forest_model.pkl


12. Create new folder/new project in visual studio/pycharm that should contain the "model.pkl" file *make sure you are using a virutal environment and install required packages.*

a) Create a basic HTML form for the frontend

Create a file **index.html** in the templates folder and copy the following code.

In [None]:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Document</title>
</head>
<body>
<div class="hero-image">
      <div class="hero-text">

        <h1 style="font-size:50px">Used Car Price Predictor</h1>
         <br><br><h3>{{ prediction_text }}<h3>
      </div>
    </div>

     <style>

        body, html {
          height: 100%;
          margin: 0;
          font-family: Arial, Helvetica, sans-serif;
        }

        .hero-image {
          background-image: linear-gradient(rgba(0, 0, 0, 0.5), rgba(0, 0, 0, 0.5)), url('/static/image.jpg');
          height: 50%;
          background-position: bottom;
          background-repeat: no-repeat;
          background-size: cover;
          position: relative;
        }

        .hero-text {
          text-align: center;
           position: absolute;
          top: 50%;
          left: 50%;
          transform: translate(-50%, -50%);
          color: white;
        }

    </style>


    <div style="color:	rgb(0, 0, 0)">
        <form action="{{ url_for('predict')}}" method="post">
            <h2>Enter Car Details: </h2>
            <h3>Age of the car(In years)</h3>
            <input id="first" name="Age_of_the_car" type="number ">
            <h3>Present Showroom Price(In lakhs)</h3><br><input id="second" name="Present_Price" required="required">
            <h3>Kilometers Driven</h3><input id="third" name="Kms_Driven" required="required">
            <h3>Owner Type (0/1/3)</h3><br><input id="fourth" name="Owner" required="required">
            <h3>Fuel type</h3><br><select name="Fuel_Type" id="fuel" required="required">
                <option value="0">Petrol</option>
                <option value="1">Diesel</option>
                <option value="2">CNG</option>
            </select>
            <h3>Seller Type</h3><br><select name="Seller_Type" id="resea" required="required">
                <option value="0">Dealer</option>
                <option value="1">Individual</option>
            </select>
            <h3>Transmission type</h3><br><select name="Transmission" id="research" required="required">
                <option value="0">Manual Car</option>
                <option value="1">Automatic Car</option>
            </select>
            <br><br><button id="sub" type="submit ">Predict Selling Price</button>
            <br>


        </form>

    </div>

    <style>
	body {
            background-color: 101, 10, 20;
            text-align: center;
            padding: 0px;
	    font-family: Helvetica;
        }

        #research {
            font-size: 18px;
            width: 200px;
            height: 23px;
            top: 23px;
        }
        #box {
            border-radius: 60px;
            border-color: 45px;
            border-style: solid;
            text-align: center;
            background-color: white;
            font-size: medium;
            position: absolute;
            width: 700px;
            bottom: 9%;
            height: 850px;
            right: 30%;
            padding: 0px;
            margin: 0px;
            font-size: 14px;
        }

        #fuel {
            width: 83px;
            height: 43px;
            text-align: center;
            border-radius: 14px;
            font-size: 20px;
        }

        #fuel:hover {
            background-color: white;
        }
        #research {
            width: 150px;
            height: 43px;
            text-align: center;
            border-radius: 14px;
            font-size: 18px;
        }

        #research:hover {
            background-color: white;
        }

        #resea {
            width: 99px;
            height: 43px;
            text-align: center;
            border-radius: 14px;
            font-size: 18px;
        }

        #resea:hover {
            background-color: white;
        }

        #sub {
            background-color: Green;
            font-family:'Helvetica' monospace;
            font-weight: bold;
            width: 180px;
            height: 60px;
            text-align: center;
            border-radius: 20px;
            font-size: 18px;
            color: white;
        }

        #sub:hover {
            background-color: white;
        }

        #first {
            border-radius: 14px;
            height: 25px;
            font-size: 20px;
            text-align: center;
        }
#second {
            border-radius: 14px;
            height: 25px;
            font-size: 20px;
            text-align: center;
        }

        #third {
            border-radius: 14px;
            height: 25px;
            font-size: 20px;
            text-align: center;
        }

        #fourth {
            border-radius: 14px;
            height: 25px;
            font-size: 20px;
            text-align: center;
        }
    </style>
</body>
</html>

b) Create app.py file and write the predict function

In [59]:
from flask import Flask, render_template, request, jsonify
import pickle
import numpy as np
import sklearn


app = Flask(__name__)
model = pickle.load(open('random_forest_model.pkl', 'rb'))


@app.route('/',methods=['GET'])
def Home():
    return render_template('index.html')


@app.route("/predict", methods=['POST'])
def predict():
    if request.method == 'POST':
        Present_Price=float(request.form['Present_Price'])
        Kms_Driven=int(request.form['Kms_Driven'])
        Owner=int(request.form['Owner'])
        Fuel_Type=request.form['Fuel_Type']
        Age_of_the_car=request.form['Age_of_the_car']
        Seller_Type=request.form['Seller_Type']
        Transmission=request.form['Transmission']

        prediction=model.predict([[Present_Price,Kms_Driven,Owner,Age_of_the_car,Fuel_Type,Seller_Type,Transmission]])
        output=round(prediction[0],2)
        return render_template('index.html', prediction_text="You can sell your car at {} lakhs".format(output))

        if __name__ == "__main__":
            app.run(debug=True)

 13. Run the app.py python file which will render to index html page then enter the input values and get the prediction.

In [53]:
import os
file_path = 'random_forest_model.pkl'
if os.path.exists(file_path):
    print(f"The file {file_path} exists.")
else:
    print(f"The file {file_path} does not exist.")

The file random_forest_model.pkl exists.


In [54]:
import sklearn
print(sklearn.__version__)

1.3.0
