<a href="https://colab.research.google.com/github/shashaankreddy/deeplearning-project/blob/main/project_deep_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import mean_squared_error


In [2]:
url = 'https://raw.githubusercontent.com/shashaankreddy/deeplearning-project/main/car%20data.csv'
data = pd.read_csv(url)

In [3]:
data.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [4]:
data.shape

(301, 9)

In [5]:
#information of dataset
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Car_Name       301 non-null    object 
 1   Year           301 non-null    int64  
 2   Selling_Price  301 non-null    float64
 3   Present_Price  301 non-null    float64
 4   Kms_Driven     301 non-null    int64  
 5   Fuel_Type      301 non-null    object 
 6   Seller_Type    301 non-null    object 
 7   Transmission   301 non-null    object 
 8   Owner          301 non-null    int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 21.3+ KB


In [6]:
#statostical information
data.describe()

Unnamed: 0,Year,Selling_Price,Present_Price,Kms_Driven,Owner
count,301.0,301.0,301.0,301.0,301.0
mean,2013.627907,4.661296,7.628472,36947.20598,0.043189
std,2.891554,5.082812,8.644115,38886.883882,0.247915
min,2003.0,0.1,0.32,500.0,0.0
25%,2012.0,0.9,1.2,15000.0,0.0
50%,2014.0,3.6,6.4,32000.0,0.0
75%,2016.0,6.0,9.9,48767.0,0.0
max,2018.0,35.0,92.6,500000.0,3.0


In [7]:
#missing values
data.isnull().sum()

Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
Owner            0
dtype: int64

In [8]:
#checking the distribution of categorical data
print(data.Fuel_Type.value_counts())

Petrol    239
Diesel     60
CNG         2
Name: Fuel_Type, dtype: int64


In [9]:
print(data.Seller_Type.value_counts())

Dealer        195
Individual    106
Name: Seller_Type, dtype: int64


In [10]:
print(data.Transmission.value_counts())



Manual       261
Automatic     40
Name: Transmission, dtype: int64


Encoding a categorical data


In [11]:
#encoding "Fuel type " column 
data.replace({"Fuel_Type":{"Petrol":0,"Diesel":1,"CNG":2}},inplace=True)
data.replace({"Seller_Type":{"Dealer":0,"Individual":1}},inplace=True)
data.replace({"Transmission":{"Manual":0,"Automatic":1}},inplace=True)

In [12]:
data.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,0,0,0,0
1,sx4,2013,4.75,9.54,43000,1,0,0,0
2,ciaz,2017,7.25,9.85,6900,0,0,0,0
3,wagon r,2011,2.85,4.15,5200,0,0,0,0
4,swift,2014,4.6,6.87,42450,1,0,0,0


Training and test data

In [13]:
X=data.drop(["Car_Name","Selling_Price"],axis=1)
Y=data["Selling_Price"]

Split the dataset into training and testing set

In [14]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2, random_state= 42)

Define a baseline  model(Linear Regression)

In [15]:
baseline_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(1,input_shape=(X_train.shape[1],))
    
])

Compile the  baseline model

In [16]:
baseline_model.compile(optimizer="adam",loss="mse")

Train the baseline model

In [17]:
baseline_model.fit(X_train,Y_train,epochs=50,batch_size=32,verbose=0)

<keras.callbacks.History at 0x7fe1c8717610>

Evalute the baseline model on the test set

In [18]:
baseline_predictions = baseline_model.predict(X_test)



In [19]:
baseline_mse = mean_squared_error(Y_test,baseline_predictions)
print("Baseline MSE:" , baseline_mse)

Baseline MSE: 439685.4146751348


Define the more sophisticated model( a neural network)

In [20]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation = "relu", input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64,activation = "relu"),
    tf.keras.layers.Dense(1)
])

compile the model

In [21]:
model.compile(optimizer = "adam",loss ="mse")

Train the model

In [22]:
model.fit(X_train , Y_train , epochs = 50 , batch_size = 32 , verbose = 0)

<keras.callbacks.History at 0x7fe1ca927310>

Evaluate the model on test data

In [23]:
predictions = model.predict(X_test)
mse = mean_squared_error(Y_test,predictions)
print("Model MSE",mse)

Model MSE 103.71011841352924
