In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn import metrics

# Data Collection And Processing

In [2]:
#Loading the data from csv file to pandas datafrme
car_dataset = pd.read_csv("test.csv")

In [3]:
#inspecting the first 5 rows of the dataframe
car_dataset.head()

Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price
0,Maruti Alto K10 LXI CNG,Delhi,2014,40929,CNG,Manual,First,32.26 km/kg,998 CC,58.2 bhp,4.0,
1,Maruti Alto 800 2016-2019 LXI,Coimbatore,2013,54493,Petrol,Manual,Second,24.7 kmpl,796 CC,47.3 bhp,5.0,
2,Toyota Innova Crysta Touring Sport 2.4 MT,Mumbai,2017,34000,Diesel,Manual,First,13.68 kmpl,2393 CC,147.8 bhp,7.0,25.27 Lakh
3,Toyota Etios Liva GD,Hyderabad,2012,139000,Diesel,Manual,First,23.59 kmpl,1364 CC,null bhp,5.0,
4,Hyundai i20 Magna,Mumbai,2014,29000,Petrol,Manual,First,18.5 kmpl,1197 CC,82.85 bhp,5.0,


In [4]:
#checking the number rows and columns
car_dataset.shape

(1234, 12)

In [5]:
#getting some information about the dataset
car_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1234 entries, 0 to 1233
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Name               1234 non-null   object 
 1   Location           1234 non-null   object 
 2   Year               1234 non-null   int64  
 3   Kilometers_Driven  1234 non-null   int64  
 4   Fuel_Type          1234 non-null   object 
 5   Transmission       1234 non-null   object 
 6   Owner_Type         1234 non-null   object 
 7   Mileage            1234 non-null   object 
 8   Engine             1224 non-null   object 
 9   Power              1224 non-null   object 
 10  Seats              1223 non-null   float64
 11  New_Price          182 non-null    object 
dtypes: float64(1), int64(2), object(9)
memory usage: 115.8+ KB


In [6]:
#checking the number of missing values
car_dataset.isnull().sum()

Name                    0
Location                0
Year                    0
Kilometers_Driven       0
Fuel_Type               0
Transmission            0
Owner_Type              0
Mileage                 0
Engine                 10
Power                  10
Seats                  11
New_Price            1052
dtype: int64

In [7]:
#checking the distribution of categorical data
print(car_dataset.Fuel_Type.value_counts())
print(car_dataset.Transmission.value_counts())
print(car_dataset.Owner_Type.value_counts())

Fuel_Type
Diesel    647
Petrol    579
CNG         6
LPG         2
Name: count, dtype: int64
Transmission
Manual       905
Automatic    329
Name: count, dtype: int64
Owner_Type
First             1023
Second             184
Third               24
Fourth & Above       3
Name: count, dtype: int64


# Encoding the categorical data

In [8]:
#encounding "Fuel_Type"Coulumn
car_dataset.replace({"Fuel_Type":{"Petrol":0,"Diesel":1,"CNG":2,"LPG":3}},inplace=True)

In [9]:
#encoding "Transmission"column
car_dataset.replace({"Transmission":{"Manual":0,"Automatic":1}},inplace=True)

In [10]:
#encoding "Owner_Type"column
car_dataset.replace({"Owner_Type":{"First":0,"Second":1,"Third":2,"Fourth & Above":3}},inplace=True)

In [11]:
car_dataset.head()

Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price
0,Maruti Alto K10 LXI CNG,Delhi,2014,40929,2,0,0,32.26 km/kg,998 CC,58.2 bhp,4.0,
1,Maruti Alto 800 2016-2019 LXI,Coimbatore,2013,54493,0,0,1,24.7 kmpl,796 CC,47.3 bhp,5.0,
2,Toyota Innova Crysta Touring Sport 2.4 MT,Mumbai,2017,34000,1,0,0,13.68 kmpl,2393 CC,147.8 bhp,7.0,25.27 Lakh
3,Toyota Etios Liva GD,Hyderabad,2012,139000,1,0,0,23.59 kmpl,1364 CC,null bhp,5.0,
4,Hyundai i20 Magna,Mumbai,2014,29000,0,0,0,18.5 kmpl,1197 CC,82.85 bhp,5.0,


# Spilitting the data Training and Test data

In [12]:
x = car_dataset.drop(["Name","New_Price"],axis=1)
y = car_dataset["New_Price"]

In [13]:
print(x)


        Location  Year  Kilometers_Driven  Fuel_Type  Transmission  \
0          Delhi  2014              40929          2             0   
1     Coimbatore  2013              54493          0             0   
2         Mumbai  2017              34000          1             0   
3      Hyderabad  2012             139000          1             0   
4         Mumbai  2014              29000          0             0   
...          ...   ...                ...        ...           ...   
1229   Hyderabad  2011              89411          1             0   
1230      Mumbai  2015              59000          0             1   
1231     Kolkata  2012              28000          1             0   
1232        Pune  2013              52262          0             1   
1233       Kochi  2014              72443          1             1   

      Owner_Type      Mileage   Engine      Power  Seats  
0              0  32.26 km/kg   998 CC   58.2 bhp    4.0  
1              1    24.7 kmpl   796 CC   

In [14]:
print(y)

0              NaN
1              NaN
2       25.27 Lakh
3              NaN
4              NaN
           ...    
1229           NaN
1230           NaN
1231           NaN
1232           NaN
1233           NaN
Name: New_Price, Length: 1234, dtype: object


# Splitting Trainng and testing data

In [15]:
x_train,x_test,y_train,y_test = train_test_split(x, y, test_size = 0.1,random_state=2)


# Model Training

Linear Regression

In [20]:
#loding the linear regression model
from sklearn.linear_model import LinearRegression

# Instantiate the LinearRegression class
model = LinearRegression()

# Fit the model to your data
model.fit(x, y)

# Make predictions using the trained model
predictions = model.predict(X_test)

lin_reg_model = LinearRegression()

ValueError: could not convert string to float: 'Delhi'

In [17]:
#lin_reg_model.fit(str(x_train,y_train))

In [18]:
#Prediction on Training data
training_data_prediction = lin_reg_model(x_train)

TypeError: 'LinearRegression' object is not callable