In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
import statsmodels.api as sm

In [4]:
import seaborn
# Apply seaborn's default theme to matplotlib.
# NOTE(review): no plotting cell is visible in this notebook, so this may be unused.
seaborn.set()

In [5]:
# Load the car-price dataset; the path is relative to the notebook's working directory.
data = pd.read_csv("CarPrice_Assignment.csv")

In [6]:
# Preview the first rows to sanity-check the load.
data.head()

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


## Getting to know the data

In [7]:
# Column names, non-null counts and dtypes for the whole frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   car_ID            205 non-null    int64  
 1   symboling         205 non-null    int64  
 2   CarName           205 non-null    object 
 3   fueltype          205 non-null    object 
 4   aspiration        205 non-null    object 
 5   doornumber        205 non-null    object 
 6   carbody           205 non-null    object 
 7   drivewheel        205 non-null    object 
 8   enginelocation    205 non-null    object 
 9   wheelbase         205 non-null    float64
 10  carlength         205 non-null    float64
 11  carwidth          205 non-null    float64
 12  carheight         205 non-null    float64
 13  curbweight        205 non-null    int64  
 14  enginetype        205 non-null    object 
 15  cylindernumber    205 non-null    object 
 16  enginesize        205 non-null    int64  
 1

## Removing irrelevant features

In [8]:
# Drop the identifier/label columns that are not used as predictors.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: running it a second time, after the columns are already gone,
# no longer raises a KeyError.
data = data.drop(columns=['car_ID', 'symboling', 'CarName'], errors='ignore')

Checking for null values

In [9]:
# Count missing values per column (isna is the canonical name for isnull).
data.isna().sum()

fueltype            0
aspiration          0
doornumber          0
carbody             0
drivewheel          0
enginelocation      0
wheelbase           0
carlength           0
carwidth            0
carheight           0
curbweight          0
enginetype          0
cylindernumber      0
enginesize          0
fuelsystem          0
boreratio           0
stroke              0
compressionratio    0
horsepower          0
peakrpm             0
citympg             0
highwaympg          0
price               0
dtype: int64

## Converting categorical values to numeric values

In [10]:
# Show only the string-valued (object dtype) columns that still need encoding.
data.select_dtypes(include=object).head()

Unnamed: 0,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,enginetype,cylindernumber,fuelsystem
0,gas,std,two,convertible,rwd,front,dohc,four,mpfi
1,gas,std,two,convertible,rwd,front,dohc,four,mpfi
2,gas,std,two,hatchback,rwd,front,ohcv,six,mpfi
3,gas,std,four,sedan,fwd,front,ohc,four,mpfi
4,gas,std,four,sedan,4wd,front,ohc,five,mpfi


In [11]:
# Inspect the distinct labels of one categorical column at a time: uncomment the
# column of interest and comment out the others. Presumably these labels were used
# to write the mapping dictionaries in the next cell (the fuelsystem labels match).
#data['fueltype'].unique()
#data['aspiration'].unique()
#data['doornumber'].unique()
#data['carbody'].unique()
#data['drivewheel'].unique()
#data['enginelocation'].unique()
#data['enginetype'].unique()
#data['cylindernumber'].unique()
data['fuelsystem'].unique()

array(['mpfi', '2bbl', 'mfi', '1bbl', 'spfi', '4bbl', 'idi', 'spdi'],
      dtype=object)

In [12]:
# Lookup tables that turn each categorical label into an integer code.
# Except for cylindernumber, a label's code is simply its position in the list.
fueltype_mapping = {label: code for code, label in enumerate(['gas', 'diesel'])}
aspiration_mapping = {label: code for code, label in enumerate(['std', 'turbo'])}
doornumber_mapping = {label: code for code, label in enumerate(['two', 'four'])}
carbody_mapping = {
    label: code
    for code, label in enumerate(['convertible', 'hatchback', 'sedan', 'wagon', 'hardtop'])
}
drivewheel_mapping = {label: code for code, label in enumerate(['rwd', 'fwd', '4wd'])}
enginelocation_mapping = {label: code for code, label in enumerate(['front', 'rear'])}
enginetype_mapping = {
    label: code
    for code, label in enumerate(['dohc', 'ohcv', 'ohc', 'l', 'rotor', 'ohcf', 'dohcv'])
}
# Cylinder counts are real quantities, so each word maps to the number it spells.
cylindernumber_mapping = {
    'four': 4,
    'six': 6,
    'five': 5,
    'three': 3,
    'twelve': 12,
    'two': 2,
    'eight': 8,
}
fuelsystem_mapping = {
    label: code
    for code, label in enumerate(['mpfi', '2bbl', 'mfi', '1bbl', 'spfi', '4bbl', 'idi', 'spdi'])
}

In [13]:
# Encode every categorical column with its lookup table from the previous cell.
categorical_mappings = {
    'fueltype': fueltype_mapping,
    'aspiration': aspiration_mapping,
    'doornumber': doornumber_mapping,
    'carbody': carbody_mapping,
    'drivewheel': drivewheel_mapping,
    'enginelocation': enginelocation_mapping,
    'enginetype': enginetype_mapping,
    'cylindernumber': cylindernumber_mapping,
    'fuelsystem': fuelsystem_mapping,
}

for column, mapping in categorical_mappings.items():
    # Skip columns that are already numeric so that re-running this cell does not
    # map the integer codes a second time (which would turn every value into NaN).
    if pd.api.types.is_numeric_dtype(data[column]):
        continue
    encoded = data[column].map(mapping)
    # Series.map yields NaN for labels missing from the mapping; fail loudly here
    # instead of letting silent NaNs reach the regressions further down.
    newly_missing = encoded.isna() & data[column].notna()
    if newly_missing.any():
        unmapped = sorted(data.loc[newly_missing, column].astype(str).unique())
        raise ValueError(f"Unmapped labels in column {column!r}: {unmapped}")
    data[column] = encoded

In [14]:
# Confirm that the categorical columns now hold integer codes.
data.head()

Unnamed: 0,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,carheight,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,0,0,0,0,0,0,88.6,168.8,64.1,48.8,...,130,0,3.47,2.68,9.0,111,5000,21,27,13495.0
1,0,0,0,0,0,0,88.6,168.8,64.1,48.8,...,130,0,3.47,2.68,9.0,111,5000,21,27,16500.0
2,0,0,0,1,0,0,94.5,171.2,65.5,52.4,...,152,0,2.68,3.47,9.0,154,5000,19,26,16500.0
3,0,0,1,2,1,0,99.8,176.6,66.2,54.3,...,109,0,3.19,3.4,10.0,102,5500,24,30,13950.0
4,0,0,1,2,2,0,99.4,176.6,66.4,54.3,...,136,0,3.19,3.4,8.0,115,5500,18,22,17450.0


In [15]:
# Univariate OLS of price on fueltype (0 = gas, 1 = diesel per fueltype_mapping).
x1, y = data[['fueltype']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.011
Model:,OLS,Adj. R-squared:,0.006
Method:,Least Squares,F-statistic:,2.293
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,0.132
Time:,15:13:22,Log-Likelihood:,-2131.3
No. Observations:,205,AIC:,4267.0
Df Residuals:,203,BIC:,4273.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.3e+04,585.500,22.203,0.000,1.18e+04,1.42e+04
fueltype,2838.3518,1874.514,1.514,0.132,-857.663,6534.367

0,1,2,3
Omnibus:,80.317,Durbin-Watson:,0.462
Prob(Omnibus):,0.0,Jarque-Bera (JB):,197.82
Skew:,1.801,Prob(JB):,1.11e-43
Kurtosis:,6.192,Cond. No.,3.41


In [16]:
# Univariate OLS of price on aspiration (0 = std, 1 = turbo per aspiration_mapping).
x1, y = data[['aspiration']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.032
Model:,OLS,Adj. R-squared:,0.027
Method:,Least Squares,F-statistic:,6.637
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,0.0107
Time:,15:13:22,Log-Likelihood:,-2129.2
No. Observations:,205,AIC:,4262.0
Df Residuals:,203,BIC:,4269.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.261e+04,608.011,20.742,0.000,1.14e+04,1.38e+04
aspiration,3686.8958,1431.157,2.576,0.011,865.056,6508.735

0,1,2,3
Omnibus:,89.492,Durbin-Watson:,0.46
Prob(Omnibus):,0.0,Jarque-Bera (JB):,251.018
Skew:,1.947,Prob(JB):,3.11e-55
Kurtosis:,6.772,Cond. No.,2.7


In [17]:
# Univariate OLS of price on doornumber (0 = two, 1 = four per doornumber_mapping).
x1, y = data[['doornumber']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.001
Model:,OLS,Adj. R-squared:,-0.004
Method:,Least Squares,F-statistic:,0.2059
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,0.65
Time:,15:13:23,Log-Likelihood:,-2132.4
No. Observations:,205,AIC:,4269.0
Df Residuals:,203,BIC:,4275.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.299e+04,843.743,15.396,0.000,1.13e+04,1.47e+04
doornumber,511.2281,1126.518,0.454,0.650,-1709.948,2732.404

0,1,2,3
Omnibus:,78.69,Durbin-Watson:,0.441
Prob(Omnibus):,0.0,Jarque-Bera (JB):,187.316
Skew:,1.784,Prob(JB):,2.11e-41
Kurtosis:,6.032,Cond. No.,2.79


In [18]:
# Univariate OLS of price on carbody.
# NOTE(review): carbody holds the arbitrary integer labels from carbody_mapping,
# so the fitted slope depends on the order in which the body styles were numbered.
x1, y = data[['carbody']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.026
Model:,OLS,Adj. R-squared:,0.021
Method:,Least Squares,F-statistic:,5.422
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,0.0209
Time:,15:13:23,Log-Likelihood:,-2129.8
No. Observations:,205,AIC:,4264.0
Df Residuals:,203,BIC:,4270.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.05e+04,1312.270,8.005,0.000,7917.076,1.31e+04
carbody,1540.1153,661.399,2.329,0.021,236.023,2844.208

0,1,2,3
Omnibus:,74.331,Durbin-Watson:,0.46
Prob(Omnibus):,0.0,Jarque-Bera (JB):,166.659
Skew:,1.715,Prob(JB):,6.46e-37
Kurtosis:,5.783,Cond. No.,5.74


In [19]:
# Univariate OLS of price on drivewheel.
# NOTE(review): drivewheel holds the arbitrary integer labels from
# drivewheel_mapping (rwd=0, fwd=1, 4wd=2), so the slope depends on that ordering.
x1, y = data[['drivewheel']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.334
Model:,OLS,Adj. R-squared:,0.331
Method:,Least Squares,F-statistic:,101.8
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,1.15e-19
Time:,15:13:23,Log-Likelihood:,-2090.8
No. Observations:,205,AIC:,4186.0
Df Residuals:,203,BIC:,4192.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.887e+04,717.673,26.287,0.000,1.75e+04,2.03e+04
drivewheel,-8302.2896,822.700,-10.092,0.000,-9924.422,-6680.157

0,1,2,3
Omnibus:,64.06,Durbin-Watson:,0.663
Prob(Omnibus):,0.0,Jarque-Bera (JB):,131.062
Skew:,1.511,Prob(JB):,3.47e-29
Kurtosis:,5.493,Cond. No.,2.82


In [20]:
# Univariate OLS of price on enginelocation (0 = front, 1 = rear per enginelocation_mapping).
x1, y = data[['enginelocation']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.106
Model:,OLS,Adj. R-squared:,0.101
Method:,Least Squares,F-statistic:,23.97
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,1.99e-06
Time:,15:13:23,Log-Likelihood:,-2121.0
No. Observations:,205,AIC:,4246.0
Df Residuals:,203,BIC:,4253.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.296e+04,532.893,24.322,0.000,1.19e+04,1.4e+04
enginelocation,2.157e+04,4405.103,4.896,0.000,1.29e+04,3.03e+04

0,1,2,3
Omnibus:,86.681,Durbin-Watson:,0.512
Prob(Omnibus):,0.0,Jarque-Bera (JB):,241.732
Skew:,1.876,Prob(JB):,3.23e-53
Kurtosis:,6.771,Cond. No.,8.33


In [21]:
# Univariate OLS of price on wheelbase (with intercept).
x1, y = data[['wheelbase']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.334
Model:,OLS,Adj. R-squared:,0.331
Method:,Least Squares,F-statistic:,101.7
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,1.18e-19
Time:,15:13:23,Log-Likelihood:,-2090.8
No. Observations:,205,AIC:,4186.0
Df Residuals:,203,BIC:,4192.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-6.243e+04,7518.982,-8.303,0.000,-7.73e+04,-4.76e+04
wheelbase,766.5652,75.996,10.087,0.000,616.722,916.408

0,1,2,3
Omnibus:,115.524,Durbin-Watson:,0.566
Prob(Omnibus):,0.0,Jarque-Bera (JB):,534.632
Skew:,2.298,Prob(JB):,8.050000000000001e-117
Kurtosis:,9.439,Cond. No.,1630.0


In [22]:
# Univariate OLS of price on carlength (with intercept).
x1, y = data[['carlength']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()



0,1,2,3
Dep. Variable:,price,R-squared:,0.466
Model:,OLS,Adj. R-squared:,0.464
Method:,Least Squares,F-statistic:,177.4
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,1.68e-29
Time:,15:13:23,Log-Likelihood:,-2068.1
No. Observations:,205,AIC:,4140.0
Df Residuals:,203,BIC:,4147.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-6.369e+04,5792.793,-10.995,0.000,-7.51e+04,-5.23e+04
carlength,442.2161,33.200,13.320,0.000,376.756,507.676

0,1,2,3
Omnibus:,90.016,Durbin-Watson:,0.556
Prob(Omnibus):,0.0,Jarque-Bera (JB):,293.754
Skew:,1.859,Prob(JB):,1.63e-64
Kurtosis:,7.535,Cond. No.,2470.0


In [23]:
# Univariate OLS of price on carwidth (with intercept).
x1, y = data[['carwidth']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.577
Model:,OLS,Adj. R-squared:,0.574
Method:,Least Squares,F-statistic:,276.4
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,9.63e-40
Time:,15:13:24,Log-Likelihood:,-2044.4
No. Observations:,205,AIC:,4093.0
Df Residuals:,203,BIC:,4099.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.731e+05,1.12e+04,-15.433,0.000,-1.95e+05,-1.51e+05
carwidth,2827.7675,170.081,16.626,0.000,2492.415,3163.120

0,1,2,3
Omnibus:,115.442,Durbin-Watson:,0.638
Prob(Omnibus):,0.0,Jarque-Bera (JB):,617.165
Skew:,2.215,Prob(JB):,9.64e-135
Kurtosis:,10.255,Cond. No.,2030.0


In [24]:
# Univariate OLS of price on carheight (with intercept).
x1, y = data[['carheight']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.014
Model:,OLS,Adj. R-squared:,0.009
Method:,Least Squares,F-statistic:,2.933
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,0.0883
Time:,15:13:24,Log-Likelihood:,-2131.0
No. Observations:,205,AIC:,4266.0
Df Residuals:,203,BIC:,4273.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-7684.4770,1.23e+04,-0.627,0.531,-3.18e+04,1.65e+04
carheight,390.1579,227.827,1.713,0.088,-59.053,839.369

0,1,2,3
Omnibus:,80.143,Durbin-Watson:,0.468
Prob(Omnibus):,0.0,Jarque-Bera (JB):,194.507
Skew:,1.808,Prob(JB):,5.8e-43
Kurtosis:,6.113,Cond. No.,1190.0


In [25]:
# Univariate OLS of price on curbweight (with intercept).
x1, y = data[['curbweight']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.698
Model:,OLS,Adj. R-squared:,0.696
Method:,Least Squares,F-statistic:,468.6
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,1.2099999999999998e-54
Time:,15:13:24,Log-Likelihood:,-2009.8
No. Observations:,205,AIC:,4024.0
Df Residuals:,203,BIC:,4030.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.948e+04,1543.962,-12.614,0.000,-2.25e+04,-1.64e+04
curbweight,12.8162,0.592,21.647,0.000,11.649,13.984

0,1,2,3
Omnibus:,85.362,Durbin-Watson:,0.575
Prob(Omnibus):,0.0,Jarque-Bera (JB):,382.847
Skew:,1.591,Prob(JB):,7.34e-84
Kurtosis:,8.89,Cond. No.,13100.0


In [26]:
# Univariate OLS of price on enginetype.
# NOTE(review): enginetype holds the arbitrary integer labels from
# enginetype_mapping, so the fitted slope depends on how the types were numbered.
x1, y = data[['enginetype']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.009
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,1.765
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,0.185
Time:,15:13:24,Log-Likelihood:,-2131.6
No. Observations:,205,AIC:,4267.0
Df Residuals:,203,BIC:,4274.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.477e+04,1252.173,11.793,0.000,1.23e+04,1.72e+04
enginetype,-691.0399,520.156,-1.329,0.185,-1716.640,334.561

0,1,2,3
Omnibus:,78.42,Durbin-Watson:,0.438
Prob(Omnibus):,0.0,Jarque-Bera (JB):,185.407
Skew:,1.783,Prob(JB):,5.49e-41
Kurtosis:,5.999,Cond. No.,6.18


In [27]:
# Univariate OLS of price on cylindernumber, which holds the actual cylinder
# count after cylindernumber_mapping (e.g. 'four' -> 4).
x1, y = data[['cylindernumber']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.516
Model:,OLS,Adj. R-squared:,0.514
Method:,Least Squares,F-statistic:,216.4
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,8.03e-34
Time:,15:13:24,Log-Likelihood:,-2058.1
No. Observations:,205,AIC:,4120.0
Df Residuals:,203,BIC:,4127.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-9980.0253,1628.187,-6.130,0.000,-1.32e+04,-6769.697
cylindernumber,5309.1658,360.919,14.710,0.000,4597.536,6020.796

0,1,2,3
Omnibus:,31.811,Durbin-Watson:,0.73
Prob(Omnibus):,0.0,Jarque-Bera (JB):,42.198
Skew:,0.982,Prob(JB):,6.87e-10
Kurtosis:,4.042,Cond. No.,19.8


In [28]:
# Univariate OLS of price on enginesize (with intercept).
x1, y = data[['enginesize']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.764
Model:,OLS,Adj. R-squared:,0.763
Method:,Least Squares,F-statistic:,657.6
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,1.35e-65
Time:,15:13:24,Log-Likelihood:,-1984.4
No. Observations:,205,AIC:,3973.0
Df Residuals:,203,BIC:,3979.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-8005.4455,873.221,-9.168,0.000,-9727.191,-6283.700
enginesize,167.6984,6.539,25.645,0.000,154.805,180.592

0,1,2,3
Omnibus:,23.788,Durbin-Watson:,0.768
Prob(Omnibus):,0.0,Jarque-Bera (JB):,33.092
Skew:,0.717,Prob(JB):,6.52e-08
Kurtosis:,4.348,Cond. No.,429.0


In [29]:
# Univariate OLS of price on fuelsystem.
# NOTE(review): fuelsystem holds the arbitrary integer labels from
# fuelsystem_mapping, so the fitted slope depends on how the systems were numbered.
x1, y = data[['fuelsystem']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.015
Model:,OLS,Adj. R-squared:,0.01
Method:,Least Squares,F-statistic:,3.073
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,0.0811
Time:,15:13:25,Log-Likelihood:,-2130.9
No. Observations:,205,AIC:,4266.0
Df Residuals:,203,BIC:,4273.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.394e+04,670.840,20.775,0.000,1.26e+04,1.53e+04
fuelsystem,-446.6740,254.800,-1.753,0.081,-949.067,55.719

0,1,2,3
Omnibus:,74.175,Durbin-Watson:,0.461
Prob(Omnibus):,0.0,Jarque-Bera (JB):,165.106
Skew:,1.717,Prob(JB):,1.4100000000000001e-36
Kurtosis:,5.744,Cond. No.,3.34


In [30]:
# Univariate OLS of price on boreratio (with intercept).
x1, y = data[['boreratio']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.306
Model:,OLS,Adj. R-squared:,0.303
Method:,Least Squares,F-statistic:,89.51
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,7.91e-18
Time:,15:13:25,Log-Likelihood:,-2095.0
No. Observations:,205,AIC:,4194.0
Df Residuals:,203,BIC:,4201.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-4.105e+04,5761.487,-7.125,0.000,-5.24e+04,-2.97e+04
boreratio,1.632e+04,1724.635,9.461,0.000,1.29e+04,1.97e+04

0,1,2,3
Omnibus:,60.08,Durbin-Watson:,0.611
Prob(Omnibus):,0.0,Jarque-Bera (JB):,116.398
Skew:,1.447,Prob(JB):,5.3e-26
Kurtosis:,5.291,Cond. No.,45.0


In [31]:
# Univariate OLS of price on stroke (with intercept).
x1, y = data[['stroke']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.006
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,1.289
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,0.258
Time:,15:13:25,Log-Likelihood:,-2131.8
No. Observations:,205,AIC:,4268.0
Df Residuals:,203,BIC:,4274.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,6688.3876,5828.972,1.147,0.253,-4804.707,1.82e+04
stroke,2023.8046,1782.336,1.135,0.258,-1490.462,5538.071

0,1,2,3
Omnibus:,77.263,Durbin-Watson:,0.444
Prob(Omnibus):,0.0,Jarque-Bera (JB):,179.889
Skew:,1.764,Prob(JB):,8.66e-40
Kurtosis:,5.935,Cond. No.,37.4


In [32]:
# Univariate OLS of price on compressionratio (with intercept).
x1, y = data[['compressionratio']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.005
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,0.9426
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,0.333
Time:,15:13:25,Log-Likelihood:,-2132.0
No. Observations:,205,AIC:,4268.0
Df Residuals:,203,BIC:,4275.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.189e+04,1533.580,7.753,0.000,8866.100,1.49e+04
compressionratio,136.7333,140.837,0.971,0.333,-140.958,414.424

0,1,2,3
Omnibus:,79.108,Durbin-Watson:,0.451
Prob(Omnibus):,0.0,Jarque-Bera (JB):,190.882
Skew:,1.785,Prob(JB):,3.55e-42
Kurtosis:,6.1,Cond. No.,30.1


In [33]:
# Univariate OLS of price on horsepower (with intercept).
x1, y = data[['horsepower']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.653
Model:,OLS,Adj. R-squared:,0.651
Method:,Least Squares,F-statistic:,382.2
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,1.48e-48
Time:,15:13:25,Log-Likelihood:,-2024.0
No. Observations:,205,AIC:,4052.0
Df Residuals:,203,BIC:,4059.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-3721.7615,929.849,-4.003,0.000,-5555.163,-1888.360
horsepower,163.2631,8.351,19.549,0.000,146.796,179.730

0,1,2,3
Omnibus:,47.741,Durbin-Watson:,0.792
Prob(Omnibus):,0.0,Jarque-Bera (JB):,91.702
Skew:,1.141,Prob(JB):,1.22e-20
Kurtosis:,5.352,Cond. No.,314.0


In [34]:
# Univariate OLS of price on peakrpm (with intercept).
x1, y = data[['peakrpm']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.007
Model:,OLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,1.487
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,0.224
Time:,15:13:25,Log-Likelihood:,-2131.7
No. Observations:,205,AIC:,4267.0
Df Residuals:,203,BIC:,4274.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2.06e+04,6028.571,3.416,0.001,8709.287,3.25e+04
peakrpm,-1.4281,1.171,-1.219,0.224,-3.737,0.881

0,1,2,3
Omnibus:,76.694,Durbin-Watson:,0.461
Prob(Omnibus):,0.0,Jarque-Bera (JB):,177.45
Skew:,1.754,Prob(JB):,2.93e-39
Kurtosis:,5.911,Cond. No.,55700.0


In [35]:
# Univariate OLS of price on citympg (with intercept).
x1, y = data[['citympg']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.47
Model:,OLS,Adj. R-squared:,0.468
Method:,Least Squares,F-statistic:,180.2
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,7.98e-30
Time:,15:13:25,Log-Likelihood:,-2067.3
No. Observations:,205,AIC:,4139.0
Df Residuals:,203,BIC:,4145.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.44e+04,1625.031,21.166,0.000,3.12e+04,3.76e+04
citympg,-837.3964,62.381,-13.424,0.000,-960.394,-714.399

0,1,2,3
Omnibus:,60.78,Durbin-Watson:,0.804
Prob(Omnibus):,0.0,Jarque-Bera (JB):,112.964
Skew:,1.507,Prob(JB):,2.95e-25
Kurtosis:,5.036,Cond. No.,104.0


In [36]:
# Univariate OLS of price on highwaympg (with intercept).
x1, y = data[['highwaympg']], data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.487
Model:,OLS,Adj. R-squared:,0.484
Method:,Least Squares,F-statistic:,192.4
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,3.23e-31
Time:,15:13:26,Log-Likelihood:,-2064.1
No. Observations:,205,AIC:,4132.0
Df Residuals:,203,BIC:,4139.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,3.816e+04,1838.179,20.761,0.000,3.45e+04,4.18e+04
highwaympg,-809.2735,58.338,-13.872,0.000,-924.299,-694.248

0,1,2,3
Omnibus:,58.304,Durbin-Watson:,0.823
Prob(Omnibus):,0.0,Jarque-Bera (JB):,105.616
Skew:,1.46,Prob(JB):,1.16e-23
Kurtosis:,4.96,Cond. No.,145.0


In [37]:
# Multiple OLS of price on a hand-picked subset of predictors (with intercept).
# NOTE(review): drivewheel and enginetype enter as the arbitrary integer labels
# from their mapping dictionaries, so their coefficients depend on that numbering.
x1 = data[[
    'enginesize',
    'curbweight',
    'horsepower',
    'carwidth',
    'highwaympg',
    'drivewheel',
    'boreratio',
    'enginelocation',
    'enginetype',
    'peakrpm',
    'stroke',
]]
y = data[['price']]
x = sm.add_constant(x1)  # intercept shows up as 'const' in the summary
results = sm.OLS(endog=y, exog=x).fit()
results.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.873
Model:,OLS,Adj. R-squared:,0.866
Method:,Least Squares,F-statistic:,120.5
Date:,"Thu, 28 Jan 2021",Prob (F-statistic):,3.53e-80
Time:,15:13:26,Log-Likelihood:,-1921.0
No. Observations:,205,AIC:,3866.0
Df Residuals:,193,BIC:,3906.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-5.333e+04,1.3e+04,-4.098,0.000,-7.9e+04,-2.77e+04
enginesize,99.0902,12.929,7.664,0.000,73.590,124.590
curbweight,3.8449,1.295,2.969,0.003,1.290,6.399
horsepower,8.7453,12.729,0.687,0.493,-16.361,33.852
carwidth,801.2556,203.849,3.931,0.000,399.198,1203.313
highwaympg,21.1793,60.595,0.350,0.727,-98.334,140.692
drivewheel,-1506.2837,490.317,-3.072,0.002,-2473.352,-539.215
boreratio,-3724.2678,1251.669,-2.975,0.003,-6192.974,-1255.562
enginelocation,1.121e+04,2222.330,5.043,0.000,6824.577,1.56e+04

0,1,2,3
Omnibus:,35.882,Durbin-Watson:,0.836
Prob(Omnibus):,0.0,Jarque-Bera (JB):,100.877
Skew:,0.717,Prob(JB):,1.24e-22
Kurtosis:,6.123,Cond. No.,366000.0


## Splitting the data into training and testing sets

In [38]:
from sklearn.model_selection import train_test_split

In [39]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    data.drop(columns=['price']),  # predictors: every column except the target
    data['price'],                 # target
    test_size=0.2,
    random_state=42,
)

In [40]:
# Uncomment either line to inspect the training split.
#x_train
#y_train

## Model Building and Training

In [41]:
from sklearn import linear_model

In [42]:
# Linear regression estimator with scikit-learn's default settings (no arguments passed).
model = linear_model.LinearRegression()

In [43]:
# Fit on the training split only; the test split stays unseen until prediction.
model.fit(x_train,y_train)

LinearRegression()

## Predictions

In [44]:
# Predicted prices for the held-out test rows.
pred = model.predict(x_test)

## Checking the accuracy (R² score on the test set)

In [45]:
from sklearn.metrics import r2_score

In [46]:
# Despite its name, `acc` holds the coefficient of determination (R²) of the
# predictions on the test split, not a classification accuracy.
acc = r2_score(y_test, pred)

In [47]:
# Display the test-set R² computed in the previous cell.
acc

0.8230915463093522