In [245]:
import pandas as pd
import numpy as np

import scipy.stats as ss
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.api import anova_lm
from statsmodels.stats.multicomp import (pairwise_tukeyhsd,
                                         MultiComparison)
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

In [246]:
cars = pd.read_csv('/home/jupyter-s-kuznetsov-18/stat_7/cars.csv')

In [247]:
cars

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.40,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.40,8.0,115,5500,18,22,17450.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,201,-1,volvo 145e (sw),gas,std,four,sedan,rwd,front,109.1,...,141,mpfi,3.78,3.15,9.5,114,5400,23,28,16845.0
201,202,-1,volvo 144ea,gas,turbo,four,sedan,rwd,front,109.1,...,141,mpfi,3.78,3.15,8.7,160,5300,19,25,19045.0
202,203,-1,volvo 244dl,gas,std,four,sedan,rwd,front,109.1,...,173,mpfi,3.58,2.87,8.8,134,5500,18,23,21485.0
203,204,-1,volvo 246,diesel,turbo,four,sedan,rwd,front,109.1,...,145,idi,3.01,3.40,23.0,106,4800,26,27,22470.0


In [248]:
cars.isnull().sum()

car_ID              0
symboling           0
CarName             0
fueltype            0
aspiration          0
doornumber          0
carbody             0
drivewheel          0
enginelocation      0
wheelbase           0
carlength           0
carwidth            0
carheight           0
curbweight          0
enginetype          0
cylindernumber      0
enginesize          0
fuelsystem          0
boreratio           0
stroke              0
compressionratio    0
horsepower          0
peakrpm             0
citympg             0
highwaympg          0
price               0
dtype: int64

In [249]:
cars.dtypes

car_ID                int64
symboling             int64
CarName              object
fueltype             object
aspiration           object
doornumber           object
carbody              object
drivewheel           object
enginelocation       object
wheelbase           float64
carlength           float64
carwidth            float64
carheight           float64
curbweight            int64
enginetype           object
cylindernumber       object
enginesize            int64
fuelsystem           object
boreratio           float64
stroke              float64
compressionratio    float64
horsepower            int64
peakrpm               int64
citympg               int64
highwaympg            int64
price               float64
dtype: object

In [250]:
cars['company'] = cars.CarName.apply(lambda x: x.split()[0])

In [251]:
cars.drop(columns = ['car_ID', 'CarName'], inplace=True)

In [252]:
cars.company.unique()

array(['alfa-romero', 'audi', 'bmw', 'chevrolet', 'dodge', 'honda',
       'isuzu', 'jaguar', 'maxda', 'mazda', 'buick', 'mercury',
       'mitsubishi', 'Nissan', 'nissan', 'peugeot', 'plymouth', 'porsche',
       'porcshce', 'renault', 'saab', 'subaru', 'toyota', 'toyouta',
       'vokswagen', 'volkswagen', 'vw', 'volvo'], dtype=object)

In [253]:
cars.company = cars.company \
                   .apply(lambda x: x \
                   .lower() \
                   .replace('maxda','mazda') \
                   .replace('porcshce','porsche') \
                   .replace('toyouta','toyota') \
                   .replace('vokswagen','volkswagen') \
                   .replace('vw','volkswagen'))

In [254]:
cars.nunique()

symboling             6
fueltype              2
aspiration            2
doornumber            2
carbody               5
drivewheel            3
enginelocation        2
wheelbase            53
carlength            75
carwidth             44
carheight            49
curbweight          171
enginetype            7
cylindernumber        7
enginesize           44
fuelsystem            8
boreratio            38
stroke               37
compressionratio     32
horsepower           59
peakrpm              23
citympg              29
highwaympg           30
price               189
company              22
dtype: int64

In [255]:
cars

Unnamed: 0,symboling,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,...,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price,company
0,3,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,...,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0,alfa-romero
1,3,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,...,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0,alfa-romero
2,1,gas,std,two,hatchback,rwd,front,94.5,171.2,65.5,...,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0,alfa-romero
3,2,gas,std,four,sedan,fwd,front,99.8,176.6,66.2,...,mpfi,3.19,3.40,10.0,102,5500,24,30,13950.0,audi
4,2,gas,std,four,sedan,4wd,front,99.4,176.6,66.4,...,mpfi,3.19,3.40,8.0,115,5500,18,22,17450.0,audi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-1,gas,std,four,sedan,rwd,front,109.1,188.8,68.9,...,mpfi,3.78,3.15,9.5,114,5400,23,28,16845.0,volvo
201,-1,gas,turbo,four,sedan,rwd,front,109.1,188.8,68.8,...,mpfi,3.78,3.15,8.7,160,5300,19,25,19045.0,volvo
202,-1,gas,std,four,sedan,rwd,front,109.1,188.8,68.9,...,mpfi,3.58,2.87,8.8,134,5500,18,23,21485.0,volvo
203,-1,diesel,turbo,four,sedan,rwd,front,109.1,188.8,68.9,...,idi,3.01,3.40,23.0,106,4800,26,27,22470.0,volvo


In [256]:
cars = cars[['company', 'fueltype', 'aspiration','carbody', 'drivewheel', 'wheelbase', 'carlength','carwidth',
             'curbweight', 'enginetype', 'cylindernumber', 'enginesize', 'boreratio','horsepower', 'price']]

In [257]:
cars

Unnamed: 0,company,fueltype,aspiration,carbody,drivewheel,wheelbase,carlength,carwidth,curbweight,enginetype,cylindernumber,enginesize,boreratio,horsepower,price
0,alfa-romero,gas,std,convertible,rwd,88.6,168.8,64.1,2548,dohc,four,130,3.47,111,13495.0
1,alfa-romero,gas,std,convertible,rwd,88.6,168.8,64.1,2548,dohc,four,130,3.47,111,16500.0
2,alfa-romero,gas,std,hatchback,rwd,94.5,171.2,65.5,2823,ohcv,six,152,2.68,154,16500.0
3,audi,gas,std,sedan,fwd,99.8,176.6,66.2,2337,ohc,four,109,3.19,102,13950.0
4,audi,gas,std,sedan,4wd,99.4,176.6,66.4,2824,ohc,five,136,3.19,115,17450.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,volvo,gas,std,sedan,rwd,109.1,188.8,68.9,2952,ohc,four,141,3.78,114,16845.0
201,volvo,gas,turbo,sedan,rwd,109.1,188.8,68.8,3049,ohc,four,141,3.78,160,19045.0
202,volvo,gas,std,sedan,rwd,109.1,188.8,68.9,3012,ohcv,six,173,3.58,134,21485.0
203,volvo,diesel,turbo,sedan,rwd,109.1,188.8,68.9,3217,ohc,six,145,3.01,106,22470.0


In [258]:
cars.corr().round(2) 

Unnamed: 0,wheelbase,carlength,carwidth,curbweight,enginesize,boreratio,horsepower,price
wheelbase,1.0,0.87,0.8,0.78,0.57,0.49,0.35,0.58
carlength,0.87,1.0,0.84,0.88,0.68,0.61,0.55,0.68
carwidth,0.8,0.84,1.0,0.87,0.74,0.56,0.64,0.76
curbweight,0.78,0.88,0.87,1.0,0.85,0.65,0.75,0.84
enginesize,0.57,0.68,0.74,0.85,1.0,0.58,0.81,0.87
boreratio,0.49,0.61,0.56,0.65,0.58,1.0,0.57,0.55
horsepower,0.35,0.55,0.64,0.75,0.81,0.57,1.0,0.81
price,0.58,0.68,0.76,0.84,0.87,0.55,0.81,1.0


In [259]:
cars_object=cars.select_dtypes(include=['object'])

In [260]:
cars.dtypes

company            object
fueltype           object
aspiration         object
carbody            object
drivewheel         object
wheelbase         float64
carlength         float64
carwidth          float64
curbweight          int64
enginetype         object
cylindernumber     object
enginesize          int64
boreratio         float64
horsepower          int64
price             float64
dtype: object

In [261]:
cars

Unnamed: 0,company,fueltype,aspiration,carbody,drivewheel,wheelbase,carlength,carwidth,curbweight,enginetype,cylindernumber,enginesize,boreratio,horsepower,price
0,alfa-romero,gas,std,convertible,rwd,88.6,168.8,64.1,2548,dohc,four,130,3.47,111,13495.0
1,alfa-romero,gas,std,convertible,rwd,88.6,168.8,64.1,2548,dohc,four,130,3.47,111,16500.0
2,alfa-romero,gas,std,hatchback,rwd,94.5,171.2,65.5,2823,ohcv,six,152,2.68,154,16500.0
3,audi,gas,std,sedan,fwd,99.8,176.6,66.2,2337,ohc,four,109,3.19,102,13950.0
4,audi,gas,std,sedan,4wd,99.4,176.6,66.4,2824,ohc,five,136,3.19,115,17450.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,volvo,gas,std,sedan,rwd,109.1,188.8,68.9,2952,ohc,four,141,3.78,114,16845.0
201,volvo,gas,turbo,sedan,rwd,109.1,188.8,68.8,3049,ohc,four,141,3.78,160,19045.0
202,volvo,gas,std,sedan,rwd,109.1,188.8,68.9,3012,ohcv,six,173,3.58,134,21485.0
203,volvo,diesel,turbo,sedan,rwd,109.1,188.8,68.9,3217,ohc,six,145,3.01,106,22470.0


In [262]:
cars_object

Unnamed: 0,company,fueltype,aspiration,carbody,drivewheel,enginetype,cylindernumber
0,alfa-romero,gas,std,convertible,rwd,dohc,four
1,alfa-romero,gas,std,convertible,rwd,dohc,four
2,alfa-romero,gas,std,hatchback,rwd,ohcv,six
3,audi,gas,std,sedan,fwd,ohc,four
4,audi,gas,std,sedan,4wd,ohc,five
...,...,...,...,...,...,...,...
200,volvo,gas,std,sedan,rwd,ohc,four
201,volvo,gas,turbo,sedan,rwd,ohc,four
202,volvo,gas,std,sedan,rwd,ohcv,six
203,volvo,diesel,turbo,sedan,rwd,ohc,six


In [263]:
cars_not_object = cars.select_dtypes(exclude=['object'])

In [264]:
cars_not_object

Unnamed: 0,wheelbase,carlength,carwidth,curbweight,enginesize,boreratio,horsepower,price
0,88.6,168.8,64.1,2548,130,3.47,111,13495.0
1,88.6,168.8,64.1,2548,130,3.47,111,16500.0
2,94.5,171.2,65.5,2823,152,2.68,154,16500.0
3,99.8,176.6,66.2,2337,109,3.19,102,13950.0
4,99.4,176.6,66.4,2824,136,3.19,115,17450.0
...,...,...,...,...,...,...,...,...
200,109.1,188.8,68.9,2952,141,3.78,114,16845.0
201,109.1,188.8,68.8,3049,141,3.78,160,19045.0
202,109.1,188.8,68.9,3012,173,3.58,134,21485.0
203,109.1,188.8,68.9,3217,145,3.01,106,22470.0


In [265]:
cars_object = pd.get_dummies(data=cars_object, drop_first = True)

In [266]:
cars_object

Unnamed: 0,company_audi,company_bmw,company_buick,company_chevrolet,company_dodge,company_honda,company_isuzu,company_jaguar,company_mazda,company_mercury,...,enginetype_ohc,enginetype_ohcf,enginetype_ohcv,enginetype_rotor,cylindernumber_five,cylindernumber_four,cylindernumber_six,cylindernumber_three,cylindernumber_twelve,cylindernumber_two
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,1,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
201,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
202,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,1,0,0,0
203,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,1,0,0,0


In [267]:
cars = pd.concat([cars_object, cars_not_object], axis=1)

In [268]:
cars

Unnamed: 0,company_audi,company_bmw,company_buick,company_chevrolet,company_dodge,company_honda,company_isuzu,company_jaguar,company_mazda,company_mercury,...,cylindernumber_twelve,cylindernumber_two,wheelbase,carlength,carwidth,curbweight,enginesize,boreratio,horsepower,price
0,0,0,0,0,0,0,0,0,0,0,...,0,0,88.6,168.8,64.1,2548,130,3.47,111,13495.0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,88.6,168.8,64.1,2548,130,3.47,111,16500.0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,94.5,171.2,65.5,2823,152,2.68,154,16500.0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,99.8,176.6,66.2,2337,109,3.19,102,13950.0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,99.4,176.6,66.4,2824,136,3.19,115,17450.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,0,0,0,0,0,0,0,0,0,0,...,0,0,109.1,188.8,68.9,2952,141,3.78,114,16845.0
201,0,0,0,0,0,0,0,0,0,0,...,0,0,109.1,188.8,68.8,3049,141,3.78,160,19045.0
202,0,0,0,0,0,0,0,0,0,0,...,0,0,109.1,188.8,68.9,3012,173,3.58,134,21485.0
203,0,0,0,0,0,0,0,0,0,0,...,0,0,109.1,188.8,68.9,3217,145,3.01,106,22470.0


In [269]:
cars.shape

(205, 49)

In [270]:
model_main = smf.ols("price ~ horsepower", data=cars).fit().summary()

In [271]:
model_main

0,1,2,3
Dep. Variable:,price,R-squared:,0.653
Model:,OLS,Adj. R-squared:,0.651
Method:,Least Squares,F-statistic:,382.2
Date:,"Tue, 08 Feb 2022",Prob (F-statistic):,1.48e-48
Time:,12:38:49,Log-Likelihood:,-2024.0
No. Observations:,205,AIC:,4052.0
Df Residuals:,203,BIC:,4059.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-3721.7615,929.849,-4.003,0.000,-5555.163,-1888.360
horsepower,163.2631,8.351,19.549,0.000,146.796,179.730

0,1,2,3
Omnibus:,47.741,Durbin-Watson:,0.792
Prob(Omnibus):,0.0,Jarque-Bera (JB):,91.702
Skew:,1.141,Prob(JB):,1.22e-20
Kurtosis:,5.352,Cond. No.,314.0


In [272]:
cars_not_object_not_price = cars_not_object.drop(columns=['price'])

In [273]:
cars_not_object_not_price

Unnamed: 0,wheelbase,carlength,carwidth,curbweight,enginesize,boreratio,horsepower
0,88.6,168.8,64.1,2548,130,3.47,111
1,88.6,168.8,64.1,2548,130,3.47,111
2,94.5,171.2,65.5,2823,152,2.68,154
3,99.8,176.6,66.2,2337,109,3.19,102
4,99.4,176.6,66.4,2824,136,3.19,115
...,...,...,...,...,...,...,...
200,109.1,188.8,68.9,2952,141,3.78,114
201,109.1,188.8,68.8,3049,141,3.78,160
202,109.1,188.8,68.9,3012,173,3.58,134
203,109.1,188.8,68.9,3217,145,3.01,106


In [274]:
model_main_all_predict = smf.ols("price ~ wheelbase+carlength+carwidth+curbweight+enginesize+boreratio+horsepower",
                                 data=cars_not_object) \
                            .fit() \
                            .summary()

In [275]:
model_main_all_predict

0,1,2,3
Dep. Variable:,price,R-squared:,0.822
Model:,OLS,Adj. R-squared:,0.816
Method:,Least Squares,F-statistic:,129.8
Date:,"Tue, 08 Feb 2022",Prob (F-statistic):,2.99e-70
Time:,12:38:49,Log-Likelihood:,-1955.7
No. Observations:,205,AIC:,3927.0
Df Residuals:,197,BIC:,3954.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-4.243e+04,1.32e+04,-3.216,0.002,-6.84e+04,-1.64e+04
wheelbase,94.7692,98.355,0.964,0.336,-99.194,288.733
carlength,-49.9229,54.438,-0.917,0.360,-157.278,57.432
carwidth,541.8531,253.601,2.137,0.034,41.731,1041.975
curbweight,2.6274,1.563,1.681,0.094,-0.455,5.710
enginesize,83.1012,12.716,6.535,0.000,58.025,108.177
boreratio,-1115.7083,1204.007,-0.927,0.355,-3490.105,1258.689
horsepower,55.4856,12.668,4.380,0.000,30.503,80.468

0,1,2,3
Omnibus:,33.173,Durbin-Watson:,0.792
Prob(Omnibus):,0.0,Jarque-Bera (JB):,78.179
Skew:,0.723,Prob(JB):,1.06e-17
Kurtosis:,5.657,Cond. No.,144000.0


In [276]:
model_main_all_predict_no_marok = smf.ols("price ~ wheelbase+carlength+carwidth+curbweight+enginesize+boreratio+horsepower",
                                 data=cars_not_object) \
                            .fit() \
                            .summary()

In [277]:
cars_object

Unnamed: 0,company_audi,company_bmw,company_buick,company_chevrolet,company_dodge,company_honda,company_isuzu,company_jaguar,company_mazda,company_mercury,...,enginetype_ohc,enginetype_ohcf,enginetype_ohcv,enginetype_rotor,cylindernumber_five,cylindernumber_four,cylindernumber_six,cylindernumber_three,cylindernumber_twelve,cylindernumber_two
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,1,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,1,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
201,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
202,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,1,0,0,0
203,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,1,0,0,0


In [278]:
X = sm.add_constant(cars)  # добавить константу, чтобы был свободный член

model = sm.OLS(cars_not_object.price, X)  # говорим модели, что у нас ЗП, а что НП
results = model.fit()  # строим регрессионную прямую
print(results.summary())  # смотрим результат

                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 1.418e+29
Date:                Tue, 08 Feb 2022   Prob (F-statistic):               0.00
Time:                        12:38:49   Log-Likelihood:                 4624.2
No. Observations:                 205   AIC:                            -9152.
Df Residuals:                     157   BIC:                            -8993.
Df Model:                          47                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                  -1.86e-

In [279]:
cars_not_object_not_price

Unnamed: 0,wheelbase,carlength,carwidth,curbweight,enginesize,boreratio,horsepower
0,88.6,168.8,64.1,2548,130,3.47,111
1,88.6,168.8,64.1,2548,130,3.47,111
2,94.5,171.2,65.5,2823,152,2.68,154
3,99.8,176.6,66.2,2337,109,3.19,102
4,99.4,176.6,66.4,2824,136,3.19,115
...,...,...,...,...,...,...,...
200,109.1,188.8,68.9,2952,141,3.78,114
201,109.1,188.8,68.8,3049,141,3.78,160
202,109.1,188.8,68.9,3012,173,3.58,134
203,109.1,188.8,68.9,3217,145,3.01,106


In [280]:
cars

Unnamed: 0,company_audi,company_bmw,company_buick,company_chevrolet,company_dodge,company_honda,company_isuzu,company_jaguar,company_mazda,company_mercury,...,cylindernumber_twelve,cylindernumber_two,wheelbase,carlength,carwidth,curbweight,enginesize,boreratio,horsepower,price
0,0,0,0,0,0,0,0,0,0,0,...,0,0,88.6,168.8,64.1,2548,130,3.47,111,13495.0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,88.6,168.8,64.1,2548,130,3.47,111,16500.0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,94.5,171.2,65.5,2823,152,2.68,154,16500.0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,99.8,176.6,66.2,2337,109,3.19,102,13950.0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,99.4,176.6,66.4,2824,136,3.19,115,17450.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,0,0,0,0,0,0,0,0,0,0,...,0,0,109.1,188.8,68.9,2952,141,3.78,114,16845.0
201,0,0,0,0,0,0,0,0,0,0,...,0,0,109.1,188.8,68.8,3049,141,3.78,160,19045.0
202,0,0,0,0,0,0,0,0,0,0,...,0,0,109.1,188.8,68.9,3012,173,3.58,134,21485.0
203,0,0,0,0,0,0,0,0,0,0,...,0,0,109.1,188.8,68.9,3217,145,3.01,106,22470.0


In [288]:
cars=cars[['fueltype_gas',
       'aspiration_turbo', 'carbody_hardtop', 'carbody_hatchback',
       'carbody_sedan', 'carbody_wagon', 'drivewheel_fwd', 'drivewheel_rwd',
       'enginetype_dohcv', 'enginetype_l', 'enginetype_ohc', 'enginetype_ohcf',
       'enginetype_ohcv', 'enginetype_rotor', 'cylindernumber_five',
       'cylindernumber_four', 'cylindernumber_six', 'cylindernumber_three',
       'cylindernumber_twelve', 'cylindernumber_two', 'wheelbase', 'carlength',
       'carwidth', 'curbweight', 'enginesize', 'boreratio', 'horsepower']]

In [289]:
x = cars

In [290]:
x = sm.add_constant(x)  # добавить константу, чтобы был свободный член

In [291]:
y = cars1[['price']]

In [292]:
model = sm.OLS(y, x)  # говорим модели, что у нас ЗП, а что НП

In [293]:
results = model.fit()  # строим регрессионную прямую
print(results.summary())  # смотрим результат

                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       0.914
Model:                            OLS   Adj. R-squared:                  0.901
Method:                 Least Squares   F-statistic:                     72.32
Date:                Tue, 08 Feb 2022   Prob (F-statistic):           9.86e-81
Time:                        12:39:04   Log-Likelihood:                -1881.6
No. Observations:                 205   AIC:                             3817.
Df Residuals:                     178   BIC:                             3907.
Df Model:                          26                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                   -1.7e+