In [1]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder,LabelEncoder

In [3]:
df=pd.read_csv('car_price_data.csv')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   model         1638 non-null   object 
 1   year          1639 non-null   float64
 2   motor_type    1638 non-null   object 
 3   running       1639 non-null   object 
 4   wheel         1638 non-null   object 
 5   color         1638 non-null   object 
 6   type          1640 non-null   object 
 7   status        1639 non-null   object 
 8   motor_volume  1638 non-null   float64
 9   price         1640 non-null   float64
dtypes: float64(3), object(7)
memory usage: 128.4+ KB


In [7]:
df.head()

Unnamed: 0,model,year,motor_type,running,wheel,color,type,status,motor_volume,price
0,toyota,2022.0,petrol,3000 km,left,skyblue,sedan,excellent,2.0,24500.0
1,mercedes-benz,2014.0,petrol,132000 km,left,black,sedan,excellent,2.0,25500.0
2,kia,2018.0,petrol,95000 miles,left,other,sedan,excellent,2.0,11700.0
3,mercedes-benz,2002.0,petrol,137000 miles,left,golden,sedan,excellent,3.2,12000.0
4,mercedes-benz,2017.0,petrol,130000 km,left,black,sedan,good,2.0,26000.0


In [9]:
df['running']

0            3000  km
1          132000  km
2        95000  miles
3       137000  miles
4          130000  km
            ...      
1637    120000  miles
1638       170000  km
1639     68900  miles
1640     31000  miles
1641           20  km
Name: running, Length: 1642, dtype: object

# running qatoridagi milesni kmga utqazib olamiz va yozuvni tashlab yuboramiz

In [12]:
df['running_value'] = df['running'].str.extract(r'(\d+)').astype(float)
df['running_unit'] = df['running'].str.extract(r'([a-zA-Z]+)')

# mileni kmga o'tkazish
df.loc[df['running_unit'] == 'miles', 'running_value'] *= 1.60934

# running ustunini yaratish
df['running_km'] = df['running_value']

# vaqtinchaligini tashlab yuborish 
df.drop(columns=['running_value', 'running_unit'], inplace=True)

# Display the updated DataFrame
print(df)

              model    year motor_type        running wheel    color   type  \
0            toyota  2022.0     petrol       3000  km  left  skyblue  sedan   
1     mercedes-benz  2014.0     petrol     132000  km  left    black  sedan   
2               kia  2018.0     petrol   95000  miles  left    other  sedan   
3     mercedes-benz  2002.0     petrol  137000  miles  left   golden  sedan   
4     mercedes-benz  2017.0     petrol     130000  km  left    black  sedan   
...             ...     ...        ...            ...   ...      ...    ...   
1637        hyundai  2017.0     petrol  120000  miles  left    white  sedan   
1638         toyota  2014.0     petrol     170000  km  left    black  sedan   
1639         nissan  2018.0     petrol   68900  miles  left     blue    suv   
1640         nissan  2019.0     petrol   31000  miles  left    black    suv   
1641         toyota  2022.0     petrol         20  km  left    white  sedan   

         status  motor_volume    price  running_km 

In [14]:
df.head()

Unnamed: 0,model,year,motor_type,running,wheel,color,type,status,motor_volume,price,running_km
0,toyota,2022.0,petrol,3000 km,left,skyblue,sedan,excellent,2.0,24500.0,3000.0
1,mercedes-benz,2014.0,petrol,132000 km,left,black,sedan,excellent,2.0,25500.0,132000.0
2,kia,2018.0,petrol,95000 miles,left,other,sedan,excellent,2.0,11700.0,152887.3
3,mercedes-benz,2002.0,petrol,137000 miles,left,golden,sedan,excellent,3.2,12000.0,220479.58
4,mercedes-benz,2017.0,petrol,130000 km,left,black,sedan,good,2.0,26000.0,130000.0


In [16]:
df.drop(columns=['running'], inplace=True)

# Bush kataklar sonini topish

In [19]:
df.isnull().sum()

model           4
year            3
motor_type      4
wheel           4
color           4
type            2
status          3
motor_volume    4
price           2
running_km      3
dtype: int64

In [21]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   model         1638 non-null   object 
 1   year          1639 non-null   float64
 2   motor_type    1638 non-null   object 
 3   wheel         1638 non-null   object 
 4   color         1638 non-null   object 
 5   type          1640 non-null   object 
 6   status        1639 non-null   object 
 7   motor_volume  1638 non-null   float64
 8   price         1640 non-null   float64
 9   running_km    1639 non-null   float64
dtypes: float64(4), object(6)
memory usage: 128.4+ KB


# Bush kataklarni tuldirish

In [24]:
bush_katak=df.isnull().sum()>0

In [26]:
bush_katak

model           True
year            True
motor_type      True
wheel           True
color           True
type            True
status          True
motor_volume    True
price           True
running_km      True
dtype: bool

In [28]:
for col in df.columns[bush_katak]:
    if df[col].dtype=='object':
        df[col].fillna(df[col].mode()[0], inplace=True)
    else:
        df[col].fillna(df[col].mean(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(), inplace=True)


In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   model         1642 non-null   object 
 1   year          1642 non-null   float64
 2   motor_type    1642 non-null   object 
 3   wheel         1642 non-null   object 
 4   color         1642 non-null   object 
 5   type          1642 non-null   object 
 6   status        1642 non-null   object 
 7   motor_volume  1642 non-null   float64
 8   price         1642 non-null   float64
 9   running_km    1642 non-null   float64
dtypes: float64(4), object(6)
memory usage: 128.4+ KB


In [32]:
df.head()

Unnamed: 0,model,year,motor_type,wheel,color,type,status,motor_volume,price,running_km
0,toyota,2022.0,petrol,left,skyblue,sedan,excellent,2.0,24500.0,3000.0
1,mercedes-benz,2014.0,petrol,left,black,sedan,excellent,2.0,25500.0,132000.0
2,kia,2018.0,petrol,left,other,sedan,excellent,2.0,11700.0,152887.3
3,mercedes-benz,2002.0,petrol,left,golden,sedan,excellent,3.2,12000.0,220479.58
4,mercedes-benz,2017.0,petrol,left,black,sedan,good,2.0,26000.0,130000.0


In [34]:
bush_katak

model           True
year            True
motor_type      True
wheel           True
color           True
type            True
status          True
motor_volume    True
price           True
running_km      True
dtype: bool

In [36]:
df.isnull().sum()

model           0
year            0
motor_type      0
wheel           0
color           0
type            0
status          0
motor_volume    0
price           0
running_km      0
dtype: int64

# Yozuvli ustunlarni topamiz

In [39]:
yozuvli_ustun=df.select_dtypes(include=['object','category']).columns

In [41]:
yozuvli_ustun

Index(['model', 'motor_type', 'wheel', 'color', 'type', 'status'], dtype='object')

In [43]:
for col in yozuvli_ustun:
    print(f"column {col}: {df[col].nunique()}")

column model: 5
column motor_type: 5
column wheel: 1
column color: 17
column type: 7
column status: 5


In [45]:
df['color'].value_counts()

color
black      544
white      407
silver     222
gray       188
blue       140
red         47
other       30
cherry      20
green       10
brown       10
golden       6
orange       5
beige        5
clove        3
purple       2
skyblue      2
pink         1
Name: count, dtype: int64

# 'color' ustunimizda categorylar soni 17ta bulgani uchun label encodingdan, qolganlariga one hot encodingdan foydalanamiz

In [48]:
df = pd.get_dummies(df, columns=['model', 'motor_type', 'wheel', 'type', 'status'])

In [50]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 28 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   year                       1642 non-null   float64
 1   color                      1642 non-null   object 
 2   motor_volume               1642 non-null   float64
 3   price                      1642 non-null   float64
 4   running_km                 1642 non-null   float64
 5   model_hyundai              1642 non-null   bool   
 6   model_kia                  1642 non-null   bool   
 7   model_mercedes-benz        1642 non-null   bool   
 8   model_nissan               1642 non-null   bool   
 9   model_toyota               1642 non-null   bool   
 10  motor_type_diesel          1642 non-null   bool   
 11  motor_type_gas             1642 non-null   bool   
 12  motor_type_hybrid          1642 non-null   bool   
 13  motor_type_petrol          1642 non-null   bool 

In [52]:
df.head()

Unnamed: 0,year,color,motor_volume,price,running_km,model_hyundai,model_kia,model_mercedes-benz,model_nissan,model_toyota,...,type_hatchback,type_minivan / minibus,type_pickup,type_sedan,type_suv,status_crashed,status_excellent,status_good,status_new,status_normal
0,2022.0,skyblue,2.0,24500.0,3000.0,False,False,False,False,True,...,False,False,False,True,False,False,True,False,False,False
1,2014.0,black,2.0,25500.0,132000.0,False,False,True,False,False,...,False,False,False,True,False,False,True,False,False,False
2,2018.0,other,2.0,11700.0,152887.3,False,True,False,False,False,...,False,False,False,True,False,False,True,False,False,False
3,2002.0,golden,3.2,12000.0,220479.58,False,False,True,False,False,...,False,False,False,True,False,False,True,False,False,False
4,2017.0,black,2.0,26000.0,130000.0,False,False,True,False,False,...,False,False,False,True,False,False,False,True,False,False


# color nig label encodingga utqazib olamiz

In [55]:
label_encoder=LabelEncoder()

In [57]:
df['color']=label_encoder.fit_transform(df['color'])

In [59]:
df.head()

Unnamed: 0,year,color,motor_volume,price,running_km,model_hyundai,model_kia,model_mercedes-benz,model_nissan,model_toyota,...,type_hatchback,type_minivan / minibus,type_pickup,type_sedan,type_suv,status_crashed,status_excellent,status_good,status_new,status_normal
0,2022.0,15,2.0,24500.0,3000.0,False,False,False,False,True,...,False,False,False,True,False,False,True,False,False,False
1,2014.0,1,2.0,25500.0,132000.0,False,False,True,False,False,...,False,False,False,True,False,False,True,False,False,False
2,2018.0,10,2.0,11700.0,152887.3,False,True,False,False,False,...,False,False,False,True,False,False,True,False,False,False
3,2002.0,6,3.2,12000.0,220479.58,False,False,True,False,False,...,False,False,False,True,False,False,True,False,False,False
4,2017.0,1,2.0,26000.0,130000.0,False,False,True,False,False,...,False,False,False,True,False,False,False,True,False,False


# booliandan integerga utib olamiz

In [62]:
df[df.columns]=df[df.columns].astype(int)

In [64]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 28 columns):
 #   Column                     Non-Null Count  Dtype
---  ------                     --------------  -----
 0   year                       1642 non-null   int32
 1   color                      1642 non-null   int32
 2   motor_volume               1642 non-null   int32
 3   price                      1642 non-null   int32
 4   running_km                 1642 non-null   int32
 5   model_hyundai              1642 non-null   int32
 6   model_kia                  1642 non-null   int32
 7   model_mercedes-benz        1642 non-null   int32
 8   model_nissan               1642 non-null   int32
 9   model_toyota               1642 non-null   int32
 10  motor_type_diesel          1642 non-null   int32
 11  motor_type_gas             1642 non-null   int32
 12  motor_type_hybrid          1642 non-null   int32
 13  motor_type_petrol          1642 non-null   int32
 14  motor_type_petrol and ga

In [66]:
df.head()

Unnamed: 0,year,color,motor_volume,price,running_km,model_hyundai,model_kia,model_mercedes-benz,model_nissan,model_toyota,...,type_hatchback,type_minivan / minibus,type_pickup,type_sedan,type_suv,status_crashed,status_excellent,status_good,status_new,status_normal
0,2022,15,2,24500,3000,0,0,0,0,1,...,0,0,0,1,0,0,1,0,0,0
1,2014,1,2,25500,132000,0,0,1,0,0,...,0,0,0,1,0,0,1,0,0,0
2,2018,10,2,11700,152887,0,1,0,0,0,...,0,0,0,1,0,0,1,0,0,0
3,2002,6,3,12000,220479,0,0,1,0,0,...,0,0,0,1,0,0,1,0,0,0
4,2017,1,2,26000,130000,0,0,1,0,0,...,0,0,0,1,0,0,0,1,0,0


# Scaling qilamiz

In [69]:
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)
df = pd.DataFrame(df_scaled, columns=df.columns)


In [71]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 28 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   year                       1642 non-null   float64
 1   color                      1642 non-null   float64
 2   motor_volume               1642 non-null   float64
 3   price                      1642 non-null   float64
 4   running_km                 1642 non-null   float64
 5   model_hyundai              1642 non-null   float64
 6   model_kia                  1642 non-null   float64
 7   model_mercedes-benz        1642 non-null   float64
 8   model_nissan               1642 non-null   float64
 9   model_toyota               1642 non-null   float64
 10  motor_type_diesel          1642 non-null   float64
 11  motor_type_gas             1642 non-null   float64
 12  motor_type_hybrid          1642 non-null   float64
 13  motor_type_petrol          1642 non-null   float

In [73]:
df.head()

Unnamed: 0,year,color,motor_volume,price,running_km,model_hyundai,model_kia,model_mercedes-benz,model_nissan,model_toyota,...,type_hatchback,type_minivan / minibus,type_pickup,type_sedan,type_suv,status_crashed,status_excellent,status_good,status_new,status_normal
0,1.093941,1.096157,0.248031,1.187377,-1.202829,-0.521423,-0.457309,-0.571255,-0.519533,2.337641,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
1,-0.121179,-1.074627,0.248031,1.326774,0.132176,-0.521423,-0.457309,1.75053,-0.519533,-0.427782,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
2,0.486381,0.320877,0.248031,-0.596895,0.348333,-0.521423,2.186708,-0.571255,-0.519533,-0.427782,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
3,-1.94386,-0.299347,3.037532,-0.555076,1.047835,-0.521423,-0.457309,1.75053,-0.519533,-0.427782,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
4,0.334491,-1.074627,0.248031,1.396472,0.111478,-0.521423,-0.457309,1.75053,-0.519533,-0.427782,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,-1.650259,1.978931,-0.11382,-0.191353


# Training qilamiz

In [76]:
X = df.drop(columns=['price'])  # Features
y = df['price']  # Target variable

In [78]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_test,X_val,y_test,y_val=train_test_split(X_temp,y_temp,test_size=0.5,random_state=42)

# Modelni tanlaymiz

In [83]:
model=DecisionTreeRegressor()

# Modelni Training qildiramiz

In [86]:
model.fit(X_train,y_train)

# Model predict 

In [89]:
y_pred = model.predict(X_test)

In [91]:
y

0       1.187377
1       1.326774
2      -0.596895
3      -0.555076
4       1.396472
          ...   
1637   -0.499318
1638    0.072207
1639    0.490396
1640    0.490396
1641    1.744963
Name: price, Length: 1642, dtype: float64

In [93]:
X_train.shape

(1313, 27)

In [95]:
y_train.shape

(1313,)

In [97]:
X_test.shape

(164, 27)

In [99]:
y_test

984     2.372246
1232   -1.496001
807    -0.293708
660     0.797068
1277   -0.694473
          ...   
453     0.420698
1201    0.838887
1055   -0.722352
1364   -0.276284
893    -0.032340
Name: price, Length: 164, dtype: float64

In [101]:
y_test.shape

(164,)

In [103]:
y_pred[0]

1.717083296554786

In [105]:
X_train

Unnamed: 0,year,color,motor_volume,running_km,model_hyundai,model_kia,model_mercedes-benz,model_nissan,model_toyota,motor_type_diesel,...,type_hatchback,type_minivan / minibus,type_pickup,type_sedan,type_suv,status_crashed,status_excellent,status_good,status_new,status_normal
1308,0.638271,-1.074627,0.248031,-0.601000,1.917828,-0.457309,-0.571255,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
1018,-0.273069,0.941101,0.248031,1.097799,1.917828,-0.457309,-0.571255,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,-1.650259,1.978931,-0.11382,-0.191353
1046,0.790161,-0.919571,0.248031,-0.684268,-0.521423,2.186708,-0.571255,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
1202,0.334491,-0.919571,0.248031,-0.488757,1.917828,-0.457309,-0.571255,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
721,0.790161,-0.144291,0.248031,-1.078643,1.917828,-0.457309,-0.571255,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1130,0.334491,-0.919571,0.248031,-0.118008,-0.521423,-0.457309,-0.571255,1.924804,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,-1.870463,2.110723,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
1294,-0.121179,-1.074627,0.248031,0.194270,1.917828,-0.457309,-0.571255,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,-1.650259,-0.505323,-0.11382,5.225930
860,-1.488189,0.941101,0.248031,1.322297,-0.521423,-0.457309,1.750530,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
1459,0.486381,1.251213,0.248031,-0.292128,-0.521423,-0.457309,-0.571255,1.924804,-0.427782,-0.034922,...,10.414733,-0.024686,-0.024686,-1.870463,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353


In [107]:
X_test

Unnamed: 0,year,color,motor_volume,running_km,model_hyundai,model_kia,model_mercedes-benz,model_nissan,model_toyota,motor_type_diesel,...,type_hatchback,type_minivan / minibus,type_pickup,type_sedan,type_suv,status_crashed,status_excellent,status_good,status_new,status_normal
984,0.334491,-0.919571,0.248031,-0.093874,-0.521423,-0.457309,1.750530,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
1232,-2.703310,0.941101,0.248031,0.804853,-0.521423,-0.457309,1.750530,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
807,0.182601,0.941101,0.248031,0.427523,-0.521423,-0.457309,-0.571255,-0.519533,2.337641,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
660,0.486381,-0.144291,0.248031,-0.405965,-0.521423,-0.457309,-0.571255,1.924804,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,-1.870463,2.110723,-0.131713,-1.650259,1.978931,-0.11382,-0.191353
1277,0.182601,0.786045,0.248031,-0.800858,1.917828,-0.457309,-0.571255,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453,0.486381,1.251213,0.248031,-0.584339,-0.521423,-0.457309,-0.571255,1.924804,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,-1.870463,2.110723,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
1201,1.093941,1.251213,0.248031,-1.047596,-0.521423,2.186708,-0.571255,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
1055,0.334491,0.941101,0.248031,-1.232261,-0.521423,2.186708,-0.571255,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,-1.650259,1.978931,-0.11382,-0.191353
1364,-0.273069,1.251213,0.248031,-0.234588,-0.521423,-0.457309,-0.571255,-0.519533,2.337641,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353


In [109]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1642 entries, 0 to 1641
Data columns (total 27 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   year                       1642 non-null   float64
 1   color                      1642 non-null   float64
 2   motor_volume               1642 non-null   float64
 3   running_km                 1642 non-null   float64
 4   model_hyundai              1642 non-null   float64
 5   model_kia                  1642 non-null   float64
 6   model_mercedes-benz        1642 non-null   float64
 7   model_nissan               1642 non-null   float64
 8   model_toyota               1642 non-null   float64
 9   motor_type_diesel          1642 non-null   float64
 10  motor_type_gas             1642 non-null   float64
 11  motor_type_hybrid          1642 non-null   float64
 12  motor_type_petrol          1642 non-null   float64
 13  motor_type_petrol and gas  1642 non-null   float

In [111]:
X.head()

Unnamed: 0,year,color,motor_volume,running_km,model_hyundai,model_kia,model_mercedes-benz,model_nissan,model_toyota,motor_type_diesel,...,type_hatchback,type_minivan / minibus,type_pickup,type_sedan,type_suv,status_crashed,status_excellent,status_good,status_new,status_normal
0,1.093941,1.096157,0.248031,-1.202829,-0.521423,-0.457309,-0.571255,-0.519533,2.337641,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
1,-0.121179,-1.074627,0.248031,0.132176,-0.521423,-0.457309,1.75053,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
2,0.486381,0.320877,0.248031,0.348333,-0.521423,2.186708,-0.571255,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
3,-1.94386,-0.299347,3.037532,1.047835,-0.521423,-0.457309,1.75053,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,0.605965,-0.505323,-0.11382,-0.191353
4,0.334491,-1.074627,0.248031,0.111478,-0.521423,-0.457309,1.75053,-0.519533,-0.427782,-0.034922,...,-0.096018,-0.024686,-0.024686,0.534627,-0.473771,-0.131713,-1.650259,1.978931,-0.11382,-0.191353


# Modelni baholaymiz

In [114]:
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [116]:
print(f"\nModel Evaluation Metrics:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"R² Score: {r2:.2f}")


Model Evaluation Metrics:
Mean Squared Error (MSE): 0.44
Mean Absolute Error (MAE): 0.40
R² Score: 0.57


# K-cross validation

In [129]:
kfold = KFold(n_splits=6, shuffle=True, random_state=42) 

In [131]:
cv_scores_lr = cross_val_score(model, X_train, y_train, cv=kfold, scoring='r2')

cv_scores_lr_mean = np.mean(cv_scores_lr)
print(f"Decision Tree Cross-Validation R²: {cv_scores_lr_mean:.4f}")

Decision Tree Cross-Validation R²: 0.5973


In [133]:
cv_scores_lr

array([0.58652081, 0.58253978, 0.6401607 , 0.58331212, 0.63640414,
       0.55510162])

In [135]:
cv_scores_lr = cross_val_score(model, X_train, y_train, cv=kfold, scoring='r2')

cv_scores_lr_mean = np.mean(cv_scores_lr)
print(f"Decision Tree Cross-Validation R²: {cv_scores_lr_mean:.4f}")

Decision Tree Cross-Validation R²: 0.5928
