In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error
from config.config import OUTPUT_CSV_FILE, CLEANED_CSV_FILE

df = pd.read_csv(CLEANED_CSV_FILE)
df

Unnamed: 0.1,Unnamed: 0,amount,city,marka,model,year,type,color,distance,motor_volume,horse_power,motor_type
0,0,17500.0,Bakı,Hyundai,Sonata,2010,Sedan,Gümüşü,222000,2.4,178,Benzin
1,1,27300.0,Bakı,Toyota,Corolla,2021,Sedan,Qara,73000,1.6,122,Benzin
2,2,32123.0,Bakı,Toyota,Corolla,2024,Sedan,Ağ,0,1.8,98,Hibrid
3,3,17500.0,Qazax,Chevrolet,Trax,2018,"Offroader / SUV, 5 qapı",Qara,205000,1.4,138,Benzin
4,4,24900.0,Bakı,Ford,Fusion (North America),2016,Sedan,Ağ,169000,2.0,240,Plug-in Hibrid
...,...,...,...,...,...,...,...,...,...,...,...,...
2901,3060,18800.0,Bakı,Ford,Transit,2007,Furqon,Ağ,195000,2.2,125,Dizel
2902,3061,36000.0,Bakı,Volkswagen,Touareg,2015,"Offroader / SUV, 5 qapı",Ağ,129000,3.6,249,Benzin
2903,3062,14800.0,Bakı,Nissan,March,2016,"Hetçbek, 5 qapı",Tünd qırmızı,54000,1.2,79,Benzin
2904,3063,12500.0,Bakı,Hyundai,i30,2010,"Hetçbek, 5 qapı",Qara,260000,1.4,109,Benzin


In [2]:
data = df
X = data.drop(['Unnamed: 0','amount'], axis=1)
y = data['amount']

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=47)

In [4]:
categorical_cols = X.select_dtypes(include='object').columns
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=200, random_state=42))
])

In [5]:
model.fit(X_train, y_train)

In [6]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"R² Score: {r2:.4f}")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")

R² Score: 0.8635
MAE: 5725.44
RMSE: 13052.63
