In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from joblib import dump, load

# --- 1. Load the data ---
df = pd.read_csv(r"C:\Users\Rasulbek907\Desktop\Project_8\Data\Raw_Data\filtered.csv")

# Discard leftover index columns; errors='ignore' skips any that are absent.
df = df.drop(columns=['Unnamed: 0', 'index'], errors='ignore')

# Separate the regression target from the feature columns.
target = 'kilometer'
y = df[target]
X = df.drop(columns=[target])

# --- 2. Split the feature columns into numerical and categorical groups ---
# Every numeric dtype (not only int64/float64) counts as numerical, and every
# remaining column is treated as categorical, so the two lists always cover all
# of X's columns. A column that belongs to neither list would be discarded by
# the ColumnTransformer built from these lists, because its default is
# remainder='drop'.
numerical_features = X.select_dtypes(include='number').columns.tolist()
categorical_features = X.select_dtypes(exclude='number').columns.tolist()

# --- 3. Preprocessing: one transformer per column group ---
# Numerical columns are standardized. Categorical columns become integer codes,
# and a category that was not seen while fitting is encoded as -1.
category_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numerical_features),
    ('cat', category_encoder, categorical_features),
])

# --- 4. Full pipeline: preprocessing followed by the regressor ---
forest = RandomForestRegressor(
    n_estimators=100,
    max_depth=15,
    random_state=42,
    n_jobs=-1,  # use every available CPU core
)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', forest),
])

# Destination file for the fitted pipeline.
save_path = r"C:\Users\Rasulbek907\Desktop\Project_8\Models\Pipeline_Models\RandomForestRegressor_Fast.joblib"

# --- 5. Train the model ---
# Fits the preprocessing steps and the regressor on all rows of X / y.
pipeline.fit(X, y)

# --- 6. Persist the fitted pipeline ---
dump(pipeline, filename=save_path)
# The printed message is Uzbek for "Fast pipeline saved successfully".
print(f"Tez pipeline muvaffaqiyatli saqlandi: {save_path}")

# --- 7. Hand-written sample row for an offline sanity check ---
# Each key is a column name holding a single-element list, i.e. one row.
sample_listing = {
    'dateCrawled': ['2016-03-24 11:52:17'],
    'name': ['Golf_3_1.6'],
    'seller': ['privat'],
    'offerType': ['Angebot'],
    'price': [480],
    'abtest': ['test'],
    'vehicleType': [None],
    'yearOfRegistration': [1993],
    'gearbox': ['manuell'],
    'powerPS': [75],
    'model': ['golf'],
    'monthOfRegistration': [0],
    'fuelType': ['benzin'],
    'brand': ['volkswagen'],
    'notRepairedDamage': [None],
    'dateCreated': ['2016-03-24 00:00:00'],
    'nrOfPictures': [0],
    'postalCode': [70435],
    'lastSeen': ['2016-04-07 03:16:57'],
}
offline_data = pd.DataFrame(sample_listing)

# Keep exactly the columns of X, in X's order; a column missing from the
# sample raises KeyError here rather than at prediction time.
offline_data_prepared = offline_data.loc[:, X.columns]

# --- 8. Predict for the sample row ---
pred = pipeline.predict(offline_data_prepared)
print("Predicted Value:", pred[0])

Tez pipeline muvaffaqiyatli saqlandi: C:\Users\Rasulbek907\Desktop\Project_8\Models\Pipeline_Models\RandomForestRegressor_Fast.joblib
Predicted Value: 129029.05951351783
