In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from xgboost import XGBRegressor

In [None]:
# Colab-specific step: mount Google Drive at /content/drive/ so that files
# stored there can be opened through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
%cd /content/drive/MyDrive
df = pd.read_csv('/content/drive/MyDrive/Laptop_price.csv')

/content/drive/MyDrive


In [None]:
# Separate the regression target from the predictors:
# `y` is the 'Price' column, `X` is every other column of `df`.
y = df['Price']
X = df.drop(columns='Price')

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Partition the predictor columns by dtype so that each group can be sent to
# its own preprocessing branch.
# 'number' matches every numeric dtype (not only int64/float64) and 'category'
# is accepted next to 'object', so a column stored with a narrower numeric or
# a categorical dtype does not end up missing from both lists.
numericalFeatures = X.select_dtypes(include='number').columns.tolist()
categoricalFeatures = X.select_dtypes(include=['object', 'category']).columns.tolist()

In [None]:
# Numeric branch: replace missing values with the column mean, then apply
# StandardScaler.
numericalTransformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: replace missing values with the most frequent value,
# then one-hot encode. handle_unknown='ignore' is set so that categories not
# seen while fitting do not make transform fail.
categoricalTransformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

In [None]:
# Send each column group through its matching branch: the numeric columns go
# to the 'num' transformer and the categorical columns to the 'cat' one.
preprocessor = ColumnTransformer([
    ('num', numericalTransformer, numericalFeatures),
    ('cat', categoricalTransformer, categoricalFeatures),
])

In [None]:
# Full model: preprocessing followed by an XGBoost regressor with a fixed seed.
# The regressor step is named 'mod', which is the prefix the hyper-parameter
# grid keys ('mod__...') refer to.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('mod', XGBRegressor(random_state=37)),
])

In [None]:
# Hyper-parameter grid for the search. Every key is prefixed with 'mod__' so
# that the value is applied to the pipeline step named 'mod'.
# 3 * 3 * 3 * 3 * 2 = 162 combinations in total.
param = dict(
    mod__n_estimators=[50, 100, 200],
    mod__learning_rate=[0.01, 0.1, 0.2],
    mod__max_depth=[4, 6, 8],
    mod__gamma=[0, 0.2, 0.3],
    mod__subsample=[0.8, 1.0],
)

In [None]:
# Exhaustive search over `param` with 5-fold cross-validation.
# n_jobs=-1 asks for all available cores; verbose=2 prints progress while fitting.
gridSearch = GridSearchCV(
    estimator=pipeline,
    param_grid=param,
    cv=5,
    n_jobs=-1,
    verbose=2,
)

In [None]:
# Run the search on the training split only; the test split is not touched here.
gridSearch.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 162 candidates, totalling 810 fits


In [None]:
# Keep the outcome of the search: the winning hyper-parameter combination and
# the corresponding best pipeline.
bestParams = gridSearch.best_params_
mod = gridSearch.best_estimator_

In [None]:
# Show the selected hyper-parameters (the label is Russian for "Best parameters").
print("Лучшие параметры: ", bestParams)

Лучшие параметры:  {'mod__gamma': 0, 'mod__learning_rate': 0.1, 'mod__max_depth': 4, 'mod__n_estimators': 100, 'mod__subsample': 1.0}
