In [1]:
import numpy as np
import pandas as pd

# Load the dataset from 'dataset14.csv' (the path is relative to the working directory).
data = pd.read_csv('dataset14.csv')

In [2]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

# Hold out 15% of the rows for the final evaluation; 'bpm' is the regression target.
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(columns='bpm'),
    data['bpm'],
    test_size=0.15,
    random_state=42,
)

scaler = MinMaxScaler()
rf = RandomForestRegressor(random_state=42)

# Scaling is a pipeline step, so every cross-validation fold fits its own
# scaler on that fold's training part only. Fitting the scaler once on all of
# X_train before the search would let validation rows influence the
# preprocessing (data leakage inside cross-validation).
pipeline = Pipeline([
    ('scaler', scaler),
    ('rf', rf),
])

# Grid keys carry the 'rf__' prefix so they are routed to the forest step.
param_grid = {
    'rf__n_estimators': [200, 270, 350],  # number of trees
    'rf__max_depth': [10, 20, 50],        # maximum tree depth
}

model = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='neg_root_mean_squared_error',
    cv=4,        # 4-fold cross-validation
    n_jobs=-1,   # run on all available CPU cores
    verbose=2,
)

# X_train / X_test stay unscaled: the fitted pipeline scales them internally,
# so `model` must always be given raw features.
# NOTE: `scaler` and `rf` above are only templates (the search works on clones);
# the fitted steps live in model.best_estimator_.named_steps.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_train_pred = model.predict(X_train)

# Strip the pipeline step prefix so only the forest's own parameter names are shown.
best_rf_params = {
    name.split('__', 1)[1]: value
    for name, value in model.best_params_.items()
}

print("Best params:", best_rf_params)
# The scorer reports negated RMSE (higher is better), so flip the sign back.
print("Best RMSE:", -model.best_score_)
print('-----')
print("Test RMSE = %.4f" % np.sqrt(mean_squared_error(y_test, y_pred)))
print("Train RMSE = %.4f" % np.sqrt(mean_squared_error(y_train, y_train_pred)))

Fitting 4 folds for each of 9 candidates, totalling 36 fits
Best params: {'max_depth': 20, 'n_estimators': 350}
Best RMSE: 4.0880367244242795
-----
Test RMSE = 3.2024
Train RMSE = 1.4667
