In [112]:
import numpy as np
import pandas as pd

# Read the dataset from the CSV file next to the notebook and preview its first rows
data = pd.read_csv(filepath_or_buffer='dataset8.csv')
data.head(n=5)

Unnamed: 0,bpm,pace,time,temperature,gap,dist_quater,count_quater,steps_quater,puls_quater,weight_yeqr
0,149,357,4305,20,2,372590,27,1160574,60,77.1
1,147,363,4403,21,4,372480,27,1172636,60,77.1
2,149,366,4468,24,3,372320,27,1184919,60,77.1
3,153,374,5205,29,2,381430,27,1189566,60,77.2
4,151,356,6564,22,3,363030,26,1181979,60,77.2


In [113]:
# Dataset dimensions, followed by the total number of missing cells in the frame
print(data.shape)
print(data.isna().to_numpy().sum())

(270, 10)
0


In [114]:
# Summarise column dtypes, non-null counts and memory usage of the loaded frame
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 270 entries, 0 to 269
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   bpm           270 non-null    int64  
 1   pace          270 non-null    int64  
 2   time          270 non-null    int64  
 3   temperature   270 non-null    int64  
 4   gap           270 non-null    int64  
 5   dist_quater   270 non-null    int64  
 6   count_quater  270 non-null    int64  
 7   steps_quater  270 non-null    int64  
 8   puls_quater   270 non-null    int64  
 9   weight_yeqr   270 non-null    float64
dtypes: float64(1), int64(9)
memory usage: 21.2 KB


In [115]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Hold out 15% of the rows for testing; 'bpm' is the regression target and
# every remaining column is used as a feature. The seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(columns='bpm'), data['bpm'], test_size=0.15, random_state=42
)

# Learn the min/max of each feature from the training split only, then apply
# the same transformation to both splits (both become plain NumPy arrays).
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [116]:
# Column-wise mean of the scaled training matrix (one value per feature)
np.mean(X_train, axis=0)

array([0.28722123, 0.28163802, 0.5748216 , 0.03582482, 0.5601904 ,
       0.60779557, 0.64834798, 0.35633188, 0.50295973])

In [117]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error

# Base regressor; the fixed seed keeps the forest reproducible between runs
rf = RandomForestRegressor(random_state=42)

# Parameter grid (currently a single candidate value per parameter)
param_grid = dict(
    n_estimators=[200],      # number of trees
    max_depth=[30],          # tree depth
    min_samples_split=[2],   # minimum number of samples needed to split a node
    min_samples_leaf=[1],    # minimum number of samples in a leaf
)

# Grid search scored by negated RMSE with 4-fold cross-validation,
# run in parallel on all available cores
model = GridSearchCV(
    rf,
    param_grid,
    scoring='neg_root_mean_squared_error',
    cv=4,
    n_jobs=-1,
    verbose=2,
)

model.fit(X_train, y_train)

# Predict on both splits with the fitted search object
y_train_pred = model.predict(X_train)
y_pred = model.predict(X_test)

# RMSE on the held-out test split and on the training split
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))

print("Best params:", model.best_params_)
# best_score_ is the negated RMSE (see `scoring`), so flip the sign for display
print("Best RMSE:", -model.best_score_)
print('-----')
print("Test RMSE = %.4f" % test_rmse)
print("Train RMSE = %.4f" % train_rmse)

Fitting 4 folds for each of 1 candidates, totalling 4 fits
Best params: {'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Best RMSE: 4.077916036046693
-----
Test RMSE = 3.2463
Train RMSE = 1.4871


In [118]:
# Importance of every input feature according to the best forest found by the search
best_rf = model.best_estimator_
importances = best_rf.feature_importances_

# The model was trained on scaled NumPy arrays, which carry no column names.
# Recover the names from `data` using the same 'bpm' drop that produced the
# feature matrix, so the column order matches the importance vector.
feature_names = data.drop('bpm', axis=1).columns
if len(feature_names) != len(importances):
    # Guard against silently mislabelling features if `data` was changed
    # after the model had been trained.
    raise ValueError(
        f"expected {len(importances)} feature names, got {len(feature_names)}"
    )

# Values are multiplied by 1000 purely for readability of the printout
for i, (name, val) in enumerate(zip(feature_names, importances)):
    print(f"Feature {i} ({name}): {val*1000:.1f}")

Feature 0: 364.7
Feature 1: 49.8
Feature 2: 25.3
Feature 3: 12.0
Feature 4: 24.1
Feature 5: 21.7
Feature 6: 72.6
Feature 7: 306.6
Feature 8: 123.0
