In [1]:
# Importowanie bibliotek
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, PowerTransformer, StandardScaler, PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the daily bike-share data and discard every row that contains a missing value
bike_data = pd.read_csv('daily-bike-share.csv').dropna()

# Column roles used by the preprocessing branches and the model
target = 'rentals'
numeric_features = ['temp', 'atemp', 'hum', 'windspeed']
categorical_features = ['season', 'mnth', 'holiday', 'weekday', 'workingday', 'weathersit']

# Shift every numeric column whose minimum is <= 0 so that its new minimum is exactly 1.
# NOTE(review): PowerTransformer() defaults to the Yeo-Johnson method, which accepts
# non-positive input, so this shift may be unnecessary — confirm before removing it.
for col in numeric_features:
    col_min = bike_data[col].min()
    if col_min <= 0:
        bike_data[col] = bike_data[col] + (abs(col_min) + 1)

# Feature matrix and target vector, taken as copies of the selected columns
feature_columns = numeric_features + categorical_features
X = bike_data[feature_columns].copy()
y = bike_data[target].copy()

# Hold out the last 20% of rows as the test set; shuffle=False preserves the row order
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

# Numeric branch: power transform -> standardisation -> degree-2 polynomial expansion
numeric_steps = [
    ('powertransformer', PowerTransformer()),
    ('scaler', StandardScaler()),
    ('polynomialfeatures', PolynomialFeatures(degree=2)),
]
numeric_transformer = Pipeline(steps=numeric_steps)

# Categorical branch: one-hot encoding; categories not seen during fit are ignored
categorical_steps = [('onehot', OneHotEncoder(handle_unknown='ignore'))]
categorical_transformer = Pipeline(steps=categorical_steps)

# Route each column group to its own preprocessing branch
column_routes = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_routes)

# Final estimator: preprocessing followed by a Random Forest with a fixed seed.
# The step names are referenced by the hyperparameter grid ('regressor__...').
model_steps = [
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=0)),
]
pipeline_rf = Pipeline(steps=model_steps)

# Restricted hyperparameter grid for the 'regressor' step: 3 * 3 * 3 * 3 = 81 candidates
param_grid_rf = {
    'regressor__n_estimators': [50, 100, 200],
    'regressor__max_depth': [10, 20, None],
    'regressor__min_samples_split': [2, 5, 10],
    'regressor__min_samples_leaf': [1, 2, 4],
}

# Exhaustive 5-fold search scored by negated MSE (higher is better), using all CPU cores
grid_search_rf = GridSearchCV(
    estimator=pipeline_rf,
    param_grid=param_grid_rf,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
    verbose=2,
)

# Run the search on the training portion only
grid_search_rf.fit(X_train, y_train)

# Pipeline selected by the grid search (best hyperparameter combination)
best_model_rf = grid_search_rf.best_estimator_

# Predictions for the held-out test rows
y_test_pred_rf = best_model_rf.predict(X_test)

# Test-set metrics: MAE, RMSE (square root of the MSE) and R^2
mae_test_rf = mean_absolute_error(y_test, y_test_pred_rf)
mse_test_rf = mean_squared_error(y_test, y_test_pred_rf)
rmse_test_rf = np.sqrt(mse_test_rf)
r2_test_rf = r2_score(y_test, y_test_pred_rf)

# Report the winning hyperparameter combination
print("Najlepsze parametry dla Random Forest:")
print(grid_search_rf.best_params_)

# The search was scored with 'neg_mean_squared_error', so best_score_ is a negated MSE;
# flipping the sign means the number printed here is the plain (positive) MSE.
best_cv_mse = -grid_search_rf.best_score_
print("\nNajlepszy wynik GridSearch (neg_mean_squared_error):", best_cv_mse)

# Every candidate, best first (mean_test_score is still the negated MSE in this table)
print("\nWyniki wszystkich testów:")
results = pd.DataFrame(grid_search_rf.cv_results_)
ranked_results = results[['mean_test_score', 'params']].sort_values(
    by='mean_test_score',
    ascending=False,
)
print(ranked_results)

# Test-set metrics of the best model
print("\nMetryki modelu Random Forest:")
print(f"MAE: {mae_test_rf:.2f}")
print(f"RMSE: {rmse_test_rf:.2f}")
print(f"R^2: {r2_test_rf:.2f}")


Fitting 5 folds for each of 81 candidates, totalling 405 fits
Najlepsze parametry dla Random Forest:
{'regressor__max_depth': None, 'regressor__min_samples_leaf': 2, 'regressor__min_samples_split': 5, 'regressor__n_estimators': 200}

Najlepszy wynik GridSearch (neg_mean_squared_error): 163852.0638391755

Wyniki wszystkich testów:
    mean_test_score                                             params
68   -163852.063839  {'regressor__max_depth': None, 'regressor__min...
41   -163852.625064  {'regressor__max_depth': 20, 'regressor__min_s...
14   -164129.251414  {'regressor__max_depth': 10, 'regressor__min_s...
65   -164190.187230  {'regressor__max_depth': None, 'regressor__min...
38   -164196.497438  {'regressor__max_depth': 20, 'regressor__min_s...
..              ...                                                ...
34   -175983.963871  {'regressor__max_depth': 20, 'regressor__min_s...
61   -175983.963871  {'regressor__max_depth': None, 'regressor__min...
6    -177561.184515  {'regres