In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import time

# 1-3. Load the dataset and prepare it in a single method chain:
#      read the CSV, drop the columns that are not used as features,
#      give the terse column headers readable names, and one-hot encode
#      the categorical features.
unused_columns = ['instant', 'dteday', 'casual', 'registered']
readable_names = {
    'yr': 'year',
    'mnth': 'month',
    'weathersit': 'weather',
    'cnt': 'total_rentals',
}
categorical_features = ['season', 'month', 'weekday', 'weather']

df = (
    pd.read_csv("day.csv")
    .drop(columns=unused_columns)
    .rename(columns=readable_names)
    # Cast to 'category' before encoding, as the original pipeline did.
    .astype({name: 'category' for name in categorical_features})
    # drop_first=True omits the first level of every encoded feature.
    .pipe(pd.get_dummies, columns=categorical_features, drop_first=True)
)

# 4. Separate the target from the features and hold out 20% of the rows
#    for testing; the fixed random_state makes the split reproducible.
target_column = 'total_rentals'
X = df.drop(columns=target_column)
y = df[target_column]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 5. Build and train the model.
# The estimator is configured first so that the timer below covers only
# the call to fit(), not object construction.
gb_model = GradientBoostingRegressor(
    n_estimators=300,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,  # fixed seed so repeated runs give the same model
    subsample=0.8
)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed intervals; time.time() is wall-clock time and can jump
# if the system clock is adjusted.
start_time = time.perf_counter()
gb_model.fit(X_train, y_train)
training_time = time.perf_counter() - start_time  # elapsed seconds

# 6. Predict on the held-out set and time the call.
# perf_counter() is used because a single predict() call can be short
# enough that the coarser, adjustable wall clock from time.time() would
# report an inaccurate (or zero) duration.
start_pred = time.perf_counter()
y_pred = gb_model.predict(X_test)
# Elapsed seconds for predict(); stored in a variable, not printed by this cell.
prediction_time = time.perf_counter() - start_pred

# 7. Compute error metrics on the test set.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# 8. Report the training time and both metrics, rounded to two decimals.
print(f"Время обучения: {training_time:.2f} сек")
print(f"MSE: {mse:.2f}")
print(f"MAE: {mae:.2f}")

Время обучения: 1.93 сек
MSE: 421663.00
MAE: 433.43
