In [1]:
import pandas as pd
import numpy as np

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_log_error

In [2]:
# Load the training and test sets from CSV files in the working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ("train.csv", "test.csv"))

In [3]:
# Print the column index of each frame so the two schemas can be compared.
for frame in (train, test):
    print(frame.columns)

Index(['datetime', 'season', 'holiday', 'workingday', 'weather', 'temp',
       'atemp', 'humidity', 'windspeed', 'casual', 'registered', 'count'],
      dtype='object')
Index(['datetime', 'season', 'holiday', 'workingday', 'weather', 'temp',
       'atemp', 'humidity', 'windspeed', 'casual', 'registered'],
      dtype='object')


In [4]:
# Convert the 'datetime' column of both frames to pandas datetimes.
# errors='coerce' turns unparseable entries into NaT instead of raising.
for frame in (train, test):
    frame['datetime'] = pd.to_datetime(frame['datetime'], errors='coerce')

In [5]:
# Derive calendar features from the parsed timestamp on both frames.
# Each new column is named after the `.dt` accessor attribute it is read from.
for df in (train, test):
    stamps = df['datetime'].dt
    for part in ('hour', 'day', 'month', 'year', 'dayofweek'):
        df[part] = getattr(stamps, part)

In [6]:
# Remove the raw timestamp from the training frame together with
# 'casual' and 'registered', which leak target information.
train = train.drop(columns=['datetime', 'casual', 'registered'])

# Keep the test timestamps for the submission file, then drop the column
# from the test frame as well.
test_datetime, test = test['datetime'], test.drop(columns=['datetime'])

In [7]:
# Features are every column except the target; the target itself is
# modelled on a log1p scale.
X = train.drop(columns='count')
y = np.log1p(train['count'])

In [8]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

In [9]:
# Hyperparameters for the gradient-boosting regressor, gathered in one
# place so they are easy to tune.
gbr_params = {
    "n_estimators": 500,
    "learning_rate": 0.05,
    "max_depth": 5,
    "random_state": 42,
}
model = GradientBoostingRegressor(**gbr_params)

# Train on the training split only; the validation split stays unseen.
model.fit(X_train, y_train)

In [10]:
# Predictions live on the log1p scale because the model was trained on
# log1p(count). Floor them at 0 before inverting: a negative log-scale
# prediction would turn into a negative count after expm1, which is not a
# valid count and which mean_squared_log_error rejects with a ValueError
# in at least some scikit-learn versions.
val_pred = np.clip(model.predict(X_val), 0, None)

# RMSLE on the original count scale: undo the log1p transform on both the
# true values and the predictions, then take the root of the MSLE.
rmsle = np.sqrt(
    mean_squared_log_error(
        np.expm1(y_val),
        np.expm1(val_pred)
    )
)

print("Validation RMSLE:", rmsle)

Validation RMSLE: 0.28238998856723146


In [11]:
# Refit the same estimator on all labelled rows (training + validation
# splits) before predicting on the test set.
model.fit(X=X, y=y)

In [13]:
# Select exactly the feature columns the model was trained on, in the same
# order, so the test matrix lines up with X.
test = test[X.columns]

# Predict on the log1p scale and floor at 0 before inverting the transform,
# so the submission can never contain a negative count.
test_pred_log = np.clip(model.predict(test), 0, None)
test_pred = np.expm1(test_pred_log)

# Pair each prediction with the timestamp saved before 'datetime' was
# dropped from the test frame.
submission = pd.DataFrame({
    'datetime': test_datetime,
    'count': test_pred
})

submission.to_csv("submission.csv", index=False)

print("Submission file created successfully!")

Submission file created successfully!


In [15]:
import pickle

# Persist the model as it stands after the full-data refit (model.fit(X, y)
# in the earlier cell). It is deliberately NOT re-fitted on X_train here:
# doing so would save a model trained on only the 80% split, which would not
# match the model that produced the submission.
# The context manager guarantees the file is flushed and closed even if
# pickling fails.
with open("bike_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

print("Model saved successfully ✅")

Model saved successfully ✅
