In [None]:
import pandas as pd

# Read the rental records into a DataFrame and preview the first rows.
DATA_PATH = 'day.csv'  # Replace with your actual path
df = pd.read_csv(DATA_PATH)
df.head()


In [None]:
# Remove the columns that are not used as model inputs.
# NOTE(review): presumably 'casual' + 'registered' sum to the target 'cnt' — confirm against the data.
df = df.drop(columns=['instant', 'dteday', 'casual', 'registered'])


In [None]:
# One-hot encode the categorical columns; drop_first=True omits the first
# level of each column so the dummies are not perfectly collinear.
categorical_columns = ['season', 'weathersit', 'mnth', 'weekday']
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate the predictors from the target column.
X = df.drop('cnt', axis=1)
y = df['cnt']

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows influence the mean/variance used to transform the training rows
# (data leakage), which makes the held-out metrics optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Retained so any code that expects the fully scaled feature matrix keeps
# working; it now uses the training-set statistics.
X_scaled = scaler.transform(X)


In [None]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

def evaluate(model, name):
    """Fit ``model`` on the training split and report its test-split metrics.

    The function reads the notebook-level ``X_train``, ``y_train``, ``X_test``
    and ``y_test`` variables, so the split cell must have been run first.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
        Fitted in place by this call.
    name : str
        Label printed as the heading of the report.

    Returns
    -------
    The R² score of the predictions on the test split.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    print(f"📊 {name}")

    # R² is both printed and returned, so compute it once and reuse it.
    r2 = r2_score(y_test, predictions)
    print("R² Score:", r2)

    for label, metric in (("MSE:", mean_squared_error), ("MAE:", mean_absolute_error)):
        print(label, metric(y_test, predictions))

    print("-" * 40)

    return r2


In [None]:
from sklearn.linear_model import LinearRegression

# Baseline: plain linear regression with default settings.
lr = LinearRegression()
evaluate(model=lr, name="Linear Regression")


In [None]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

# Expand the features to degree-2 polynomial terms, then fit a linear model on them.
poly = make_pipeline(
    PolynomialFeatures(degree=2),
    LinearRegression(),
)
evaluate(model=poly, name="Polynomial Regression")


In [None]:
from sklearn.linear_model import Ridge

# Ridge regression with regularization strength alpha=1.0.
ridge = Ridge(alpha=1.0)
evaluate(model=ridge, name="Ridge Regression")


In [None]:
from sklearn.linear_model import Lasso

# Lasso regression with regularization strength alpha=0.1.
lasso = Lasso(alpha=0.1)
evaluate(model=lasso, name="Lasso Regression")


In [None]:
from sklearn.tree import DecisionTreeRegressor

# Single decision tree; random_state is fixed so repeated runs give the same tree.
tree = DecisionTreeRegressor(random_state=42)
evaluate(model=tree, name="Decision Tree Regression")


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest of 100 trees; random_state is fixed for reproducible results.
rf = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
evaluate(model=rf, name="Random Forest Regression")


In [None]:
# Display labels and test-split R² scores, listed in the same order.
models = ['Linear', 'Polynomial', 'Ridge', 'Lasso', 'Decision Tree', 'Random Forest']
scores = [
    r2_score(y_test, fitted.predict(X_test))
    for fitted in (lr, poly, ridge, lasso, tree, rf)
]

import seaborn as sns
import matplotlib.pyplot as plt

# Bar chart comparing the test-split R² of every model.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=models, y=scores, palette='coolwarm', ax=ax)
ax.set_title('Model Comparison - R² Score')
ax.set_ylim(0, 1)
ax.set_ylabel('R² Score')
plt.show()


In [None]:
# Select the fitted model with the highest R² on the test split. Without this
# assignment `best_model` is undefined on a fresh kernel and the cell raises
# NameError.
best_model = max(
    (lr, poly, ridge, lasso, tree, rf),
    key=lambda fitted: r2_score(y_test, fitted.predict(X_test)),
)
y_pred_best = best_model.predict(X_test)

# Scatter of actual vs predicted values; the dashed red diagonal marks where
# a perfect prediction would fall.
plt.figure(figsize=(8,6))
plt.scatter(y_test, y_pred_best, alpha=0.6, color='green')
plt.plot([y.min(), y.max()], [y.min(), y.max()], 'r--')
plt.xlabel("Actual Rentals")
plt.ylabel("Predicted Rentals")
plt.title("Actual vs Predicted Bike Rentals")
plt.show()


In [None]:
import joblib
# The models are trained on StandardScaler-transformed features, so the fitted
# scaler is saved alongside the model; a reloaded model needs it to transform
# raw inputs before predicting.
joblib.dump(scaler, 'bike_rental_scaler.pkl')
# Persist the selected model (kept last so the cell still displays its path).
joblib.dump(best_model, 'bike_rental_forecast_model.pkl')
