# Bike Demand Prediction using Multiple Linear Regression

# Step 1: Import Required Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Step 2: Load the Dataset
# The relative path is resolved against the current working directory.
df = pd.read_csv('day.csv')  # Make sure the file name is correct
# A bare `df.head()` is only rendered as the last expression of an interactive
# cell; in a script its result is discarded, so print the preview explicitly.
print(df.head())

# Step 3: Data Preprocessing
# Replace the integer codes of the categorical columns with readable labels.
# Series.map yields NaN for any code that is absent from its mapping.
category_labels = {
    'season': {1: 'spring', 2: 'summer', 3: 'fall', 4: 'winter'},
    'weathersit': {
        1: 'Clear',
        2: 'Mist + Cloudy',
        3: 'Light Snow/Rain',
        4: 'Heavy Rain/Snow',
    },
}
for column_name, code_to_label in category_labels.items():
    df[column_name] = df[column_name].map(code_to_label)

# Remove, in place, the columns that are excluded from the model.
df.drop(columns=['instant', 'dteday', 'casual', 'registered'], inplace=True)

# One-hot encode the categorical features; drop_first=True omits the first
# level of each feature so the remaining dummies are measured against it.
encoded_columns = ['season', 'weathersit', 'mnth', 'weekday']
df = pd.get_dummies(df, columns=encoded_columns, drop_first=True)

# Step 4: Define Features and Target
# 'cnt' is the target; every other remaining column is a predictor.
y = df['cnt']
X = df.drop(columns='cnt')

# Step 5: Train-Test Split
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Step 6: Train the Model
# fit() returns the fitted estimator, so construction and training chain.
lr = LinearRegression().fit(X_train, y_train)

# Step 7: Make Predictions and Evaluate
# Score the held-out rows with the coefficient of determination.
y_pred = lr.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"R-squared Score: {r2:.4f}")

# Step 8: Residual Analysis
# Plot the distribution of the test-set errors (actual minus predicted) as a
# histogram with a kernel density estimate overlaid.
residuals = y_test - y_pred
ax = sns.histplot(data=residuals, kde=True)
# histplot returns the Axes it drew on; label that Axes directly.
ax.set_title("Residuals Distribution")
ax.set_xlabel("Residuals")
ax.set_ylabel("Frequency")
plt.show()