# In [None]:
# Import necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor

# Read the raw training data from disk
train = pd.read_csv('train.csv')

# Task 1: Explore the relationship between temperature and bike rental demand
# Every row is tagged with a season derived from the calendar month of its
# timestamp; temperature is then plotted against rental count with one
# scatter series per season.
# NOTE(review): this assigns train['season'], replacing any column of that
# name that train.csv may already contain -- confirm that is intended.
train['datetime'] = pd.to_datetime(train['datetime'])

# Season -> months table, inverted below into the month -> season lookup
months_by_season = {
    'Winter': (12, 1, 2),
    'Spring': (3, 4, 5),
    'Summer': (6, 7, 8),
    'Fall': (9, 10, 11),
}
month_to_season = {
    month: season_name
    for season_name, months in months_by_season.items()
    for month in months
}
train['season'] = train['datetime'].dt.month.map(month_to_season)

# Seasons in order of first appearance in the data
seasons = train['season'].unique()
fig, ax = plt.subplots(figsize=(12, 8))

for season in seasons:
    in_season = train['season'] == season
    subset = train[in_season]
    ax.scatter(subset['temp'], subset['count'], label=season, alpha=0.5)

ax.set_title('Temperature vs. Bike Rental Demand for Different Seasons')
ax.set_xlabel('Temperature (°C)')
ax.set_ylabel('Bike Rental Count')
ax.legend()
plt.show()

# Task 2: Prepare the data for prediction
# Derive hour / day-of-month / month columns from the timestamp.
# Relies on train['datetime'] already holding datetime values (the .dt
# accessor is unavailable otherwise).
timestamp_parts = train['datetime'].dt
for part in ('hour', 'day', 'month'):
    train[part] = getattr(timestamp_parts, part)

# Predictors: three weather readings plus the three calendar parts above;
# the target is the rental count.
features = ['temp', 'humidity', 'windspeed', 'hour', 'day', 'month']
X, y = train[features], train['count']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Task 3: Train regression models (KNN, Decision Tree, Random Forest)
# Candidate regressors keyed by the label used when reporting their scores.
# NOTE(review): KNN is distance-based and the features are passed in unscaled;
# consider standardising them before judging the KNN result.
models = {
    'KNN Regressor': KNeighborsRegressor(n_neighbors=5),
    'Decision Tree Regressor': DecisionTreeRegressor(random_state=42),
    'Random Forest Regressor': RandomForestRegressor(n_estimators=100, random_state=42),
}

for name, regressor in models.items():
    # fit() returns the fitted estimator, so training and prediction chain
    y_pred = regressor.fit(X_train, y_train).predict(X_test)

    # Score the held-out predictions
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Two report lines followed by one blank separator line per model
    print(
        f"{name} - Mean Squared Error: {mse}",
        f"{name} - R-squared: {r2}",
        sep="\n",
        end="\n\n",
    )

# Optional: Incorporate interaction effects
# Expand the feature matrix with pairwise interaction terms (the product of
# every pair of distinct original columns, no squares, no bias column) and
# fit a Linear Regression model on the expanded matrix.
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
X_poly = poly.fit_transform(X)

# Split the expanded data into training and testing sets.
# test_size and random_state match the earlier split, so the same rows land
# in the test set and the scores stay comparable with the Task 3 models.
X_train_poly, X_test_poly, y_train, y_test = train_test_split(
    X_poly, y, test_size=0.2, random_state=42
)

# Train a Linear Regression model with interaction terms.
# X_train_poly already contains the interaction columns, so the estimator is
# fit on it directly; wrapping it in another PolynomialFeatures stage would
# expand the features a second time (products of products, up to degree 4).
model = LinearRegression()
model.fit(X_train_poly, y_train)

# Make predictions on the test set with interaction terms
y_pred_poly = model.predict(X_test_poly)

# Evaluate the model's performance with interaction terms
mse_poly = mean_squared_error(y_test, y_pred_poly)
r2_poly = r2_score(y_test, y_pred_poly)

print("Mean Squared Error with Interaction Terms: ", mse_poly)
print("R-squared with Interaction Terms: ", r2_poly)
