In [2]:
import pandas as pd
import pickle
import numpy as np
import os
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ==========================================
# CONFIGURATION & PATHS
# ==========================================
# Training CSV; consulted only when the Linear Regression pickle is missing
# and has to be rebuilt.
TRAINING_DATA_PATH = r"C:\Users\preet\Downloads\day.csv"

# Pickled scaler shared by training and prediction.
SCALER_PATH = "scaler.pkl"

# Display name -> pickle file for every model the predictor tries to load.
# Insertion order is the order models are loaded and reported in.
MODELS_CONFIG = {
    "Linear Regression": "linear_regression_model.pkl",
    "Random Forest": "random_forest_regression_model.pkl",
    "Decision Tree": "decision_tree_regression_model.pkl",
    "Ridge": "ridge_regression_model.pkl",
    "Lasso": "lasso_regression_model.pkl",
}

# Column order of the single-row frame handed to the scaler and the models.
FEATURE_COLS = [
    # calendar / categorical codes
    "season",
    "yr",
    "mnth",
    "holiday",
    "weekday",
    "workingday",
    "weathersit",
    # numeric measurements
    "temp",
    "hum",
    "windspeed",
    # derived from weekday
    "is_weekend",
]

# ==========================================
# PART 1: TRAINING LOGIC
# ==========================================
def ensure_linear_regression_model():
    """
    Make sure a pickled Linear Regression model exists on disk.

    If the file named by MODELS_CONFIG['Linear Regression'] is already
    present, nothing is done. Otherwise the training data at
    TRAINING_DATA_PATH is loaded, restricted to FEATURE_COLS, scaled, and
    used to fit a LinearRegression that is then pickled to that path.

    Every failure is reported on stdout and swallowed (never raised), so the
    caller can carry on with whichever other models are available.

    Returns:
        None
    """
    lr_model_path = MODELS_CONFIG['Linear Regression']

    if os.path.exists(lr_model_path):
        print(f"[INFO] Found existing {lr_model_path}. Skipping training.")
        return

    print(f"[WARN] {lr_model_path} not found. Initializing training sequence...")

    try:
        # 1. Load Data
        if not os.path.exists(TRAINING_DATA_PATH):
            print(f"[CRITICAL ERROR] Training data not found at: {TRAINING_DATA_PATH}")
            print("Cannot train Linear Regression. Prediction will proceed without it.")
            return

        # Pick the reader from the file extension: the configured path is a
        # .csv, which read_excel cannot parse. Excel files remain supported
        # in case the path is ever pointed at a workbook.
        extension = os.path.splitext(TRAINING_DATA_PATH)[1].lower()
        if extension in ('.xls', '.xlsx'):
            data = pd.read_excel(TRAINING_DATA_PATH)
        else:
            data = pd.read_csv(TRAINING_DATA_PATH)

        # 'is_weekend' is derived at prediction time from 'weekday'
        # (0 or 6 -> 1), so derive it the same way when the training file
        # does not already carry it.
        if 'is_weekend' not in data.columns and 'weekday' in data.columns:
            data['is_weekend'] = data['weekday'].isin([0, 6]).astype(int)

        missing = [col for col in FEATURE_COLS + ['cnt'] if col not in data.columns]
        if missing:
            raise ValueError(f"training data is missing column(s): {missing}")

        # Train on exactly the columns (and order) used at prediction time;
        # dropping only 'cnt' would feed the model any extra columns the
        # file happens to contain.
        X = data[FEATURE_COLS]
        y = data['cnt']

        # 2. Split Data (only the training part is needed here)
        X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

        # 3. Handle Scaler
        if os.path.exists(SCALER_PATH):
            # NOTE: pickle.load can execute arbitrary code; only point
            # SCALER_PATH at files you created yourself.
            with open(SCALER_PATH, 'rb') as f:
                scaler = pickle.load(f)
            print("  > Using existing scaler.")
        else:
            print("  > Scaler not found. Fitting a new one.")
            scaler = StandardScaler()
            scaler.fit(X_train)
            with open(SCALER_PATH, 'wb') as f:
                pickle.dump(scaler, f)

        X_train_scaled = scaler.transform(X_train)

        # 4. Train Model
        lin_reg = LinearRegression()
        lin_reg.fit(X_train_scaled, y_train)

        # 5. Save Model
        with open(lr_model_path, 'wb') as f:
            pickle.dump(lin_reg, f)

        print(f"[SUCCESS] Trained and saved {lr_model_path}")

    except Exception as e:
        # Deliberate best-effort: training is optional, so report and go on.
        print(f"[ERROR] Failed during Linear Regression training: {e}")

# ==========================================
# PART 2: LOADING ARTIFACTS
# ==========================================
def load_artifacts():
    """
    Read the scaler and every model pickle that is present on disk.

    Returns:
        A ``(scaler, models)`` tuple. ``models`` maps each MODELS_CONFIG
        display name to its unpickled object; entries whose file is missing
        are skipped with a message. If the scaler file itself is missing,
        ``(None, None)`` is returned instead.
    """
    def _unpickle(file_path):
        # Deserialize the single object stored in a binary pickle file.
        with open(file_path, 'rb') as handle:
            return pickle.load(handle)

    # The scaler is mandatory: without it nothing can be predicted.
    try:
        scaler = _unpickle(SCALER_PATH)
        print(f"[OK] Loaded {SCALER_PATH}")
    except FileNotFoundError:
        print(f"[ERROR] Critical: {SCALER_PATH} is missing.")
        return None, None

    print("--- Loading Models ---")
    available = {}
    for label, file_path in MODELS_CONFIG.items():
        # Models are optional: a missing file only skips that entry.
        try:
            available[label] = _unpickle(file_path)
            print(f"  [OK] {label}")
        except FileNotFoundError:
            print(f"  [SKIP] {label} (File not found)")

    return scaler, available

# ==========================================
# PART 3: USER INTERFACE (CORRECTED)
# ==========================================
class _OutOfRangeError(ValueError):
    """A numeric answer that parsed correctly but is not an accepted value."""


def _ask_int(prompt, allowed):
    """Read an int from stdin; raise _OutOfRangeError if it is not in *allowed*."""
    value = int(input(prompt))  # plain ValueError on non-numeric text
    if value not in allowed:
        raise _OutOfRangeError(
            f"{value} is not a valid choice; expected one of {list(allowed)}."
        )
    return value


def _ask_float(prompt, low=None, high=None):
    """Read a float from stdin; raise _OutOfRangeError if outside [low, high]."""
    value = float(input(prompt))  # plain ValueError on non-numeric text
    # Written as negated chained comparisons so that NaN is rejected as well.
    if low is not None and not value >= low:
        raise _OutOfRangeError(f"{value} is below the minimum of {low}.")
    if high is not None and not value <= high:
        raise _OutOfRangeError(f"{value} is above the maximum of {high}.")
    return value


def get_user_input():
    """
    Prompt for one day's features and return them as a one-row DataFrame.

    Each answer is checked against the range advertised in its prompt. The
    year may be given as 2018/2019 or directly as the 0/1 code; any other
    year falls back to 1 with a warning. ``is_weekend`` is derived from the
    weekday (0 or 6 -> 1). No scaling is applied here.

    Returns:
        A DataFrame with a single row and columns ordered as FEATURE_COLS,
        or None when an answer is non-numeric or out of range (a message is
        printed in either case).
    """
    print("\n" + "-"*40)
    print(" ENTER PREDICTION DETAILS")
    print("-" * 40)

    data = {}
    try:
        # --- Categorical Inputs ---
        data['season'] = _ask_int(
            "Season (1:Winter, 2:Spring, 3:Summer, 4:Fall): ", range(1, 5))

        # Accept calendar years as well as the already-encoded 0/1 values.
        raw_yr = int(input("Year (e.g. 2018 or 2019): "))
        yr_code = {2018: 0, 2019: 1, 0: 0, 1: 1}.get(raw_yr)
        if yr_code is None:
            print("[WARNING] Unknown year entered. Defaulting to 2019 (1).")
            yr_code = 1
        data['yr'] = yr_code

        data['mnth'] = _ask_int("Month (1-12): ", range(1, 13))
        data['holiday'] = _ask_int("Holiday (0:No, 1:Yes): ", (0, 1))
        data['weekday'] = _ask_int("Weekday (0:Sun, 1:Mon... 6:Sat): ", range(7))
        data['workingday'] = _ask_int("Working Day (0:No, 1:Yes): ", (0, 1))
        data['weathersit'] = _ask_int(
            "Weather (1:Clear, 2:Mist, 3:Snow/Rain): ", range(1, 4))

        # --- Numerical Inputs ---
        data['temp'] = _ask_float("Temperature (Celsius): ")
        data['hum'] = _ask_float("Humidity (0-100): ", low=0, high=100)
        data['windspeed'] = _ask_float("Windspeed: ")

        # Sunday (0) and Saturday (6) count as weekend.
        data['is_weekend'] = int(data['weekday'] in (0, 6))

    except _OutOfRangeError as exc:
        # Must precede the ValueError handler: it is a ValueError subclass.
        print(f"\n[ERROR] Invalid input! {exc}")
        return None
    except ValueError:
        print("\n[ERROR] Invalid input! Please enter numbers only.")
        return None

    # Return as DataFrame
    return pd.DataFrame([data], columns=FEATURE_COLS)

# ==========================================
# MAIN EXECUTION FLOW
# ==========================================
def main():
    """
    Run the interactive predictor.

    Ensures the Linear Regression pickle exists, loads the scaler and the
    models, then repeatedly reads one datapoint, scales it and prints each
    model's prediction until the user answers anything other than 'y'.
    """
    print("=== BIKE SHARE DEMAND PREDICTOR SYSTEM ===")
    ensure_linear_regression_model()

    print("\n=== INITIALIZATION ===")
    scaler, models = load_artifacts()

    # Nothing can be predicted without a scaler and at least one model.
    if not (scaler and models):
        print("Critical Error: Missing scaler or no models loaded. Exiting.")
        return

    rule = "=" * 40
    keep_going = True
    while keep_going:
        input_df = get_user_input()

        # None means the input was rejected; skip straight to the re-prompt.
        if input_df is not None:
            # Apply the loaded scaler before handing the row to the models.
            input_scaled = scaler.transform(input_df)

            print("\n" + rule)
            print(" PREDICTED BIKE DEMAND COUNT")
            print(rule)

            for name, model in models.items():
                try:
                    bikes = int(model.predict(input_scaled)[0])
                    # A regressor may go below zero; a count cannot.
                    if bikes < 0:
                        bikes = 0
                    print(f"{name:<25}: {bikes} bikes")
                except Exception as e:
                    # One failing model must not hide the others' results.
                    print(f"{name:<25}: Error ({e})")

            print(rule)

        answer = input("\nTest another datapoint? (y/n): ")
        keep_going = answer.lower() == 'y'

    print("Exiting...")

# Start the interactive predictor only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

=== BIKE SHARE DEMAND PREDICTOR SYSTEM ===
[INFO] Found existing linear_regression_model.pkl. Skipping training.

=== INITIALIZATION ===
[OK] Loaded scaler.pkl
--- Loading Models ---
  [OK] Linear Regression
  [OK] Random Forest
  [OK] Decision Tree
  [OK] Ridge
  [OK] Lasso

----------------------------------------
 ENTER PREDICTION DETAILS
----------------------------------------


Season (1:Winter, 2:Spring, 3:Summer, 4:Fall):  2
Year (e.g. 2018 or 2019):  2018
Month (1-12):  3
Holiday (0:No, 1:Yes):  0
Weekday (0:Sun, 1:Mon... 6:Sat):  4
Working Day (0:No, 1:Yes):  1
Weather (1:Clear, 2:Mist, 3:Snow/Rain):  2
Temperature (Celsius):  23
Humidity (0-100):  45
Windspeed:  54



 PREDICTED BIKE DEMAND COUNT
Linear Regression        : 1936 bikes
Random Forest            : 3400 bikes
Decision Tree            : 1115 bikes
Ridge                    : 1936 bikes
Lasso                    : 1940 bikes



Test another datapoint? (y/n):  n


Exiting...
