# Preprocessing Code

In [22]:
# Import the Pandas library for data handling
import pandas as pd

# Import LabelEncoder to convert weather text values into numbers
from sklearn.preprocessing import LabelEncoder

# -------------------------------
# 1. LOAD THE DATASET
# 2. DATE CONVERSION
# -------------------------------

# Read the CSV and, within the same chain, replace the string 'date' column
# with its parsed datetime version.
# NOTE(review): the recorded output of this cell appears to show pandas
# emitting a warning at the to_datetime call. Passing an explicit `format=`
# would likely silence it -- confirm the date layout used in the CSV first.
df = (
    pd.read_csv("library_dataset.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# -------------------------------
# 3. ENCODE WEATHER COLUMN
# -------------------------------

# Fit a label encoder on the weather column and store the integer codes in a
# new 'weather_encoded' column. Codes follow the order of `le.classes_`
# (for this dataset the recorded mapping is cloudy=0, rainy=1, sunny=2), so
# the numbers are arbitrary identifiers and carry no natural ordering.
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for target
# values (y). For an input feature, OrdinalEncoder / OneHotEncoder is the
# documented choice -- consider switching, especially for the linear model.
le = LabelEncoder()
df['weather_encoded'] = le.fit_transform(df['weather'])

# Display the mapping. A class's code is its position in `le.classes_`, so
# enumerate() yields the same codes as le.transform(le.classes_) but as plain
# Python ints, which print as {'cloudy': 0, ...} rather than
# {'cloudy': np.int64(0), ...}.
weather_mapping = {label: code for code, label in enumerate(le.classes_)}
print("Weather mapping:", weather_mapping)

# -------------------------------
# 4. SELECT FEATURES FOR MODEL
# -------------------------------
# IMPORTANT: boys_count and girls_count are deliberately left out of the
# feature list to avoid data leakage.
feature_columns = [
    'day_of_week',       # Day of week (0=Mon, 6=Sun)
    'hour',              # Hour of the day (0-23)
    'exam_day',          # 1 if exam day, else 0
    'weather_encoded',   # Encoded weather value
]

# Model inputs: only the columns listed above
X = df[feature_columns]

# Model target: the value we want to predict
y = df['seats_available']

# Confirmation message, then a preview of the feature matrix
# (kept as the last expression so the notebook renders it)
print("Preprocessing complete!")
X.head()


Weather mapping: {'cloudy': np.int64(0), 'rainy': np.int64(1), 'sunny': np.int64(2)}
Preprocessing complete!


  df['date'] = pd.to_datetime(df['date'])


Unnamed: 0,day_of_week,hour,exam_day,weather_encoded
0,4,0,0,1
1,4,1,0,1
2,4,2,0,1
3,4,3,0,0
4,4,4,0,0


# MODEL TRAINING + EVALUATION

In [23]:
# ---------------------------------------------------------
# TRAINING ALL MODELS (Linear + Tree + Forest)
# ---------------------------------------------------------

# Import ML libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import numpy as np

# ---------------------------------------------------------
# 1. SPLIT DATA
# ---------------------------------------------------------
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# In the sections below, construction and fitting are chained: fit() hands
# back the fitted estimator, which is then bound to the same name as before.

# ---------------------------------------------------------
# 2. TRAIN LINEAR REGRESSION
# ---------------------------------------------------------
lr = LinearRegression().fit(X_train, y_train)
lr_pred = lr.predict(X_test)

# ---------------------------------------------------------
# 3. TRAIN DECISION TREE
# ---------------------------------------------------------
dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
dt_pred = dt.predict(X_test)

# ---------------------------------------------------------
# 4. TRAIN RANDOM FOREST
# ---------------------------------------------------------
# 200 trees, seeded for reproducibility
rf = RandomForestRegressor(n_estimators=200, random_state=42).fit(X_train, y_train)
rf_pred = rf.predict(X_test)

# ---------------------------------------------------------
# 5. EVALUATION FUNCTION
# ---------------------------------------------------------
def evaluate(name, y_true, y_pred):
    """Print MAE, RMSE and R2 for one set of predictions.

    Args:
        name: Label printed in the header line for this model.
        y_true: Ground-truth target values.
        y_pred: Predicted values to compare against ``y_true``.

    Returns:
        None. The three metrics are written to stdout.
    """
    # Each metric is wrapped in a lambda so it is computed right before its
    # own line is printed, keeping the compute/print order interleaved.
    metric_rows = (
        ("MAE  :", lambda: mean_absolute_error(y_true, y_pred)),
        # RMSE is the square root of the mean squared error
        ("RMSE :", lambda: np.sqrt(mean_squared_error(y_true, y_pred))),
        ("R2   :", lambda: r2_score(y_true, y_pred)),
    )

    print(f"\n------ {name} ------")
    for label, compute in metric_rows:
        print(label, compute())

# ---------------------------------------------------------
# 6. SHOW METRICS OF ALL MODELS
# ---------------------------------------------------------
# Report each model against the same held-out targets, in training order.
for _label, _preds in (
    ("Linear Regression", lr_pred),
    ("Decision Tree", dt_pred),
    ("Random Forest", rf_pred),
):
    evaluate(_label, y_test, _preds)



------ Linear Regression ------
MAE  : 34.339391380834556
RMSE : 42.84726031600836
R2   : 0.13117726830994902

------ Decision Tree ------
MAE  : 14.191989443305232
RMSE : 19.317758014054057
R2   : 0.8233968270298504

------ Random Forest ------
MAE  : 13.722046482193841
RMSE : 18.287556913865185
R2   : 0.8417307878310033


# Save Random Forest Model

In [24]:
import joblib

# Persist the fitted Random Forest to disk with joblib.
# NOTE(review): only the estimator is written here; the fitted LabelEncoder
# `le` is not saved -- confirm that whatever loads this model reproduces the
# same weather-to-integer mapping.
joblib.dump(value=rf, filename="seat_availability_model.pkl")

print("Model saved successfully!")


Model saved successfully!
