In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, accuracy_score
import joblib
from numpy import hstack

# Read the raw dataset; the bare file name is resolved against the current working directory
DATA_PATH = "main.2 - Sheet1.csv"
df = pd.read_csv(DATA_PATH)

# Parse the Date column. errors='coerce' converts unparseable values to NaT
# instead of raising, so the calendar columns derived below are NaN for
# those rows.
# NOTE(review): no explicit format is given, so ambiguous day/month strings
# are parsed by pandas' inference — confirm against the CSV's date format.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Calendar features taken from the parsed date
date_accessor = df['Date'].dt
df['Day'] = date_accessor.day
df['Month'] = date_accessor.month
df['Year'] = date_accessor.year
df['Weekday'] = date_accessor.weekday  # 0=Monday, 6=Sunday

# Weekend flag: 1 for Saturday (5) / Sunday (6), else 0. Computed with a
# vectorized comparison rather than a per-row Python lambda; a NaN weekday
# compares False and therefore still maps to 0.
df['IsWeekend'] = (df['Weekday'] >= 5).astype(int)

# Integer-encode each categorical column with a dedicated LabelEncoder.
# Mapping: source column -> name of the encoded column added to df.
categorical_columns = {
    'Religious_Place': 'Place_encoded',
    'Country': 'Country_encoded',
    'Public_Holiday': 'Holiday_encoded',
    'crowd_level': 'Crowd_encoded',
}

fitted_encoders = {}
for raw_column, encoded_column in categorical_columns.items():
    encoder = LabelEncoder()
    df[encoded_column] = encoder.fit_transform(df[raw_column])
    fitted_encoders[raw_column] = encoder

# Expose every fitted encoder under its own name; they are saved to disk
# individually later in the notebook.
le_place = fitted_encoders['Religious_Place']
le_country = fitted_encoders['Country']
le_holiday = fitted_encoders['Public_Holiday']
le_crowd = fitted_encoders['crowd_level']

# Model inputs (X) and the two targets (y)
feature_columns = [
    'Past Crowd Levels',
    'Holiday_encoded',
    'Day',
    'Month',
    'Weekday',
    'IsWeekend',
    'Place_encoded',
    'Country_encoded',
]
X = df[feature_columns]
y_reg = df['visitor_count']   # Regression target
y_clf = df['Crowd_encoded']   # Classification target

# A single 80/20 split over X and both targets, so the regressor and the
# classifier are trained and evaluated on the same rows.
(X_train, X_test,
 y_reg_train, y_reg_test,
 y_clf_train, y_clf_test) = train_test_split(
    X, y_reg, y_clf, test_size=0.2, random_state=42
)

# Regression: fit a 200-tree random forest on the training rows, then
# measure mean squared error on the held-out rows.
reg_model = RandomForestRegressor(
    n_estimators=200,
    random_state=42,
).fit(X_train, y_reg_train)  # fit() returns the estimator itself

y_reg_pred = reg_model.predict(X_test)
reg_mse = mean_squared_error(y_true=y_reg_test, y_pred=y_reg_pred)

# Classification: fit a 200-tree random forest on the encoded crowd level,
# then measure accuracy on the held-out rows.
clf_model = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
).fit(X_train, y_clf_train)  # fit() returns the estimator itself

y_clf_pred = clf_model.predict(X_test)
clf_acc = accuracy_score(y_true=y_clf_test, y_pred=y_clf_pred)

# Report the hold-out metric of each model
for metric_label, metric_value in (
    ("✅ Regression MSE:", reg_mse),
    ("✅ Classification Accuracy:", clf_acc),
):
    print(metric_label, metric_value)

# Save models + encoders: each target file name is paired with the fitted
# object written to it (bare names, so files land in the working directory).
artifacts = {
    "regression_model.joblib": reg_model,
    "classification_model.joblib": clf_model,
    "label_encoder_place.joblib": le_place,
    "label_encoder_country.joblib": le_country,
    "label_encoder_holiday.joblib": le_holiday,
    "label_encoder_crowd.joblib": le_crowd,
}
for file_name, fitted_object in artifacts.items():
    joblib.dump(fitted_object, file_name)

print("🎉 Improved models & encoders saved successfully!")


✅ Regression MSE: 1754771.791945485
✅ Classification Accuracy: 0.33482587064676617
🎉 Improved models & encoders saved successfully!


In [6]:
# Re-save the fitted models and encoders. This writes the same six objects
# to the same file names as the save step at the end of the previous cell,
# so it overwrites those files. It relies on the variables created there.
artifacts_to_save = [
    (reg_model, "regression_model.joblib"),
    (clf_model, "classification_model.joblib"),
    (le_place, "label_encoder_place.joblib"),
    (le_country, "label_encoder_country.joblib"),
    (le_holiday, "label_encoder_holiday.joblib"),
    (le_crowd, "label_encoder_crowd.joblib"),
]
for fitted_object, target_file in artifacts_to_save:
    joblib.dump(fitted_object, target_file)

print("🎉 Improved models & encoders saved successfully!")

🎉 Improved models & encoders saved successfully!
