# Dwarka Temple Visitor Prediction Model

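This notebook generates synthetic hourly visitor data for three temples (Dwarka, Ambaji, and Somnath), explores the resulting patterns, and trains a random-forest regression model to predict temple visitor inflow.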

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import math
import random

# Set style
# The seaborn style sheets are named 'seaborn-v0_8*' from matplotlib 3.6 on;
# older releases ship the same sheet as 'seaborn'. Use whichever name this
# installation provides so the notebook does not fail on style lookup.
for style_name in ('seaborn-v0_8', 'seaborn'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
sns.set_palette("husl")

## Generate Synthetic Dataset

In [None]:
def generate_synthetic_data(days=365, seed=None):
    """Generate synthetic hourly visitor data for training.

    One record is produced for every (day, temple, hour) combination,
    starting at 2024-01-01, so the result has ``days * 3 * 24`` rows.

    Args:
        days: Number of consecutive days to simulate. Zero (or a negative
            value) yields an empty frame that still has all columns.
        seed: Optional seed for reproducible output. When given, a private
            ``random.Random(seed)`` is used and the module-level generator
            is left untouched. When ``None`` (default), the module-level
            ``random`` generator is used, exactly as before.

    Returns:
        pandas.DataFrame with columns ``date`` (``YYYY-MM-DD`` string),
        ``hour``, ``temple_id``, ``visitors``, ``is_weekend``,
        ``is_festival``, ``day_of_week`` and ``month``.
    """
    columns = [
        'date', 'hour', 'temple_id', 'visitors',
        'is_weekend', 'is_festival', 'day_of_week', 'month',
    ]

    # Both the module and a Random instance expose .random(), so the rest of
    # the function does not care which one it got.
    rng = random if seed is None else random.Random(seed)

    data = []
    start_date = datetime(2024, 1, 1)

    # Per-temple scale factor applied to the hourly pattern below.
    temples = {
        'dwarka': 50000,
        'ambaji': 30000,
        'somnath': 40000
    }

    for day in range(days):
        current_date = start_date + timedelta(days=day)

        # Festival days (each day has a 10% chance; festival days get 2x visitors)
        is_festival = rng.random() < 0.1

        # Weekend effect (weekday() is 5 for Saturday, 6 for Sunday)
        day_of_week = current_date.weekday()
        is_weekend = day_of_week >= 5

        # Everything below is constant for the whole day, so compute it once
        # instead of once per (temple, hour) row.
        date_str = current_date.strftime('%Y-%m-%d')
        month = current_date.month
        weekend_mult = 1.3 if is_weekend else 1.0
        festival_mult = 2.0 if is_festival else 1.0

        for temple_id, capacity in temples.items():
            for hour in range(24):
                # Base pattern: each daytime window is a half sine wave that
                # starts and ends at its floor value and peaks mid-window.
                if 5 <= hour <= 9:  # Morning peak
                    base_factor = 0.6 + 0.4 * math.sin((hour - 5) * math.pi / 4)
                elif 17 <= hour <= 21:  # Evening peak
                    base_factor = 0.7 + 0.3 * math.sin((hour - 17) * math.pi / 4)
                elif 10 <= hour <= 16:  # Afternoon
                    base_factor = 0.4 + 0.2 * math.sin((hour - 10) * math.pi / 6)
                else:  # Night: low, randomly jittered level
                    base_factor = 0.1 + 0.1 * rng.random()

                # Apply multipliers plus a uniform noise factor in [0.8, 1.2)
                visitors = int(
                    capacity * base_factor * weekend_mult * festival_mult
                    * (0.8 + 0.4 * rng.random())
                )

                data.append({
                    'date': date_str,
                    'hour': hour,
                    'temple_id': temple_id,
                    'visitors': max(100, visitors),  # floor at 100 visitors
                    'is_weekend': is_weekend,
                    'is_festival': is_festival,
                    'day_of_week': day_of_week,
                    'month': month
                })

    # Passing the column list keeps the schema stable even when no rows exist.
    return pd.DataFrame(data, columns=columns)

# Build the default one-year dataset and preview its first five rows
df = generate_synthetic_data(days=365)
print(f"Generated {len(df)} records")
df.head(5)

## Data Analysis

In [None]:
# Write the generated records to visitors.csv, leaving out the row index
df.to_csv(path_or_buf='visitors.csv', index=False)
print("Dataset saved as visitors.csv")

# Header line followed by the DataFrame.describe() summary table
print("\nDataset Statistics:", df.describe(), sep="\n")

In [None]:
# Hourly patterns: overall average, weekday vs weekend, and per temple,
# drawn side by side in a single row of three panels.
fig, (ax_hourly, ax_weekend, ax_temple) = plt.subplots(1, 3, figsize=(15, 5))

# Panel 1: mean visitors for each hour over the whole dataset
hourly_avg = df.groupby('hour')['visitors'].mean()
ax_hourly.plot(hourly_avg.index, hourly_avg.values, marker='o', linewidth=2)
ax_hourly.set_title('Average Visitors by Hour')

# Panel 2: one line per is_weekend value (columns are False / True)
weekend_comparison = df.groupby(['hour', 'is_weekend'])['visitors'].mean().unstack()
for flag, label, marker in ((False, 'Weekday', 'o'), (True, 'Weekend', 's')):
    ax_weekend.plot(weekend_comparison.index, weekend_comparison[flag], label=label, marker=marker)
ax_weekend.set_title('Weekday vs Weekend Patterns')
ax_weekend.legend()

# Panel 3: one line per temple
temple_comparison = df.groupby(['hour', 'temple_id'])['visitors'].mean().unstack()
for temple in temple_comparison.columns:
    ax_temple.plot(temple_comparison.index, temple_comparison[temple], label=temple.title(), marker='o')
ax_temple.set_title('Temple Comparison')
ax_temple.legend()

# All three panels share the same axis labels and light grid
for ax in (ax_hourly, ax_weekend, ax_temple):
    ax.set_xlabel('Hour of Day')
    ax.set_ylabel('Average Visitors')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Festival impact analysis (left) next to the per-month averages (right)
fig, (ax_festival, ax_monthly) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: hourly mean visitors, one line per is_festival value
festival_comparison = df.groupby(['hour', 'is_festival'])['visitors'].mean().unstack()
for flag, label, marker in ((False, 'Normal Day', 'o'), (True, 'Festival Day', 's')):
    ax_festival.plot(festival_comparison.index, festival_comparison[flag], label=label, marker=marker)
ax_festival.set_title('Festival vs Normal Day Impact')
ax_festival.set_xlabel('Hour of Day')
ax_festival.set_ylabel('Average Visitors')
ax_festival.legend()
ax_festival.grid(True, alpha=0.3)

# Right panel: mean visitors per calendar month as bars
monthly_avg = df.groupby('month')['visitors'].mean()
ax_monthly.bar(monthly_avg.index, monthly_avg.values, color='skyblue', alpha=0.7)
ax_monthly.set_title('Monthly Visitor Patterns')
ax_monthly.set_xlabel('Month')
ax_monthly.set_ylabel('Average Visitors')
ax_monthly.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Model Training (Simplified)

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

# Feature matrix and regression target.
# NOTE(review): 'temple_id' is not among the features, so rows from all
# temples are presented to the model with identical inputs — confirm that
# this is intended.
features = ['hour', 'day_of_week', 'month', 'is_weekend', 'is_festival']
X, y = df[features], df['visitors']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable for a given dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 100-tree random forest (fit() returns the estimator itself)
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Model Performance:")
print(f"Mean Absolute Error: {mae:.2f}")
print(f"R² Score: {r2:.3f}")

# Feature importance, most important first
feature_importance = pd.DataFrame(
    {'feature': features, 'importance': model.feature_importances_}
).sort_values(by='importance', ascending=False)

fig, ax = plt.subplots(figsize=(8, 4))
ax.bar(feature_importance['feature'], feature_importance['importance'])
ax.set_title('Feature Importance')
ax.set_xlabel('Features')
ax.set_ylabel('Importance')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

print("\nFeature Importance:")
print(feature_importance)

## Sample Prediction

In [None]:
# Generate sample prediction for tomorrow (local clock), treated as a
# normal, non-festival day.
tomorrow = datetime.now() + timedelta(days=1)
is_weekend = tomorrow.weekday() >= 5

# Build all 24 hourly rows in one frame and score them with a single
# predict() call instead of 24 one-row calls. Scalars are broadcast across
# the rows. Selecting with `features` keeps the column order tied to the
# list the model was trained on rather than to a hand-copied ordering.
hours = list(range(24))
prediction_input = pd.DataFrame({
    'hour': hours,
    'day_of_week': tomorrow.weekday(),
    'month': tomorrow.month,
    'is_weekend': is_weekend,
    'is_festival': False,  # Normal day
})[features]

predicted = model.predict(prediction_input)

# int() truncates the fractional part of each prediction
sample_predictions = [
    {'hour': hour, 'visitors': int(value)}
    for hour, value in zip(hours, predicted)
]

# Plot prediction
pred_df = pd.DataFrame(sample_predictions)
plt.figure(figsize=(12, 6))
plt.plot(pred_df['hour'], pred_df['visitors'], marker='o', linewidth=2, markersize=6)
plt.title(f'24-Hour Visitor Prediction for {tomorrow.strftime("%Y-%m-%d")}')
plt.xlabel('Hour of Day')
plt.ylabel('Predicted Visitors')
plt.grid(True, alpha=0.3)
plt.xticks(range(0, 24, 2))

# Highlight peak hours: idxmax() returns the first row holding the maximum
peak_hour = pred_df.loc[pred_df['visitors'].idxmax(), 'hour']
peak_visitors = pred_df['visitors'].max()
plt.axvline(x=peak_hour, color='red', linestyle='--', alpha=0.7, label=f'Peak: {peak_hour}:00 ({peak_visitors:,} visitors)')
plt.legend()

plt.tight_layout()
plt.show()

print(f"\nPrediction Summary for {tomorrow.strftime('%Y-%m-%d')}:")
print(f"Total predicted visitors: {pred_df['visitors'].sum():,}")
print(f"Peak hour: {peak_hour}:00 with {peak_visitors:,} visitors")
print(f"Weekend: {'Yes' if is_weekend else 'No'}")