In [3]:
import pandas as pd
import numpy as np
import pickle
import os
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

class RestaurantLocationML:
    def __init__(self, models_path="models", data_file="Data_Final_2024_Clean.csv"):
        self.models_path = models_path
        self.data_file = data_file
        self.models = {}
        self.scalers = {}
        self.label_encoders = {}
        self.branch_performance = None
        self.feature_names = []
        
        # Jakarta Selatan boundaries
        self.lat_bounds = (-6.4, -6.1)
        self.lng_bounds = (106.7, 106.9)
        
        os.makedirs(models_path, exist_ok=True)
    
    def load_and_prepare_data(self):
        """Load dan prepare data dari CSV yang sudah di-clean"""
        print(f"🔄 Loading data from {self.data_file}")
        
        try:
            df = pd.read_csv(self.data_file)
            print(f"✅ Data loaded successfully: {df.shape}")
        except Exception as e:
            print(f"❌ Error loading data: {e}")
            return None
        
        # Check required columns
        required_cols = [
            'branchID', 'latitude', 'longitude', 'gtv_2024', 'aov_2024', 
            'Category', 'subdistrictName', 'total_qty'
        ]
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            print(f"❌ Missing required columns: {missing_cols}")
            return None
        
        print(f"✅ All required columns present")
        print(f"📊 Data overview:")
        print(f"   - Unique branches: {df['branchID'].nunique()}")
        print(f"   - Categories: {df['Category'].nunique()}")
        print(f"   - Subdistricts: {df['subdistrictName'].nunique()}")
        
        return df
    
    def create_branch_features(self, df):
        """Derive the model features and the ``monthly_gtv`` target.

        Every input row is treated as one branch-level observation.
        NOTE(review): this assumes the CSV holds a single yearly row per
        branch; the run recorded in this notebook loaded far more rows than
        unique branch IDs, so confirm the upstream aggregation.

        Side effects:
            Fits new ``LabelEncoder`` objects on ``subdistrictName`` and
            ``Category`` (before any row is filtered out) and stores them in
            ``self.label_encoders`` under ``'subdistrict'`` and ``'category'``.

        Args:
            df: Frame containing at least ``latitude``, ``longitude``,
                ``gtv_2024``, ``aov_2024``, ``total_qty``, ``Category`` and
                ``subdistrictName``.

        Returns:
            A filtered copy of ``df`` with the extra columns ``monthly_gtv``,
            ``estimated_transactions``, ``estimated_daily_transactions``,
            ``revenue_per_transaction``, ``high_value_customer``,
            ``high_volume_branch``, ``subdistrict_encoded`` and
            ``category_encoded``. Rows with zero coordinates, non-positive
            GTV/AOV, or coordinates outside ``self.lat_bounds`` /
            ``self.lng_bounds`` are removed.
        """
        print("🔧 Creating branch features...")

        # Work on a copy so the caller's frame is left untouched.
        branch_features = df.copy()

        # Target variable: yearly GTV spread evenly over twelve months.
        branch_features['monthly_gtv'] = branch_features['gtv_2024'] / 12

        # Ratio features. A zero denominator would yield +/-inf, which the
        # scaler used during training rejects; map zeros to NaN instead so the
        # median imputation in train_models() can handle those rows.
        gtv = branch_features['gtv_2024']
        safe_aov = branch_features['aov_2024'].replace(0, np.nan)
        safe_qty = branch_features['total_qty'].replace(0, np.nan)

        branch_features['estimated_transactions'] = gtv / safe_aov
        branch_features['estimated_daily_transactions'] = branch_features['estimated_transactions'] / 365
        branch_features['revenue_per_transaction'] = gtv / safe_qty

        # Binary indicators relative to the median of the unfiltered data.
        branch_features['high_value_customer'] = (branch_features['aov_2024'] > branch_features['aov_2024'].median()).astype(int)
        branch_features['high_volume_branch'] = (branch_features['total_qty'] > branch_features['total_qty'].median()).astype(int)

        # Integer codes for the two categorical columns.
        le_subdistrict = LabelEncoder()
        le_category = LabelEncoder()

        branch_features['subdistrict_encoded'] = le_subdistrict.fit_transform(branch_features['subdistrictName'])
        branch_features['category_encoded'] = le_category.fit_transform(branch_features['Category'])

        # Keep the encoders so they can be saved with the models.
        self.label_encoders['subdistrict'] = le_subdistrict
        self.label_encoders['category'] = le_category

        # Drop rows with placeholder coordinates or non-positive money values.
        branch_features = branch_features[
            (branch_features['latitude'] != 0) &
            (branch_features['longitude'] != 0) &
            (branch_features['gtv_2024'] > 0) &
            (branch_features['aov_2024'] > 0) &
            (branch_features['monthly_gtv'] > 0)
        ]

        # Keep only coordinates inside the configured bounding box.
        branch_features = branch_features[
            (branch_features['latitude'] >= self.lat_bounds[0]) &
            (branch_features['latitude'] <= self.lat_bounds[1]) &
            (branch_features['longitude'] >= self.lng_bounds[0]) &
            (branch_features['longitude'] <= self.lng_bounds[1])
        ]

        print(f"✅ Branch features created: {branch_features.shape}")
        print(f"📊 Target variable (monthly_gtv) stats:")
        print(f"   - Mean: Rp {branch_features['monthly_gtv'].mean():,.0f}")
        print(f"   - Median: Rp {branch_features['monthly_gtv'].median():,.0f}")
        print(f"   - Min: Rp {branch_features['monthly_gtv'].min():,.0f}")
        print(f"   - Max: Rp {branch_features['monthly_gtv'].max():,.0f}")

        return branch_features
    
    def train_models(self, df):
        """Train one RandomForest regressor per restaurant category.

        For every category with at least four usable rows the method holds
        out a test split, fits a ``StandardScaler`` on the training rows only,
        trains a ``RandomForestRegressor`` on the scaled training rows and
        reports R², MAE and RMSE on the held-out rows. When the training
        split has more than ten rows a 5-fold cross-validated R² is added.

        NOTE(review): ``estimated_daily_transactions`` is computed from
        ``gtv_2024``, the same column the target ``monthly_gtv`` is derived
        from, so the reported scores are probably optimistic. Confirm whether
        this feature should be available at prediction time.

        Side effects:
            Stores the fitted model and scaler per category in ``self.models``
            and ``self.scalers``, the feature list in ``self.feature_names``
            and the engineered frame in ``self.branch_performance``.

        Args:
            df: Raw frame as returned by ``load_and_prepare_data``.

        Returns:
            ``False`` when no usable rows remain after feature engineering,
            otherwise a dict mapping each trained category to its metrics
            (``data_points``, ``r2_score``, ``mae``, ``rmse``, ``cv_r2_mean``,
            ``cv_r2_std``, ``most_important_feature``, ``feature_importance``).
            The dict is empty when every category was skipped.
        """
        print("\n🚀 Starting model training...")

        branch_features = self.create_branch_features(df)

        if branch_features is None or len(branch_features) == 0:
            print("❌ No valid data for training")
            return False

        # Model inputs; the target and identifier columns are left out.
        feature_cols = [
            'latitude', 'longitude', 'aov_2024',
            'estimated_daily_transactions', 'revenue_per_transaction',
            'high_value_customer', 'high_volume_branch',
            'subdistrict_encoded'
        ]

        self.feature_names = feature_cols

        categories = branch_features['Category'].unique()
        print(f"📋 Training models for {len(categories)} categories")

        training_results = {}

        for category in categories:
            print(f"\n=== Training model for {category} ===")

            cat_data = branch_features[branch_features['Category'] == category].copy()
            n_rows = len(cat_data)

            if n_rows < 4:
                print(f"⚠️  Skipping {category}: insufficient data ({n_rows} records)")
                continue

            X = cat_data[feature_cols].copy()
            y = cat_data['monthly_gtv']

            # Impute missing feature values with the column median.
            if X.isnull().any().any():
                print(f"⚠️  Found missing values in features for {category}")
                X = X.fillna(X.median())

            # Hold-out size: 20% normally, 10% for small categories. With ten
            # rows or fewer 10% is a single row, on which R² is undefined, so
            # hold out exactly two rows instead.
            if n_rows <= 10:
                test_size = 2
            elif n_rows < 20:
                test_size = 0.1
            else:
                test_size = 0.2

            # Split first so the scaler never sees the held-out rows.
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size, random_state=42
            )

            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)

            model = RandomForestRegressor(
                n_estimators=100,
                max_depth=10,
                min_samples_split=5,
                min_samples_leaf=2,
                random_state=42,
                n_jobs=-1
            )

            model.fit(X_train_scaled, y_train)

            # Hold-out evaluation.
            y_pred = model.predict(X_test_scaled)

            r2 = r2_score(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))

            # Cross-validation only when each of the five folds gets data.
            cv_ran = len(X_train) > 10
            if cv_ran:
                cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=5, scoring='r2')
                cv_r2_mean = cv_scores.mean()
                cv_r2_std = cv_scores.std()
            else:
                # Kept as 0 (not None) for callers that read these keys.
                cv_r2_mean = cv_r2_std = 0

            feature_importance = dict(zip(feature_cols, model.feature_importances_))
            most_important_feature = max(feature_importance, key=feature_importance.get)

            training_results[category] = {
                'data_points': n_rows,
                'r2_score': r2,
                'mae': mae,
                'rmse': rmse,
                'cv_r2_mean': cv_r2_mean,
                'cv_r2_std': cv_r2_std,
                'most_important_feature': most_important_feature,
                'feature_importance': feature_importance
            }

            self.models[category] = model
            self.scalers[category] = scaler

            print(f"📊 Results for {category}:")
            print(f"   - Data points: {n_rows}")
            print(f"   - R² Score: {r2:.3f}")
            print(f"   - MAE: Rp {mae:,.0f}")
            print(f"   - RMSE: Rp {rmse:,.0f}")
            # Report the CV score whenever it was computed, including
            # non-positive values, which signal a poorly fitting model.
            if cv_ran:
                print(f"   - CV R² (5-fold): {cv_r2_mean:.3f} ± {cv_r2_std:.3f}")
            print(f"   - Most important feature: {most_important_feature}")

        # Keep the engineered frame for prediction-time defaults.
        self.branch_performance = branch_features

        print(f"\n🎯 Training Summary:")
        print(f"   - Total categories trained: {len(self.models)}")
        if training_results:
            avg_r2 = np.mean([r['r2_score'] for r in training_results.values()])
            avg_mae = np.mean([r['mae'] for r in training_results.values()])
            print(f"   - Average R² score: {avg_r2:.3f}")
            print(f"   - Average MAE: Rp {avg_mae:,.0f}")
        else:
            # Averaging an empty list would only print "nan".
            print("   - No category had enough data to train a model")

        return training_results
    
    def predict_location_performance(self, latitude, longitude, category, aov_estimate=None):
        """Predict monthly GTV for a given location and category"""
        if category not in self.models:
            available_categories = list(self.models.keys())
            print(f"❌ Model not available for category '{category}'")
            print(f"Available categories: {available_categories}")
            return None
        
        # Encode subdistrict (use most common if unknown)
        subdistrict_encoded = 0  # Default encoding
        
        # Use provided AOV or category average
        if aov_estimate is None:
            if self.branch_performance is not None:
                cat_data = self.branch_performance[self.branch_performance['Category'] == category]
                aov_estimate = cat_data['aov_2024'].median() if len(cat_data) > 0 else 75000
            else:
                aov_estimate = 75000  # Default AOV
        
        # Calculate derived features (use reasonable defaults)
        estimated_daily_transactions = 10  # Default assumption
        revenue_per_transaction = aov_estimate * 1.2  # AOV + markup
        high_value_customer = 1 if aov_estimate > 100000 else 0
        high_volume_branch = 0  # Conservative assumption for new location
        
        # Create feature vector
        features = np.array([[
            latitude,
            longitude,
            aov_estimate,
            estimated_daily_transactions,
            revenue_per_transaction,
            high_value_customer,
            high_volume_branch,
            subdistrict_encoded
        ]])
        
        # Scale features
        scaler = self.scalers[category]
        features_scaled = scaler.transform(features)
        
        # Make prediction
        model = self.models[category]
        predicted_monthly_gtv = model.predict(features_scaled)[0]
        
        return {
            'category': category,
            'latitude': latitude,
            'longitude': longitude,
            'predicted_monthly_gtv': predicted_monthly_gtv,
            'predicted_annual_gtv': predicted_monthly_gtv * 12,
            'input_aov': aov_estimate
        }
    
    def save_models(self):
        """Save all trained models and encoders"""
        print(f"\n💾 Saving models to {self.models_path}/")
        
        # Save models
        for category, model in self.models.items():
            filename = f"{category.replace(' ', '_').replace('/', '_')}_model.pkl"
            filepath = os.path.join(self.models_path, filename)
            with open(filepath, 'wb') as f:
                pickle.dump(model, f)
        
        # Save scalers
        scalers_file = os.path.join(self.models_path, 'scalers.pkl')
        with open(scalers_file, 'wb') as f:
            pickle.dump(self.scalers, f)
        
        # Save label encoders
        encoders_file = os.path.join(self.models_path, 'label_encoders.pkl')
        with open(encoders_file, 'wb') as f:
            pickle.dump(self.label_encoders, f)
        
        # Save feature names
        features_file = os.path.join(self.models_path, 'feature_names.pkl')
        with open(features_file, 'wb') as f:
            pickle.dump(self.feature_names, f)
        
        # Save branch performance data
        if self.branch_performance is not None:
            performance_file = os.path.join(self.models_path, 'branch_performance.pkl')
            with open(performance_file, 'wb') as f:
                pickle.dump(self.branch_performance, f)
        
        print(f"✅ Models saved successfully!")
        print(f"   - {len(self.models)} category models")
        print(f"   - Scalers and encoders")
        print(f"   - Feature configurations")
    
    def load_models(self):
        """Load previously trained models"""
        print(f"📂 Loading models from {self.models_path}/")
        
        try:
            # Load scalers
            scalers_file = os.path.join(self.models_path, 'scalers.pkl')
            with open(scalers_file, 'rb') as f:
                self.scalers = pickle.load(f)
            
            # Load label encoders
            encoders_file = os.path.join(self.models_path, 'label_encoders.pkl')
            with open(encoders_file, 'rb') as f:
                self.label_encoders = pickle.load(f)
            
            # Load feature names
            features_file = os.path.join(self.models_path, 'feature_names.pkl')
            with open(features_file, 'rb') as f:
                self.feature_names = pickle.load(f)
            
            # Load branch performance
            performance_file = os.path.join(self.models_path, 'branch_performance.pkl')
            if os.path.exists(performance_file):
                with open(performance_file, 'rb') as f:
                    self.branch_performance = pickle.load(f)
            
            # Load models
            model_files = [f for f in os.listdir(self.models_path) if f.endswith('_model.pkl')]
            for model_file in model_files:
                category = model_file.replace('_model.pkl', '').replace('_', ' ')
                filepath = os.path.join(self.models_path, model_file)
                with open(filepath, 'rb') as f:
                    self.models[category] = pickle.load(f)
            
            print(f"✅ Models loaded successfully!")
            print(f"   - {len(self.models)} category models loaded")
            return True
            
        except Exception as e:
            print(f"❌ Error loading models: {e}")
            return False

# Example usage and training script
if __name__ == "__main__":
    # End-to-end run: load the CSV, train one model per category, persist the
    # artifacts and print a sample prediction.
    print("🍽️  Restaurant Location ML Training System")
    print("=" * 50)

    # Initialize the ML system
    ml_system = RestaurantLocationML(
        models_path="models",
        data_file="Data_Final_2024_Clean.csv"
    )

    # Load and prepare data
    df = ml_system.load_and_prepare_data()
    if df is None:
        print("❌ Failed to load data. Exiting...")
        # `exit` is the interactive-shell helper installed by the `site`
        # module; raising SystemExit needs no import and works without it.
        raise SystemExit(1)

    # Train models; both False and an empty result dict count as failure.
    training_results = ml_system.train_models(df)
    if not training_results:
        print("❌ Training failed. Exiting...")
        raise SystemExit(1)

    # Save trained models
    ml_system.save_models()

    # Example prediction; returns None when no "Coffee" model was trained,
    # in which case nothing below is printed for it.
    print("\n🔮 Example Prediction:")
    example_prediction = ml_system.predict_location_performance(
        latitude=-6.2608,  # Kebayoran area
        longitude=106.7811,
        category="Coffee",  # Change to a category in your data
        aov_estimate=85000
    )

    if example_prediction:
        print(f"📍 Location: ({example_prediction['latitude']}, {example_prediction['longitude']})")
        print(f"🏷️  Category: {example_prediction['category']}")
        print(f"💰 Predicted Monthly GTV: Rp {example_prediction['predicted_monthly_gtv']:,.0f}")
        print(f"📅 Predicted Annual GTV: Rp {example_prediction['predicted_annual_gtv']:,.0f}")

    print("\n✅ Training completed successfully!")
    print(f"📁 Models saved in: {ml_system.models_path}/")
    print("🚀 Ready for deployment!")

🍽️  Restaurant Location ML Training System
🔄 Loading data from Data_Final_2024_Clean.csv
✅ Data loaded successfully: (1510216, 18)
✅ All required columns present
📊 Data overview:
   - Unique branches: 1990
   - Categories: 23
   - Subdistricts: 65

🚀 Starting model training...
🔧 Creating branch features...
✅ Branch features created: (1382727, 26)
📊 Target variable (monthly_gtv) stats:
   - Mean: Rp 293,694
   - Median: Rp 52,030
   - Min: Rp 0
   - Max: Rp 211,781,062
📋 Training models for 22 categories

=== Training model for Fast Food ===
📊 Results for Fast Food:
   - Data points: 46461
   - R² Score: 0.995
   - MAE: Rp 4,987
   - RMSE: Rp 28,953
   - CV R² (5-fold): 0.905 ± 0.127
   - Most important feature: estimated_daily_transactions

=== Training model for Traditional food ===
📊 Results for Traditional food:
   - Data points: 247321
   - R² Score: 0.989
   - MAE: Rp 9,760
   - RMSE: Rp 73,049
   - CV R² (5-fold): 0.981 ± 0.010
   - Most important feature: estimated_daily_transac

In [5]:
# Load the cleaned dataset and keep only the rows of the "Eastern" category.
df = pd.read_csv("df_clean.csv")
df_soy = df.loc[df["Category"].eq("Eastern")]
# Bare expression so the notebook renders the filtered frame as cell output.
df_soy

Unnamed: 0,menuID,SalesDate,cityName,brandName,branchID,branchName,branchCode,branchCompanyID,latitude,longitude,subdistrictName,companyID,companyName,companyCode,total_qty,gtv_2024,aov_2024,Category
124,1173835,2024-08-02,Jakarta Selatan,Oz Lezzat,2439,Kamu Kunafa?,OZL,8560,-6.243348,106.798603,Melawai,8560,PT Dour Almadinah Almunawarah,DAW,5945.0,206051600.0,34659.646762,Eastern
125,1173828,2024-10-02,Jakarta Selatan,Oz Lezzat,2440,Kamu Kunafa Event,KKNE,8560,-6.294338,106.784689,Cilandak Barat,8560,PT Dour Almadinah Almunawarah,DAW,371.0,19511500.0,52591.644205,Eastern
233,1413420,2024-01-02,Jakarta Selatan,Kebuli Ijab Qabul Tebet,5110,Kebuli Ijab Qabul Tebet,KIQT,3150,-6.225468,106.85242,Tebet Timur,3150,Irwan hiusnandar ( Franchise ijab qabul Tebet),IHR,22384.0,1383380000.0,61802.173791,Eastern
346,1578182,2024-01-01,Jakarta Selatan,ISTANBUL KEBAB,7418,Istanbul Kebab & Pide Turkish Pizza Kalibata,IKKB,3527,0.0,0.0,Rawa Jati,3527,PT Mustafa Kuliner Indonesia,MFKI,5107.0,100605000.0,19699.441747,Eastern
347,1578182,2024-11-03,Jakarta Selatan,ISTANBUL KEBAB,7447,Lidah Mertua X Pide X Kebab Moh. Kahfi Jagakarsa,JAGA,3527,-6.322386,106.811174,Jagakarsa,3527,PT Mustafa Kuliner Indonesia,MFKI,1976.0,21085580.0,10670.840081,Eastern
348,1578183,2024-06-14,Jakarta Selatan,ISTANBUL KEBAB,7450,Istanbul Kebab & Pide Turkish Pizza Cidodol,CKBLM,3527,-6.228848,106.772147,Grogol Selatan,3527,PT Mustafa Kuliner Indonesia,MFKI,1994.0,35222380.0,17664.182548,Eastern
722,2216075,2024-11-10,Jakarta Selatan,Almaz Fried Chicken,18311,Almaz Fried Chicken - Jagakarsa,AZFCS,8695,-6.350166,106.802254,Cipedak,8695,Almaz Fried Chicken,AZFC,137682.0,1052748000.0,7646.225287,Eastern
808,2391967,2024-03-14,Jakarta Selatan,EMADO'S ELITE,20974,EMADO'S ELITE,EMSB,7694,-6.22402,106.810186,Senayan,7694,PT Emados Kebab Indonesia,EKI,83768.0,3116486000.0,37203.771217,Eastern
838,2444035,2024-01-01,Jakarta Selatan,Fingers Butter Rice,22717,"Fingers Butter Rice, Tebet",FBR01,2782,-6.240972,106.850233,Tebet Barat,2782,Fingers Butter Rice,FBR,15166.0,623260000.0,41095.872346,Eastern
839,2444035,2024-01-01,Jakarta Selatan,Fingers Butter Rice,22722,"Fingers Butter Rice, Cipete Utara",FBR06,2782,-6.265465,106.804249,Cipete Utara,2782,Fingers Butter Rice,FBR,15986.0,670774000.0,41960.090079,Eastern


In [None]:
# import streamlit as st
# import pandas as pd
# import numpy as np
# import pickle
# import plotly.express as px
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots
# import folium
# from folium import plugins
# from streamlit_folium import st_folium
# import os

# # Page configuration
# st.set_page_config(
#     page_title="Restaurant Location ML System",
#     page_icon="🍽️",
#     layout="wide",
#     initial_sidebar_state="expanded"
# )

# # Custom CSS
# st.markdown("""
# <style>
#     .main-header {
#         font-size: 2.5rem;
#         color: #1f77b4;
#         text-align: center;
#         margin-bottom: 1rem;
#     }
#     .metric-card {
#         background-color: #f0f2f6;
#         padding: 1rem;
#         border-radius: 10px;
#         border-left: 5px solid #1f77b4;
#         margin: 0.5rem 0;
#     }
#     .location-card {
#         background-color: #ffffff;
#         padding: 1rem;
#         border-radius: 8px;
#         box-shadow: 0 2px 4px rgba(0,0,0,0.1);
#         margin: 0.5rem 0;
#     }
#     .stDataFrame {
#         font-size: 14px;
#     }
#     .success-box {
#         background-color: #d4edda;
#         color: #155724;
#         padding: 1rem;
#         border-radius: 8px;
#         margin: 1rem 0;
#     }
# </style>
# """, unsafe_allow_html=True)

# @st.cache_data
# def load_models():
#     """Load pre-trained models and data"""
#     models_path = "models"
    
#     try:
#         # Load scalers
#         with open(f"{models_path}/scalers.pkl", 'rb') as f:
#             scalers = pickle.load(f)
        
#         # Load label encoders
#         with open(f"{models_path}/label_encoders.pkl", 'rb') as f:
#             encoders = pickle.load(f)
        
#         # Load feature names
#         with open(f"{models_path}/feature_names.pkl", 'rb') as f:
#             feature_names = pickle.load(f)
        
#         # Load branch performance data
#         with open(f"{models_path}/branch_performance.pkl", 'rb') as f:
#             branch_performance = pickle.load(f)
        
#         # Load models
#         models = {}
#         model_files = [f for f in os.listdir(models_path) if f.endswith('_model.pkl')]
#         for model_file in model_files:
#             category = model_file.replace('_model.pkl', '').replace('_', ' ')
#             with open(f"{models_path}/{model_file}", 'rb') as f:
#                 models[category] = pickle.load(f)
        
#         return models, scalers, encoders, feature_names, branch_performance
        
#     except Exception as e:
#         st.error(f"Error loading models: {e}")
#         return None, None, None, None, None

# def predict_single_location(models, scalers, encoders, feature_names, branch_performance, 
#                           latitude, longitude, category, aov_estimate=None):
#     """Predict performance for a single location"""
    
#     if category not in models:
#         return None
    
#     model = models[category]
#     scaler = scalers[category]
    
#     # Get category benchmark data
#     cat_data = branch_performance[branch_performance['Category'] == category]
    
#     # Use provided AOV or category median
#     if aov_estimate is None:
#         aov_estimate = cat_data['aov_2024'].median() if len(cat_data) > 0 else 75000
    
#     # Calculate derived features
#     estimated_daily_transactions = 15  # Conservative estimate
#     revenue_per_transaction = aov_estimate * 1.2
#     high_value_customer = 1 if aov_estimate > cat_data['aov_2024'].median() else 0
#     high_volume_branch = 0  # Conservative for new location
    
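#     # Default subdistrict encoding: code 0 is the first class of the fitted LabelEncoder (sorted order), not necessarily the most common subdistrict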
#     subdistrict_encoded = 0
    
#     # Create feature vector matching training features
#     features = np.array([[
#         latitude,
#         longitude,
#         aov_estimate,
#         estimated_daily_transactions,
#         revenue_per_transaction,
#         high_value_customer,
#         high_volume_branch,
#         subdistrict_encoded
#     ]])
    
#     # Scale features
#     features_scaled = scaler.transform(features)
    
#     # Make prediction
#     predicted_monthly_gtv = model.predict(features_scaled)[0]
    
#     return {
#         'latitude': latitude,
#         'longitude': longitude,
#         'category': category,
#         'predicted_monthly_gtv': predicted_monthly_gtv,
#         'predicted_annual_gtv': predicted_monthly_gtv * 12,
#         'input_aov': aov_estimate,
#         'estimated_daily_transactions': estimated_daily_transactions
#     }

# def calculate_area_demographics(lat, lng, branch_performance):
#     """Calculate area-specific demographics and business factors"""
    
#     # Calculate distance to major business centers (Jakarta Selatan landmarks)
#     business_centers = [
#         (-6.2088, 106.8456),  # Sudirman CBD
#         (-6.2297, 106.8230),  # Senopati
#         (-6.2614, 106.7814),  # Kebayoran Baru
#         (-6.3002, 106.8197),  # Pondok Indah
#         (-6.2751, 106.8095),  # Blok M
#     ]
    
#     # Distance to nearest business center (smaller = better)
#     min_distance_to_cbd = min([
#         np.sqrt((lat - cbd_lat)**2 + (lng - cbd_lng)**2) 
#         for cbd_lat, cbd_lng in business_centers
#     ])
    
#     # Area traffic factor (closer to CBD = higher traffic)
#     traffic_factor = max(0.5, 1.0 - (min_distance_to_cbd * 10))  # Scale factor
    
#     # Calculate local branch density in radius
#     nearby_branches = 0
#     radius = 0.01  # ~1km radius
#     for _, branch in branch_performance.iterrows():
#         dist = np.sqrt((lat - branch['latitude'])**2 + (lng - branch['longitude'])**2)
#         if dist <= radius:
#             nearby_branches += 1
    
#     # Market saturation factor
#     market_saturation = min(nearby_branches / 10.0, 0.8)  # Max 80% saturation
    
#     # Area prosperity factor (based on distance to upscale areas)
#     upscale_areas = [
#         (-6.2297, 106.8230),  # Senopati
#         (-6.3002, 106.8197),  # Pondok Indah
#         (-6.2614, 106.7814),  # Kebayoran Baru
#     ]
    
#     min_distance_to_upscale = min([
#         np.sqrt((lat - area_lat)**2 + (lng - area_lng)**2) 
#         for area_lat, area_lng in upscale_areas
#     ])
    
#     prosperity_factor = max(0.7, 1.3 - (min_distance_to_upscale * 8))
    
#     return {
#         'traffic_factor': traffic_factor,
#         'market_saturation': market_saturation,
#         'prosperity_factor': prosperity_factor,
#         'nearby_branches': nearby_branches,
#         'distance_to_cbd': min_distance_to_cbd
#     }

# def find_optimal_locations(models, scalers, encoders, feature_names, branch_performance, 
#                           category, num_locations=10):
#     """Find optimal locations for a category with dynamic location-specific features"""
    
#     if category not in models:
#         return None
    
#     # Get existing successful locations for this category
#     cat_data = branch_performance[branch_performance['Category'] == category]
    
#     if len(cat_data) == 0:
#         return None
    
#     # Define grid of potential locations in Jakarta Selatan (increased resolution)
#     lat_min, lat_max = -6.4, -6.1
#     lng_min, lng_max = 106.7, 106.9
    
#     # Create finer grid for more location variety
#     lat_grid = np.linspace(lat_min, lat_max, 30)  # Increased from 20 to 30
#     lng_grid = np.linspace(lng_min, lng_max, 30)  # Increased from 20 to 30
    
#     predictions = []
    
#     # Get category stats for dynamic calculations
#     category_stats = {
#         'median_aov': cat_data['aov_2024'].median(),
#         'q75_aov': cat_data['aov_2024'].quantile(0.75),
#         'q25_aov': cat_data['aov_2024'].quantile(0.25),
#         'median_daily_transactions': cat_data['estimated_daily_transactions'].median() if 'estimated_daily_transactions' in cat_data.columns else 15,
#         'max_daily_transactions': cat_data['estimated_daily_transactions'].max() if 'estimated_daily_transactions' in cat_data.columns else 50
#     }
    
#     for lat in lat_grid:
#         for lng in lng_grid:
#             # Calculate location-specific demographics
#             demographics = calculate_area_demographics(lat, lng, branch_performance)
            
#             # Dynamic AOV based on area prosperity
#             dynamic_aov = category_stats['median_aov'] * demographics['prosperity_factor']
#             dynamic_aov = np.clip(dynamic_aov, category_stats['q25_aov'], category_stats['q75_aov'] * 1.2)
            
#             # Dynamic daily transactions based on traffic and saturation
#             base_transactions = category_stats['median_daily_transactions']
#             dynamic_daily_transactions = base_transactions * demographics['traffic_factor'] * (1 - demographics['market_saturation'])
#             dynamic_daily_transactions = max(5, min(dynamic_daily_transactions, category_stats['max_daily_transactions']))
            
#             # Calculate distance to competitors for this specific category
#             competitor_distances = []
#             for _, competitor in cat_data.iterrows():
#                 dist = np.sqrt((lat - competitor['latitude'])**2 + 
#                              (lng - competitor['longitude'])**2)
#                 competitor_distances.append(dist)
            
#             nearest_competitor_distance = min(competitor_distances) if competitor_distances else 1.0
            
#             # Make prediction with dynamic features
#             prediction = predict_single_location_dynamic(
#                 models, scalers, encoders, feature_names, branch_performance,
#                 lat, lng, category, dynamic_aov, dynamic_daily_transactions, demographics
#             )
            
#             if prediction:
#                 # Enhanced opportunity score calculation
#                 base_score = prediction['predicted_monthly_gtv']
                
#                 # Bonus for distance from competitors
#                 competition_bonus = min(nearest_competitor_distance * 100000, base_score * 0.3)
                
#                 # Penalty for market saturation
#                 saturation_penalty = base_score * demographics['market_saturation'] * 0.2
                
#                 # Bonus for high traffic areas
#                 traffic_bonus = base_score * (demographics['traffic_factor'] - 1) * 0.5
                
#                 opportunity_score = base_score + competition_bonus - saturation_penalty + traffic_bonus
                
#                 prediction.update({
#                     'nearest_competitor_distance': nearest_competitor_distance,
#                     'opportunity_score': opportunity_score,
#                     'traffic_factor': demographics['traffic_factor'],
#                     'market_saturation': demographics['market_saturation'],
#                     'prosperity_factor': demographics['prosperity_factor'],
#                     'nearby_branches_all_categories': demographics['nearby_branches'],
#                     'distance_to_cbd': demographics['distance_to_cbd'],
#                     'dynamic_aov_used': dynamic_aov,
#                     'dynamic_transactions_used': dynamic_daily_transactions
#                 })
                
#                 predictions.append(prediction)
    
#     # Sort by opportunity score and return top locations
#     predictions.sort(key=lambda x: x['opportunity_score'], reverse=True)
    
#     # Remove very similar locations (deduplication)
#     filtered_predictions = []
#     min_distance_between_locations = 0.005  # Minimum distance between recommended locations
    
#     for pred in predictions:
#         too_close = False
#         for existing in filtered_predictions:
#             dist = np.sqrt((pred['latitude'] - existing['latitude'])**2 + 
#                           (pred['longitude'] - existing['longitude'])**2)
#             if dist < min_distance_between_locations:
#                 too_close = True
#                 break
        
#         if not too_close:
#             filtered_predictions.append(pred)
            
#         if len(filtered_predictions) >= num_locations:
#             break
    
#     return filtered_predictions

# def predict_single_location_dynamic(models, scalers, encoders, feature_names, branch_performance, 
#                                    latitude, longitude, category, dynamic_aov, dynamic_daily_transactions, demographics):
#     """Enhanced prediction with dynamic location-specific features"""
    
#     if category not in models:
#         return None
    
#     model = models[category]
#     scaler = scalers[category]
    
#     # Calculate derived features with dynamic values
#     revenue_per_transaction = dynamic_aov * 1.1  # Reduced multiplier for realism
    
#     # Dynamic customer value based on area prosperity
#     high_value_customer = 1 if (dynamic_aov * demographics['prosperity_factor']) > 100000 else 0
    
#     # Volume estimation based on traffic and competition
#     high_volume_branch = 1 if dynamic_daily_transactions > 20 else 0
    
#     # Default subdistrict encoding (could be enhanced with actual mapping)
#     subdistrict_encoded = 0
    
#     # Create feature vector matching training features
#     features = np.array([[
#         latitude,
#         longitude,
#         dynamic_aov,
#         dynamic_daily_transactions,
#         revenue_per_transaction,
#         high_value_customer,
#         high_volume_branch,
#         subdistrict_encoded
#     ]])
    
#     # Scale features
#     features_scaled = scaler.transform(features)
    
#     # Make prediction
#     predicted_monthly_gtv = model.predict(features_scaled)[0]
    
#     # Apply location-specific adjustments
#     location_adjustment = demographics['traffic_factor'] * demographics['prosperity_factor'] * (1 - demographics['market_saturation'] * 0.3)
#     adjusted_prediction = predicted_monthly_gtv * location_adjustment
    
#     # Ensure realistic bounds
#     adjusted_prediction = max(adjusted_prediction, 10000000)  # Min 10 million/month
#     adjusted_prediction = min(adjusted_prediction, 500000000)  # Max 500 million/month
    
#     return {
#         'latitude': latitude,
#         'longitude': longitude,
#         'category': category,
#         'predicted_monthly_gtv': adjusted_prediction,
#         'predicted_annual_gtv': adjusted_prediction * 12,
#         'input_aov': dynamic_aov,
#         'estimated_daily_transactions': dynamic_daily_transactions,
#         'location_adjustment_factor': location_adjustment
#     }

# def create_location_map(predictions, existing_branches=None):
#     """Create interactive map with predictions"""
    
#     if not predictions:
#         return None
    
#     # Create base map
#     center_lat = -6.25
#     center_lng = 106.8
    
#     m = folium.Map(
#         location=[center_lat, center_lng],
#         zoom_start=12,
#         tiles='OpenStreetMap'
#     )
    
#     # Add predicted locations
#     colors = ['red', 'orange', 'beige', 'green', 'blue', 'purple', 'pink', 'gray', 'black', 'lightred']
    
#     for i, pred in enumerate(predictions):
#         color = colors[i % len(colors)]
        
#         popup_text = f"""
#         <b>Rank {i+1} - {pred['category']}</b><br>
#         Coordinates: ({pred['latitude']:.4f}, {pred['longitude']:.4f})<br>
#         <b>Predicted Monthly Revenue: Rp {pred['predicted_monthly_gtv']:,.0f}</b><br>
#         <b>Predicted Annual Revenue: Rp {pred['predicted_annual_gtv']:,.0f}</b><br>
#         Est. Daily Transactions: {pred['estimated_daily_transactions']:.1f}<br>
#         Dynamic AOV: Rp {pred.get('dynamic_aov_used', pred['input_aov']):,.0f}<br>
#         Traffic Factor: {pred.get('traffic_factor', 1.0):.2f}<br>
#         Market Saturation: {pred.get('market_saturation', 0.0):.1%}<br>
#         Distance to CBD: {pred.get('distance_to_cbd', 0.0):.3f}<br>
#         Opportunity Score: {pred.get('opportunity_score', 0):,.0f}
#         """
        
#         folium.Marker(
#             [pred['latitude'], pred['longitude']],
#             popup=folium.Popup(popup_text, max_width=300),
#             tooltip=f"Rank {i+1}: Rp {pred['predicted_monthly_gtv']:,.0f}/month",
#             icon=folium.Icon(color=color, icon='cutlery', prefix='fa')
#         ).add_to(m)
    
#     # Add existing branches if provided
#     if existing_branches is not None:
#         for _, branch in existing_branches.iterrows():
#             folium.CircleMarker(
#                 [branch['latitude'], branch['longitude']],
#                 radius=5,
#                 popup=f"Existing: {branch['Category']}<br>Monthly GTV: Rp {branch['monthly_gtv']:,.0f}",
#                 color='black',
#                 weight=2,
#                 fillColor='white',
#                 fillOpacity=0.7
#             ).add_to(m)
    
#     return m

# def create_revenue_comparison_chart(predictions):
#     """Create revenue comparison chart"""
    
#     if not predictions:
#         return None
    
#     df_chart = pd.DataFrame(predictions)
#     df_chart['rank'] = range(1, len(df_chart) + 1)
    
#     fig = go.Figure()
    
#     # Monthly revenue bars
#     fig.add_trace(go.Bar(
#         x=df_chart['rank'],
#         y=df_chart['predicted_monthly_gtv'],
#         name='Monthly Revenue',
#         text=[f"Rp {val:,.0f}" for val in df_chart['predicted_monthly_gtv']],
#         textposition='auto',
#         marker_color='lightblue'
#     ))
    
#     # Opportunity score line (if available)
#     if 'opportunity_score' in df_chart.columns:
#         fig.add_trace(go.Scatter(
#             x=df_chart['rank'],
#             y=df_chart['opportunity_score'],
#             mode='lines+markers',
#             name='Opportunity Score',
#             yaxis='y2',
#             line=dict(color='red', width=2),
#             marker=dict(size=8)
#         ))
        
#         fig.update_layout(
#             yaxis2=dict(
#                 title="Opportunity Score",
#                 overlaying='y',
#                 side='right'
#             )
#         )
    
#     fig.update_layout(
#         title='Revenue Predictions Comparison',
#         xaxis_title="Location Rank",
#         yaxis_title="Predicted Monthly Revenue (Rp)",
#         height=500,
#         showlegend=True
#     )
    
#     return fig

# def main():
#     # Header
#     st.markdown('<h1 class="main-header">🍽️ Restaurant Location ML System</h1>', unsafe_allow_html=True)
#     st.markdown('<p style="text-align: center; font-size: 1.2rem; color: #666;">Optimal Restaurant Location Prediction for Jakarta Selatan</p>', unsafe_allow_html=True)
    
#     # Load models
#     with st.spinner("Loading ML models..."):
#         models, scalers, encoders, feature_names, branch_performance = load_models()
    
#     if models is None:
#         st.error("❌ Could not load trained models. Please ensure models are trained and saved.")
#         st.info("📝 Run the training script first to generate models.")
#         st.code("""
# # Run this first:
# python restaurant_ml_training.py
#         """)
#         return
    
#     st.success(f"✅ Successfully loaded {len(models)} trained models!")
    
#     # Sidebar navigation
#     st.sidebar.title("🧭 Navigation")
#     page = st.sidebar.selectbox(
#         "Select Analysis Type",
#         ["🎯 Single Location Prediction", "🗺️ Optimal Location Finder", "📊 Model Performance", "📈 Data Insights"]
#     )
    
#     if page == "🎯 Single Location Prediction":
#         st.header("🎯 Single Location Prediction")
#         st.write("Predict revenue for a specific location and restaurant category.")
        
#         col1, col2 = st.columns([2, 1])
        
#         with col1:
#             # Input parameters
#             st.subheader("📍 Location Parameters")
            
#             # Category selection
#             available_categories = list(models.keys())
#             selected_category = st.selectbox(
#                 "Restaurant Category",
#                 available_categories,
#                 help="Select the type of restaurant"
#             )
            
#             # Location inputs
#             col_lat, col_lng = st.columns(2)
#             with col_lat:
#                 latitude = st.number_input(
#                     "Latitude", 
#                     value=-6.2608, 
#                     min_value=-6.4, 
#                     max_value=-6.1,
#                     step=0.0001,
#                     format="%.4f"
#                 )
            
#             with col_lng:
#                 longitude = st.number_input(
#                     "Longitude", 
#                     value=106.7811, 
#                     min_value=106.7, 
#                     max_value=106.9,
#                     step=0.0001,
#                     format="%.4f"
#                 )
            
#             # AOV input
#             cat_data = branch_performance[branch_performance['Category'] == selected_category]
#             default_aov = int(cat_data['aov_2024'].median()) if len(cat_data) > 0 else 75000
            
#             aov_estimate = st.number_input(
#                 "Expected Average Order Value (AOV)",
#                 value=default_aov,
#                 min_value=10000,
#                 max_value=500000,
#                 step=5000,
#                 help=f"Default is median AOV for {selected_category}: Rp {default_aov:,}"
#             )
            
#             # Predict button
#             if st.button("🔮 Predict Revenue", type="primary", use_container_width=True):
#                 with st.spinner("Generating prediction..."):
#                     prediction = predict_single_location(
#                         models, scalers, encoders, feature_names, branch_performance,
#                         latitude, longitude, selected_category, aov_estimate
#                     )
                    
#                     if prediction:
#                         st.session_state.single_prediction = prediction
#                         st.success("✅ Prediction completed!")
#                     else:
#                         st.error("❌ Failed to generate prediction")
        
#         with col2:
#             # Display results
#             if hasattr(st.session_state, 'single_prediction'):
#                 pred = st.session_state.single_prediction
                
#                 st.subheader("📊 Prediction Results")
                
#                 # Metrics
#                 st.metric(
#                     "💰 Monthly Revenue", 
#                     f"Rp {pred['predicted_monthly_gtv']:,.0f}"
#                 )
                
#                 st.metric(
#                     "📅 Annual Revenue", 
#                     f"Rp {pred['predicted_annual_gtv']:,.0f}"
#                 )
                
#                 st.metric(
#                     "🛒 Daily Transactions", 
#                     f"{pred['estimated_daily_transactions']}"
#                 )
                
#                 st.metric(
#                     "💳 Input AOV", 
#                     f"Rp {pred['input_aov']:,.0f}"
#                 )
                
#                 # Create simple map for single location
#                 simple_map = folium.Map(
#                     location=[pred['latitude'], pred['longitude']],
#                     zoom_start=15
#                 )
                
#                 folium.Marker(
#                     [pred['latitude'], pred['longitude']],
#                     popup=f"Predicted Monthly Revenue: Rp {pred['predicted_monthly_gtv']:,.0f}",
#                     tooltip="Predicted Location",
#                     icon=folium.Icon(color='red', icon='cutlery', prefix='fa')
#                 ).add_to(simple_map)
                
#                 st.subheader("📍 Location Map")
#                 st_folium(simple_map, width=350, height=300)
    
#     elif page == "🗺️ Optimal Location Finder":
#         st.header("🗺️ Optimal Location Finder")
#         st.write("Find the best locations for your restaurant category across Jakarta Selatan.")
        
#         # Parameters
#         col1, col2 = st.columns([3, 1])
        
#         with col2:
#             st.subheader("⚙️ Search Parameters")
            
#             available_categories = list(models.keys())
#             selected_category = st.selectbox(
#                 "Restaurant Category",
#                 available_categories
#             )
            
#             num_locations = st.slider(
#                 "Number of locations to find",
#                 min_value=5,
#                 max_value=15,
#                 value=10
#             )
            
#             show_existing = st.checkbox(
#                 "Show existing branches",
#                 value=True,
#                 help="Display existing branches of the same category on the map"
#             )
            
#             if st.button("🔍 Find Optimal Locations", type="primary", use_container_width=True):
#                 with st.spinner(f"Analyzing optimal locations for {selected_category}..."):
#                     predictions = find_optimal_locations(
#                         models, scalers, encoders, feature_names, branch_performance,
#                         selected_category, num_locations
#                     )
                    
#                     if predictions:
#                         st.session_state.optimal_predictions = predictions
#                         st.session_state.optimal_category = selected_category
#                         st.session_state.show_existing = show_existing
#                         st.success(f"✅ Found {len(predictions)} optimal locations!")
#                     else:
#                         st.error("❌ Failed to find optimal locations")
        
#         with col1:
#             # Display results
#             if hasattr(st.session_state, 'optimal_predictions'):
#                 predictions = st.session_state.optimal_predictions
#                 category = st.session_state.optimal_category
#                 show_existing = st.session_state.show_existing
                
#                 st.subheader(f"🏆 Top Locations for {category}")
                
#                 # Create results table
#                 df_results = pd.DataFrame(predictions)
#                 df_results['Rank'] = range(1, len(df_results) + 1)
#                 df_results['Monthly Revenue'] = df_results['predicted_monthly_gtv'].apply(lambda x: f"Rp {x:,.0f}")
#                 df_results['Annual Revenue'] = df_results['predicted_annual_gtv'].apply(lambda x: f"Rp {x:,.0f}")
#                 df_results['Opportunity Score'] = df_results['opportunity_score'].apply(lambda x: f"{x:,.0f}")
#                 df_results['Traffic Factor'] = df_results['traffic_factor'].apply(lambda x: f"{x:.2f}")
#                 df_results['Market Saturation'] = df_results['market_saturation'].apply(lambda x: f"{x:.1%}")
#                 df_results['Distance to CBD'] = df_results['distance_to_cbd'].apply(lambda x: f"{x:.3f}")
                
#                 display_cols = ['Rank', 'latitude', 'longitude', 'Monthly Revenue', 'Annual Revenue', 
#                                'Traffic Factor', 'Market Saturation', 'Distance to CBD', 'Opportunity Score']
#                 column_names = ['Rank', 'Latitude', 'Longitude', 'Monthly Revenue', 'Annual Revenue', 
#                                'Traffic Factor', 'Saturation', 'Distance to CBD', 'Opportunity Score']
                
#                 df_show = df_results[display_cols].copy()
#                 df_show.columns = column_names
                
#                 st.dataframe(df_show, use_container_width=True, hide_index=True)
                
#                 # Charts and map
#                 st.subheader("📊 Analysis")
                
#                 # Revenue chart
#                 fig = create_revenue_comparison_chart(predictions)
#                 if fig:
#                     st.plotly_chart(fig, use_container_width=True)
                
#                 # Map
#                 st.subheader("🗺️ Location Map")
#                 existing_branches = None
#                 if show_existing:
#                     existing_branches = branch_performance[branch_performance['Category'] == category]
                
#                 location_map = create_location_map(predictions, existing_branches)
#                 if location_map:
#                     st_folium(location_map, width=700, height=500)
    
#     elif page == "📊 Model Performance":
#         st.header("📊 Model Performance Dashboard")
        
#         # Model overview
#         st.subheader("🤖 Available Models")
        
#         model_data = []
#         for category in models.keys():
#             cat_branches = branch_performance[branch_performance['Category'] == category]
#             model_data.append({
#                 'Category': category,
#                 'Status': '✅ Trained',
#                 'Training Data Points': len(cat_branches),
#                 'Avg Monthly Revenue': f"Rp {cat_branches['monthly_gtv'].mean():,.0f}" if len(cat_branches) > 0 else "N/A"
#             })
        
#         df_models = pd.DataFrame(model_data)
#         st.dataframe(df_models, use_container_width=True, hide_index=True)
        
#         # Feature importance info
#         st.subheader("🎯 Model Features")
#         st.write("Models use the following features for prediction:")
        
#         if feature_names:
#             feature_info = {
#                 'Feature': feature_names,
#                 'Description': [
#                     'Latitude coordinate',
#                     'Longitude coordinate', 
#                     'Average Order Value',
#                     'Estimated daily transactions',
#                     'Revenue per transaction',
#                     'High value customer indicator',
#                     'High volume branch indicator',
#                     'Subdistrict encoding'
#                 ]
#             }
            
#             st.dataframe(pd.DataFrame(feature_info), use_container_width=True, hide_index=True)
        
#         st.info("💡 Model performance metrics (R², MAE, RMSE) are displayed during training.")
    
#     elif page == "📈 Data Insights":
#         st.header("📈 Data Insights")
        
#         # Overview metrics
#         col1, col2, col3, col4 = st.columns(4)
        
#         with col1:
#             st.metric("🏪 Total Branches", len(branch_performance))
        
#         with col2:
#             st.metric("🏷️ Categories", branch_performance['Category'].nunique())
        
#         with col3:
#             st.metric("📍 Subdistricts", branch_performance['subdistrictName'].nunique())
        
#         with col4:
#             avg_revenue = branch_performance['monthly_gtv'].mean()
#             st.metric("💰 Avg Monthly Revenue", f"Rp {avg_revenue:,.0f}")
        
#         # Revenue distribution
#         st.subheader("💰 Revenue Distribution")
        
#         fig_hist = px.histogram(
#             branch_performance,
#             x='monthly_gtv',
#             nbins=30,
#             title="Distribution of Monthly Revenue"
#         )
#         fig_hist.update_xaxes(title="Monthly Revenue (Rp)")
#         fig_hist.update_yaxes(title="Number of Branches")
        
#         st.plotly_chart(fig_hist, use_container_width=True)
        
#         # Category performance
#         st.subheader("🏷️ Performance by Category")
        
#         category_stats = branch_performance.groupby('Category').agg({
#             'monthly_gtv': ['count', 'mean', 'median'],
#             'aov_2024': 'mean'
#         }).round(0)
        
#         category_stats.columns = ['Branch Count', 'Avg Revenue', 'Median Revenue', 'Avg AOV']
#         category_stats = category_stats.sort_values('Avg Revenue', ascending=False).reset_index()
        
#         # Format currency columns
#         for col in ['Avg Revenue', 'Median Revenue', 'Avg AOV']:
#             category_stats[col] = category_stats[col].apply(lambda x: f"Rp {x:,.0f}")
        
#         st.dataframe(category_stats, use_container_width=True, hide_index=True)
        
#         # Geographic distribution
#         st.subheader("📍 Geographic Distribution")
        
#         district_stats = branch_performance.groupby('subdistrictName').agg({
#             'monthly_gtv': ['count', 'mean'],
#             'Category': 'nunique'
#         }).round(0)
        
#         district_stats.columns = ['Branch Count', 'Avg Revenue', 'Category Variety']
#         district_stats = district_stats.sort_values('Branch Count', ascending=False).head(10).reset_index()
#         district_stats['Avg Revenue'] = district_stats['Avg Revenue'].apply(lambda x: f"Rp {x:,.0f}")
        
#         st.write("**Top 10 Subdistricts by Branch Count:**")
#         st.dataframe(district_stats, use_container_width=True, hide_index=True)
        
#         # Top performers
#         st.subheader("🏆 Top Performing Branches")
        
#         top_branches = branch_performance.nlargest(10, 'monthly_gtv')[
#             ['Category', 'subdistrictName', 'monthly_gtv', 'aov_2024']
#         ].reset_index(drop=True)
        
#         top_branches['Rank'] = range(1, len(top_branches) + 1)
#         top_branches['Monthly Revenue'] = top_branches['monthly_gtv'].apply(lambda x: f"Rp {x:,.0f}")
#         top_branches['AOV'] = top_branches['aov_2024'].apply(lambda x: f"Rp {x:,.0f}")
        
#         display_top = top_branches[['Rank', 'Category', 'subdistrictName', 'Monthly Revenue', 'AOV']]
#         display_top.columns = ['Rank', 'Category', 'Subdistrict', 'Monthly Revenue', 'AOV']
        
#         st.dataframe(display_top, use_container_width=True, hide_index=True)

# if __name__ == "__main__":
#     main()

2025-09-29 07:34:43.699 
  command:

    streamlit run C:\Users\ESB\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-09-29 07:34:43.703 No runtime found, using MemoryCacheStorageManager
2025-09-29 07:34:43.727 No runtime found, using MemoryCacheStorageManager
2025-09-29 07:34:43.854 Session state does not function when running a script without `streamlit run`
