In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
import math

def generate_campus_food_waste_data_with_location(
    start_date,
    num_days,
    base_capacity_kg,
    num_locations=5,
    seed=None,
):
    """Simulate hourly food-waste bin readings for several campus locations.

    Each location gets a random position inside a fixed bounding box, a
    traffic level, a capacity drawn from ``base_capacity_kg`` +/- 20 %, and a
    decomposition rate. For every hour of the simulated period the bin weight
    is increased by a sinusoidal daily pattern scaled by traffic, season
    (July and December are scaled to 40 %) and weekend factors, with a 1 %
    chance of a large "special event" spike. Bins are emptied at 08:00 on
    Monday, Wednesday and Friday with a 95 % success rate, and have a 10 %
    hourly chance of an emergency partial removal while above 85 % full.

    Args:
        start_date: ``datetime`` of the first simulated hour.
        num_days: Number of days to simulate (24 rows per day per location).
        base_capacity_kg: Nominal bin capacity; each location's capacity is
            this value multiplied by a random factor in [0.8, 1.2].
        num_locations: Number of simulated bins (IDs ``TPS_001``, ...).
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the output is reproducible and the global ``random``
            state is left untouched. When ``None`` (default), the
            module-level ``random`` generator is used, as before.

    Returns:
        pandas.DataFrame with one row per location per hour and the columns
        ``Timestamp``, ``Berat (kg)``, ``Fill_Percentage (%)``,
        ``Kualitas_Gas_CH4 (ppm)``, ``Kualitas_Gas_NH3 (ppm)``,
        ``Hari_dalam_Minggu``, ``Akhir_Pekan``, ``Hari_Pengambilan``,
        ``Lokasi_ID``, ``Latitude``, ``Longitude``. ``Hari_Pengambilan`` is
        True only on the row of an hour in which a scheduled pickup succeeded.
    """
    # Without a seed, keep drawing from the module-level generator in the same
    # call order as before, so callers that seed `random` globally still get
    # the same stream.
    rng = random if seed is None else random.Random(seed)

    # Bounding box from which bin coordinates are drawn.
    campus_coords = {
        'min_lat': 25.02,
        'max_lat': 25.04,
        'min_lon': 121.54,
        'max_lon': 121.57,
    }

    # Fixed English names indexed by datetime.weekday() (Monday == 0).
    # strftime("%A") is locale-dependent and would silently break the
    # schedule/weekend checks under a non-English locale.
    day_names = (
        "Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday",
    )
    pickup_weekdays = {0, 2, 4}  # Monday, Wednesday, Friday
    pickup_hour = 8              # scheduled pickups happen at 08:00

    def daily_pattern(hour):
        """Return the base hourly increment; a 24 h sinusoid in [0.1, 0.7]."""
        return 0.3 * math.sin(2 * math.pi * (hour - 8)/24) + 0.4

    # Build the per-location configuration. The order of the random draws
    # (traffic, capacity, latitude, longitude, decomposition rate) matches
    # the original implementation.
    locations = {}
    for i in range(num_locations):
        lokasi_id = f"TPS_{i+1:03d}"
        traffic_level = rng.choice([0.5, 0.7, 0.8, 0.9, 1.0])
        capacity = base_capacity_kg * rng.uniform(0.8, 1.2)
        latitude = rng.uniform(campus_coords['min_lat'], campus_coords['max_lat'])
        longitude = rng.uniform(campus_coords['min_lon'], campus_coords['max_lon'])
        locations[lokasi_id] = {
            'coords': (latitude, longitude),
            'traffic_level': traffic_level,
            'capacity': capacity,
            'decomposition_rate': rng.uniform(0.8, 1.2),
        }

    data = []

    for lokasi_id, loc_info in locations.items():
        # Per-location values that do not change inside the hourly loop.
        capacity = loc_info['capacity']
        traffic_level = loc_info['traffic_level']
        decomposition_rate = loc_info['decomposition_rate']
        latitude, longitude = loc_info['coords']

        berat = 0
        days_since_empty = 0

        for i in range(num_days * 24):
            timestamp = start_date + timedelta(hours=i)
            weekday = timestamp.weekday()
            hari = day_names[weekday]
            akhir_pekan = weekday >= 5
            jam = timestamp.hour

            # Seasonal adjustment: holidays in July and December cut waste
            # generation by 60 %.
            seasonal_factor = 0.4 if timestamp.month in (7, 12) else 1.0

            # Daily pattern with random variation.
            base_increment = daily_pattern(jam) * rng.uniform(0.8, 1.2)

            is_pickup_time = weekday in pickup_weekdays and jam == pickup_hour

            # A scheduled pickup fails 5 % of the time; a failed pickup hour
            # is treated as a normal filling hour.
            if is_pickup_time and rng.random() < 0.95:
                berat = rng.uniform(0, 0.2)  # small residue left after pickup
                days_since_empty = 0
                hari_pengambilan = True
            else:
                hari_pengambilan = False

                increment = base_increment * traffic_level * seasonal_factor

                # Weekends generate less waste.
                if akhir_pekan:
                    increment *= rng.uniform(0.3, 0.7)

                # Special event: 1 % chance of a large spike.
                if rng.random() < 0.01:
                    increment *= rng.uniform(3, 5)
                    increment += rng.uniform(2, 4)

                berat += increment
                days_since_empty += 1/24  # one hour expressed in days

                # Emergency removal: 10 % chance per hour while above 85 %.
                if (berat / capacity) > 0.85 and rng.random() < 0.1:
                    berat *= 0.4
                    days_since_empty = 0

                if berat > capacity:
                    # Allow temporary overflow: keep 20 % of the excess.
                    excess = berat - capacity
                    berat = capacity + (excess * 0.2)

                    # 0.1 % decomposition loss. NOTE: this is applied only in
                    # hours where the bin overflowed, as in the original code.
                    berat *= 0.999

                    # Never report more than 30 % over capacity.
                    berat = min(berat, capacity * 1.3)

            fill_percentage = (berat / capacity) * 100

            # Gas readings grow with weight and with time since last emptied.
            decomposition_factor = days_since_empty * decomposition_rate
            ch4 = (berat * 3 + decomposition_factor * 2) * rng.uniform(0.9, 1.1)
            nh3 = (berat * 1 + decomposition_factor * 0.5) * rng.uniform(0.9, 1.1)

            # Additive sensor noise.
            ch4 += rng.gauss(0, 0.5)
            nh3 += rng.gauss(0, 0.2)

            # Readings cannot be negative.
            ch4 = max(0, ch4)
            nh3 = max(0, nh3)

            data.append([
                timestamp,
                berat,
                fill_percentage,
                ch4,
                nh3,
                hari,
                akhir_pekan,
                hari_pengambilan,
                lokasi_id,
                latitude,
                longitude,
            ])

    df = pd.DataFrame(data, columns=[
        'Timestamp', 'Berat (kg)', 'Fill_Percentage (%)',
        'Kualitas_Gas_CH4 (ppm)', 'Kualitas_Gas_NH3 (ppm)',
        'Hari_dalam_Minggu', 'Akhir_Pekan', 'Hari_Pengambilan',
        'Lokasi_ID', 'Latitude', 'Longitude'
    ])

    return df

# Simulation parameters
start_date = datetime(2024, 4, 12)
num_days, num_locations = 360, 8
base_capacity_kg = 15  # nominal capacity; varied per location by the generator

# Build the synthetic dataset
simulated_data = generate_campus_food_waste_data_with_location(
    start_date=start_date,
    num_days=num_days,
    base_capacity_kg=base_capacity_kg,
    num_locations=num_locations,
)

# Persist to CSV without the DataFrame index
simulated_data.to_csv('synthetic_food_waste_data.csv', index=False)

# Quick summary of what was generated
print("Dataset generated with shape:", simulated_data.shape)
print(simulated_data.head())

Dataset generated with shape: (69120, 11)
            Timestamp  Berat (kg)  Fill_Percentage (%)  \
0 2024-04-12 00:00:00    0.119377             0.767254   
1 2024-04-12 01:00:00    0.245605             1.578541   
2 2024-04-12 02:00:00    4.217177            27.104413   
3 2024-04-12 03:00:00    4.323073            27.785026   
4 2024-04-12 04:00:00    4.484658            28.823553   

   Kualitas_Gas_CH4 (ppm)  Kualitas_Gas_NH3 (ppm) Hari_dalam_Minggu  \
0                0.000641                0.000000            Friday   
1                0.724290                0.118396            Friday   
2               13.584027                4.471376            Friday   
3               13.432985                4.674006            Friday   
4               12.552413                4.954533            Friday   

   Akhir_Pekan  Hari_Pengambilan Lokasi_ID   Latitude  Longitude  
0        False             False   TPS_001  25.020601  121.56524  
1        False             False   TPS_001  25.0