In [None]:
import numpy as np
import pandas as pd


# Seed NumPy's global random generator so that every run produces the same
# data (reproducibility). generate_datasets() draws from this global state.
np.random.seed(42)

def _make_economy_data(n_samples=200, n_features=50, n_factors=5):
    """Return an (n_samples, n_features) array of highly collinear indicators."""
    # A few latent factors drive every indicator, which is what creates the
    # strong multicollinearity between columns.
    base_factors = np.random.rand(n_samples, n_factors)
    weights = np.random.rand(n_factors, n_features)
    # Each indicator = linear combination of the latent factors + small noise.
    noise = np.random.normal(0, 0.1, (n_samples, n_features))
    return np.dot(base_factors, weights) + noise


def _make_sensor_data(n_steps=100, n_sensors=30):
    """Return an (n_steps, n_sensors) array: one sine wave seen through noisy sensors."""
    t = np.linspace(0, 10, n_steps)
    true_signal = np.sin(t).reshape(-1, 1)  # clean signal as a column vector
    # Outer product: every sensor sees the same signal with its own amplitude.
    clean = true_signal @ np.random.rand(1, n_sensors)
    # Heavy Gaussian noise (sigma = 0.5) on top of a signal of amplitude <= 1.
    return clean + np.random.normal(0, 0.5, (n_steps, n_sensors))


def _make_stock_data(n_days=150, n_indicators=40):
    """Return an (n_days, n_indicators) array of noisy moving averages of a random walk."""
    prices = pd.Series(np.cumsum(np.random.randn(n_days)) + 100)
    indicators = np.zeros((n_days, n_indicators))
    for col in range(n_indicators):
        # Random window length in [3, 19] gives each indicator its own smoothing.
        window = np.random.randint(3, 20)
        # The first window-1 rolling means are NaN; back-fill them with the
        # first valid value. `.bfill()` replaces the deprecated
        # `fillna(method='bfill')` and yields the same numbers.
        moving_avg = prices.rolling(window=window).mean().bfill()
        indicators[:, col] = moving_avg + np.random.normal(0, 1, n_days)
    return indicators


def _make_image_data(size=512):
    """Return a (size, size) array holding a smooth sinusoidal grid pattern."""
    axis = np.linspace(0, 1, size)
    X, Y = np.meshgrid(axis, axis)
    # NOTE: sin * cos lies in [-1, 1], so the values span [-255, 255] rather
    # than a displayable 0-255 pixel range. Kept as-is to preserve the data.
    return np.sin(10 * X) * np.cos(10 * Y) * 255


def generate_datasets(output_dir="."):
    """Generate four synthetic datasets and write each one to a CSV file.

    Files written inside ``output_dir``:

    * ``data_ekonomi.csv``  - 200 x 50, highly collinear "economic" indicators.
    * ``data_sensor.csv``   - 100 x 30, a sine signal buried in Gaussian noise.
    * ``data_saham.csv``    - 150 x 40, noisy moving averages of a random walk.
    * ``citra_digital.csv`` - 512 x 512 matrix, written without a header row.

    Random numbers come from NumPy's global generator, so call
    ``np.random.seed(...)`` beforehand for reproducible output.

    Args:
        output_dir: Directory that receives the CSV files. It is created if
            missing. Defaults to the current working directory.

    Returns:
        None. The function writes files and prints a progress line for each.
    """
    import os  # local import: keeps this block independent of the file header

    print("Sedang membangkitkan 4 Dataset Realistis...")
    os.makedirs(output_dir, exist_ok=True)

    def _save(array, filename, header=True):
        # Persist a 2-D array as CSV without the pandas index column.
        path = os.path.join(output_dir, filename)
        pd.DataFrame(array).to_csv(path, index=False, header=header)

    # The four datasets are generated in this fixed order so that the sequence
    # of random draws (and therefore the output for a given seed) is stable.

    # --- 1. ECONOMIC DATA (high correlation): 200 regions, 50 indicators ---
    _save(_make_economy_data(), "data_ekonomi.csv")
    print("✅ data_ekonomi.csv (200x50) - Multikolinearitas Tinggi")

    # --- 2. SENSOR DATA (signal + noise): 100 timestamps, 30 sensors ---
    _save(_make_sensor_data(), "data_sensor.csv")
    print("✅ data_sensor.csv (100x30) - Sinyal dengan Gaussian Noise")

    # --- 3. STOCK DATA (technical indicators): 150 days, 40 indicators ---
    _save(_make_stock_data(), "data_saham.csv")
    print("✅ data_saham.csv (150x40) - Time Series Features")

    # --- 4. DIGITAL IMAGE (spatial data): 512 x 512 matrix, no header ---
    _save(_make_image_data(), "citra_digital.csv", header=False)
    print("✅ citra_digital.csv (512x512) - Matriks Piksel")

# Run the generator: writes the four CSV files and prints a progress line for each.
generate_datasets()

Sedang membangkitkan 4 Dataset Realistis...
✅ data_ekonomi.csv (200x50) - Multikolinearitas Tinggi
✅ data_sensor.csv (100x30) - Sinyal dengan Gaussian Noise
✅ data_saham.csv (150x40) - Time Series Features


  data_stock[:, i] = pd.Series(prices).rolling(window=window).mean().fillna(method='bfill') + np.random.normal(0, 1, 150)


✅ citra_digital.csv (512x512) - Matriks Piksel
