# In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Synthetic smart-meter dataset generator.
#
# Builds one row per customer per hourly timestamp (consumption, voltage,
# estimated current, power factor), turns a small random fraction of the rows
# into labelled anomalies, and writes the result to a CSV file.

# Dataset configuration
num_customers = 100
days = 30
readings_per_day = 24  # hourly readings
anomaly_rate = 0.02    # probability that any single reading is made anomalous
random_seed = 42       # fixed seed so every run produces the same dataset

anomaly_types = ('low_consumption', 'high_consumption', 'bad_power_factor', 'voltage_spike')

# A single seeded generator drives every random draw, so the output is
# reproducible and does not depend on two independently seeded RNGs.
rng = np.random.default_rng(random_seed)

# Data generation: hourly timestamps starting at 2025-01-01 00:00, shared by all customers
timestamps = [datetime(2025, 1, 1) + timedelta(hours=i) for i in range(days * readings_per_day)]
data = []

for customer_id in range(1, num_customers + 1):
    for timestamp in timestamps:
        # Normal values. The Gaussian draw can occasionally fall below zero,
        # which is not a valid consumption reading, so it is clamped at zero.
        consumption = max(0.0, rng.normal(1.5, 0.5))  # in kWh
        voltage = rng.normal(220, 5)                  # in volts
        power_factor = rng.uniform(0.85, 0.99)
        anomaly = 0

        # Simulate an anomaly: exactly one quantity is pushed outside its normal range
        if rng.random() < anomaly_rate:
            anomaly = 1
            anomaly_type = anomaly_types[rng.integers(len(anomaly_types))]
            if anomaly_type == 'low_consumption':
                consumption = rng.uniform(0, 0.2)
            elif anomaly_type == 'high_consumption':
                consumption = rng.uniform(5, 10)
            elif anomaly_type == 'bad_power_factor':
                power_factor = rng.uniform(0.2, 0.5)
            elif anomaly_type == 'voltage_spike':
                voltage = rng.uniform(260, 300)

        # Estimated current (I = P / V), computed once from the final
        # consumption and voltage so it always matches the stored values.
        current = consumption * 1000 / voltage

        data.append([customer_id, timestamp, consumption, voltage, current, power_factor, anomaly])

# Build the DataFrame
df = pd.DataFrame(data, columns=[
    'customer_id', 'timestamp', 'consumption_kwh', 'voltage', 'current', 'power_factor', 'anomaly'
])

# Save as CSV
df.to_csv("smart_meter_anomaly_dataset.csv", index=False)
