# Random Datasets generator

In [16]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta


def gen_sintetic_data(n_dots, freq):
    begin = datetime.now() - timedelta(minutes=n_dots)
    timestamps = pd.date_range(start=begin, periods=n_dots, freq=freq)

    base_consumption = np.random.uniform(0.3, 2.5, n_dots)
    seasonality = np.sin(np.linspace(0, 4 * np.pi, n_dots)) * 0.5
    noise = np.random.normal(0, 0.1, n_dots)
    total_consumption = base_consumption + seasonality + noise

    df = pd.DataFrame({
      'timestamp': timestamps,
      'power_W': total_consumption,
      'voltage_V': np.random.uniform(210, 240, n_dots),
      'current_A': total_consumption / np.random.uniform(210, 240, n_dots),
      'power_factor': np.random.uniform(0.8, 1, n_dots),
      'temperature_C': np.random.uniform(15, 35, n_dots),
      'humidity_%': np.random.uniform(30, 80, n_dots)
    })

    return df


In [17]:
sintetic_data = gen_sintetic_data(20000, '1min')
print(sintetic_data)

                       timestamp   power_W   voltage_V  current_A  \
0     2025-02-24 20:44:52.113561  0.330870  234.854028   0.001398   
1     2025-02-24 20:45:52.113561  2.324513  230.046691   0.009817   
2     2025-02-24 20:46:52.113561  0.535257  230.773517   0.002292   
3     2025-02-24 20:47:52.113561  2.127344  212.850490   0.009698   
4     2025-02-24 20:48:52.113561  1.450152  216.600450   0.006175   
...                          ...       ...         ...        ...   
19995 2025-03-10 17:59:52.113561  0.830829  238.271475   0.003501   
19996 2025-03-10 18:00:52.113561  1.699577  236.791173   0.008053   
19997 2025-03-10 18:01:52.113561  0.917076  235.395946   0.004232   
19998 2025-03-10 18:02:52.113561  0.977196  231.626371   0.004486   
19999 2025-03-10 18:03:52.113561  0.796756  231.644741   0.003769   

       power_factor  temperature_C  humidity_%  
0          0.915060      16.150071   34.574934  
1          0.850095      22.066307   54.163969  
2          0.940574     

In [18]:
def save_to_csv(data, filename, path):
    data.to_csv(f"{path}/{filename}.csv", index=False)
    print(f"Data saved as {filename}.csv")

save_to_csv(sintetic_data, "energy_data", "./../data")

Data saved as energy_data.csv


In [19]:
energy_dataset = pd.read_csv('./../data/energy_data.csv')

energy_dataset.head()

Unnamed: 0,timestamp,power_W,voltage_V,current_A,power_factor,temperature_C,humidity_%
0,2025-02-24 20:44:52.113561,0.33087,234.854028,0.001398,0.91506,16.150071,34.574934
1,2025-02-24 20:45:52.113561,2.324513,230.046691,0.009817,0.850095,22.066307,54.163969
2,2025-02-24 20:46:52.113561,0.535257,230.773517,0.002292,0.940574,22.248496,50.263001
3,2025-02-24 20:47:52.113561,2.127344,212.85049,0.009698,0.91986,23.228894,63.260545
4,2025-02-24 20:48:52.113561,1.450152,216.60045,0.006175,0.844777,32.713557,79.213044
