In [1]:
import pandas as pd
import numpy as np

In [2]:
def generate_random_meters(meter_count, exit_zones):
    """Create a table of ``meter_count`` synthetic meters.

    Parameters
    ----------
    meter_count : int
        Number of meters (rows) to generate.
    exit_zones : sequence
        Candidate exit zones; each meter is assigned one at random.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``meter_id`` ('meter_1' .. 'meter_<meter_count>'),
        ``aq_kwh`` (random integer in [1000, 50000)) and ``exit_zone``.
    """
    # Zones are drawn before quantities; keeping that order means a seeded
    # global NumPy RNG yields the same table as before.
    zone_draws = np.random.choice(exit_zones, size=meter_count)
    aq_draws = np.random.randint(low=1000, high=50000, size=meter_count)

    # Sequential, 1-based identifiers: meter_1, meter_2, ...
    sequence = pd.Series(range(1, meter_count + 1))
    labels = 'meter_' + sequence.astype(str)

    # Assemble directly in the final column order.
    return pd.DataFrame({
        'meter_id': labels,
        'aq_kwh': aq_draws,
        'exit_zone': zone_draws,
    })

In [3]:
def generate_consumption_data(meters, start_date, duration, exit_zones_list):
    """Build a synthetic per-meter, per-day table of random values.

    For every entry of ``meters['meter_id']`` one row is produced for each of
    ``duration`` consecutive days starting at ``start_date``.

    Parameters
    ----------
    meters : pandas.DataFrame
        Must contain a ``meter_id`` column; no other column is read.
    start_date : datetime-like
        First day of the period (forwarded to ``pd.date_range``).
    duration : int
        Number of daily rows to generate per meter.
    exit_zones_list : sequence
        Candidate exit zones. One is drawn at random for every generated row;
        the zone is not taken from ``meters['exit_zone']``. Must be non-empty
        whenever rows are actually generated.

    Returns
    -------
    pandas.DataFrame
        One row per (meter, day) with a fresh 0..n-1 index and the columns:
        ``meter_id``, ``date``, ``rate_p_per_kwh`` (random integer in
        [10, 100) times 0.005), ``exit_zone``, ``aq_min_kwh`` (always 0) and
        ``aq_max_kwh`` (random integer in [10, 2500)).
        If ``meters`` has no rows, an empty frame with these columns is
        returned.
    """
    columns = ['meter_id', 'date', 'rate_p_per_kwh', 'exit_zone', 'aq_min_kwh', 'aq_max_kwh']

    # Generate a list of dates for the forecast period
    dates = pd.date_range(start_date, periods=duration, freq='D')

    # Collect one frame per meter and concatenate once at the end. Growing a
    # DataFrame with pd.concat inside the loop is quadratic, and concatenating
    # onto an empty object-dtype frame is deprecated in recent pandas.
    frames = []
    for meter_id in meters['meter_id']:
        # The draw order (rate, aq_max, then one zone index per day) is kept
        # stable so a seeded global NumPy RNG produces the same values.
        rate = np.random.randint(10, 100, size=duration) * .005
        aq_max_kwh_rnd = np.random.randint(10, 2500, size=duration)
        rand_exit_zones = [
            exit_zones_list[np.random.randint(0, len(exit_zones_list))]
            for _ in range(duration)
        ]

        frames.append(pd.DataFrame({
            'meter_id': [meter_id] * duration,
            'date': dates,
            'rate_p_per_kwh': rate,
            'exit_zone': rand_exit_zones,
            'aq_min_kwh': 0,
            'aq_max_kwh': aq_max_kwh_rnd
        }))

    # pd.concat raises on an empty list, so handle "no meters" explicitly.
    if not frames:
        return pd.DataFrame(columns=columns)

    return pd.concat(frames, ignore_index=True)

In [6]:

# Generate 10 random meters, each assigned to one of the six exit zones
exit_zones = ['EA1', 'EA2', 'EA3', 'EA4', 'EA5', 'EA6']
meters = generate_random_meters(10, exit_zones)

In [7]:
# Render the generated meters table (meter_id, aq_kwh, exit_zone)
display(meters)

Unnamed: 0,meter_id,aq_kwh,exit_zone
0,meter_1,31158,EA2
1,meter_2,22264,EA2
2,meter_3,13951,EA5
3,meter_4,5391,EA5
4,meter_5,12135,EA6
5,meter_6,6987,EA3
6,meter_7,33305,EA2
7,meter_8,18625,EA5
8,meter_9,47536,EA4
9,meter_10,21576,EA1


In [9]:
# Generate consumption data for the meters over a 30-day period starting on January 1, 2022
start_date = pd.to_datetime('2022-01-01')
duration = 30  # number of daily rows generated per meter
consumption_data = generate_consumption_data(meters, start_date, duration, exit_zones)


In [10]:
# Render the generated consumption table (one row per meter per day)
display(consumption_data)

Unnamed: 0,meter_id,date,rate_p_per_kwh,exit_zone,aq_min_kwh,aq_max_kwh
0,meter_1,2022-01-01,0.215,EA4,0,1032
1,meter_1,2022-01-02,0.355,EA1,0,1617
2,meter_1,2022-01-03,0.285,EA6,0,1814
3,meter_1,2022-01-04,0.475,EA6,0,1426
4,meter_1,2022-01-05,0.280,EA2,0,1165
...,...,...,...,...,...,...
295,meter_10,2022-01-26,0.285,EA2,0,1023
296,meter_10,2022-01-27,0.240,EA4,0,240
297,meter_10,2022-01-28,0.050,EA6,0,1273
298,meter_10,2022-01-29,0.315,EA2,0,729
