In [1]:
from gridmeter._utils.data_processing import Data
from gridmeter._utils.data_processing_settings import Data_Settings
from gridmeter._utils import const as _const
import pandas as pd
import numpy as np
import time

## Time Series loadshapes

In [2]:
# Build a synthetic year of 15-minute interval data for three meters with
# 'id', 'datetime', 'observed' and 'modeled' columns, then drop every
# Wednesday so the loadshape code has a whole weekday missing.
num_intervals = 4 * 24 * 365  # 4 intervals/hour * 24 hours/day * 365 days
meter_ids = ["id1", "id2", "id3"]  # only 3 ids for easier comparison
num_rows = num_intervals * len(meter_ids)

# Seeded generator so the random columns are identical on every run
SEED = 42
rng = np.random.default_rng(SEED)

# "15min" is the non-deprecated spelling of the old "15T" offset alias
timestamps = pd.date_range(start="2023-01-01", periods=num_intervals, freq="15min")

df_all = pd.DataFrame(
    {
        "id": np.repeat(meter_ids, num_intervals),
        # same timestamps repeated once per id; already datetime64, so no
        # later pd.to_datetime conversion is needed
        "datetime": np.tile(timestamps.to_numpy(), len(meter_ids)),
        "observed": rng.random(num_rows),  # randomized
        "modeled": rng.random(num_rows),  # randomized
    }
)

# Boolean mask selecting Wednesdays (dayofweek == 2).
# NOTE(review): the original author's note says that also removing Mondays
# (dayofweek 0) gives a ValueError at the 80% threshold -- not verified here.
# An alternative experiment is to keep the rows and set 'observed'/'modeled'
# to NaN under this mask instead of dropping them.
day_mask = df_all["datetime"].dt.dayofweek.isin([2])

# Drop the masked rows; .copy() gives an independent frame (original index
# labels are kept, so the index has gaps where Wednesdays were removed)
df = df_all.loc[~day_mask].copy()

df

Unnamed: 0,id,datetime,observed,modeled
0,id1,2023-01-01 00:00:00,0.547960,0.037949
1,id1,2023-01-01 00:15:00,0.521766,0.367302
2,id1,2023-01-01 00:30:00,0.244515,0.404846
3,id1,2023-01-01 00:45:00,0.770039,0.439308
4,id1,2023-01-01 01:00:00,0.185729,0.262990
...,...,...,...,...
105115,id3,2023-12-31 22:45:00,0.364843,0.184650
105116,id3,2023-12-31 23:00:00,0.466790,0.678109
105117,id3,2023-12-31 23:15:00,0.222130,0.710218
105118,id3,2023-12-31 23:30:00,0.330719,0.472336


In [3]:
# Build loadshapes from the time-series frame with no explicit settings
# object (None is passed; presumably this selects the library defaults --
# the rendered result below has 504 columns per id).
default_data = Data(None)
data1 = default_data.set_data(time_series_df=df)
data1.loadshape

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,495,496,497,498,499,500,501,502,503,504
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
id1,0.523234,0.444197,0.543226,0.4772,0.499122,0.52988,0.478994,0.542424,0.451581,0.498146,...,0.451793,0.451793,0.451793,0.451793,0.451793,0.451793,0.451793,0.451793,0.451793,0.451793
id2,0.512938,0.595527,0.543043,0.525953,0.518777,0.528703,0.509195,0.459431,0.498508,0.508708,...,0.525863,0.525863,0.525863,0.525863,0.525863,0.525863,0.525863,0.525863,0.525863,0.525863
id3,0.466689,0.513957,0.482222,0.513554,0.445215,0.516137,0.530889,0.496336,0.484747,0.548073,...,0.507209,0.507209,0.507209,0.507209,0.507209,0.507209,0.507209,0.507209,0.507209,0.507209


In [4]:
# Same time-series input, but aggregated with the seasonal day-of-week time
# period (the rendered result below has 21 columns per id).
time_period = _const.TimePeriod.SEASONAL_DAY_OF_WEEK
settings = Data_Settings(TIME_PERIOD=time_period)

data = Data(settings)
data = data.set_data(time_series_df=df)
data.loadshape

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,12,13,14,15,16,17,18,19,20,21
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
id1,0.511339,0.503907,0.507128,0.498457,0.495924,0.496906,0.498048,0.513948,0.505046,0.507621,...,0.510816,0.508783,0.493736,0.499378,0.496749,0.50726,0.49394,0.49394,0.49394,0.49394
id2,0.50281,0.513786,0.5008,0.496712,0.485501,0.498,0.497817,0.506996,0.494967,0.509893,...,0.501165,0.502095,0.501979,0.503407,0.501799,0.510316,0.501904,0.501904,0.501904,0.501904
id3,0.502164,0.506218,0.495945,0.4995,0.510796,0.505319,0.497791,0.492094,0.505805,0.500109,...,0.492252,0.495058,0.497563,0.496854,0.508655,0.510244,0.502793,0.502793,0.502793,0.502793


## Unstacked loadshapes

In [5]:
# Build a small "unstacked" loadshape frame: one row per id and one column per
# time-period position (columns named "1".."N"), with some values missing.
ids = ["id1", "id2", "id3"]  # assumed to be unique

# One column per position of the day-of-week time period; the count is read
# from gridmeter's constants
values = range(1, _const.time_period_row_counts["day_of_week"] + 1)
row_cnt_per_id = 1
n_rows = len(ids) * row_cnt_per_id

# Random integers in [1, 100), stored as floats from the start so that NaN can
# be placed in any column without relying on implicit int -> float upcasting
loadshape_values = pd.DataFrame(
    np.random.randint(1, 100, size=(n_rows, len(values))).astype(float),
    columns=[str(i) for i in values],
)

# True marks a cell to blank out; each cell is picked independently with
# probability 0.2 (so roughly 20% of the values become NaN)
mask = np.random.choice([True, False], size=loadshape_values.shape, p=[0.2, 0.8])

# Replace the masked cells with NaN, then put the 'id' column in front
df_new = loadshape_values.mask(mask)
df_new.insert(0, "id", np.repeat(ids, row_cnt_per_id))

df_new

Unnamed: 0,id,1,2,3,4,5,6,7
0,id1,57,41,,,11.0,38.0,97
1,id2,44,49,68.0,35.0,91.0,,90
2,id3,26,16,6.0,,,,64


In [6]:
# Feed the pre-built (unstacked) loadshape frame straight in: aggregation,
# loadshape type and time period are all passed as None, and missing values
# are interpolated (in the result below the NaN gaps of df_new are filled
# linearly between their neighbours).
settings = Data_Settings(
    AGG_TYPE=None,
    LOADSHAPE_TYPE=None,
    TIME_PERIOD=None,
    INTERPOLATE_MISSING=True,
)

# Hand over a copy rather than df_new itself
unstack_df = df_new.copy()

data_new = Data(settings)
data_new = data_new.set_data(loadshape_df=unstack_df)
data_new.loadshape

Unnamed: 0_level_0,1,2,3,4,5,6,7
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
id1,57.0,41.0,31.0,21.0,11.0,38.0,97.0
id2,44.0,49.0,68.0,35.0,91.0,90.5,90.0
id3,26.0,16.0,6.0,20.5,35.0,49.5,64.0


## Normal Loadshapes