In [1]:
import subprocess
import sys

# Ensure each listed package can be imported; any that cannot is installed
# with pip into the interpreter that is running this kernel.
required_packages = ['gridmeter', 'pandas', 'plotnine']
pip_install_cmd = [sys.executable, "-m", "pip", "install"]

for package in required_packages:
    try:
        __import__(package)
    except ImportError:
        # check_call raises CalledProcessError if pip exits non-zero
        subprocess.check_call(pip_install_cmd + [package])

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gridmeter as gm

In [2]:
# Loadshape and feature data

n_treatment = 100
n_pool = 1000


def above_median_usage(df):
    """Return the rows of ``df`` whose summer and winter usage both exceed the median.

    Written as a named function instead of a lambda bound to ``filter`` so the
    ``filter`` builtin is not shadowed for the rest of the notebook.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose ``summer_usage`` and ``winter_usage`` columns.

    Returns
    -------
    pandas.DataFrame
        The subset of ``df`` where each usage column is strictly greater than
        that column's 0.5 quantile.
    """
    high_summer = df.summer_usage > df.summer_usage.quantile(0.5)
    high_winter = df.winter_usage > df.winter_usage.quantile(0.5)
    return df[high_summer & high_winter]


def features_and_loadshape_for(set_name, df_features, df_loadshape):
    """Select one population subset and return its features and long-form loadshape.

    Parameters
    ----------
    set_name : str
        Value of the ``set`` column to keep (``'pool'`` or ``'treatment'``).
    df_features : pandas.DataFrame
        Feature table with a ``set`` column and a ``meter_id`` column.
    df_loadshape : pandas.DataFrame
        Wide loadshape table looked up by meter id with ``.loc``.
        NOTE(review): presumably indexed by ``meter_id`` with ``month`` columns;
        confirm against ``features_monthly()``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The features for the subset (``meter_id`` renamed to ``id``, ``set``
        dropped) and the stacked loadshape with ``id``, ``time`` and
        ``loadshape`` columns.
    """
    features = (
        df_features[df_features['set'] == set_name]
        .rename(columns={'meter_id': 'id'})
        .drop(columns=['set'])
    )
    loadshape = (
        df_loadshape.loc[features["id"]]
        .stack()
        .reset_index()
        .rename(columns={'meter_id': 'id', 'month': 'time', 0: 'loadshape'})
    )
    return features, loadshape


population = gm.DummyTreatmentPoolPopulation(
    n_treatment=n_treatment,
    n_pool=n_pool,
    treatment_filter_function=above_median_usage,
)

df_features = population.features()
df_loadshape = population.features_monthly()

df_features_pool, df_ls_pool = features_and_loadshape_for('pool', df_features, df_loadshape)
df_features_treatment, df_ls_treatment = features_and_loadshape_for('treatment', df_features, df_loadshape)

# Fake time series data
# Build a testing dataframe with one row per (treatment id, month): the id,
# a month-end datetime, and random observed / modeled values.
num_intervals = 12  # 1 per month

treatment_ids = df_features_treatment["id"].to_numpy()
# Size every column from the ids actually present rather than from n_treatment,
# so the columns always have equal length even if the two counts differ.
n_ts_rows = num_intervals * len(treatment_ids)

# MonthEnd() is the offset behind the 'M' alias; passing the object avoids the
# alias deprecation in newer pandas while still working on older versions.
month_ends = pd.date_range(start='2023-01-01', periods=num_intervals, freq=pd.offsets.MonthEnd())

# Seeded generator so the fake values are the same on every Restart & Run All
ts_rng = np.random.default_rng(42)

# Create a DataFrame with 'id', 'datetime', 'observed', and 'modeled' columns
df_ts_treatment = pd.DataFrame({
    'id': np.repeat(treatment_ids, num_intervals),  # each id repeated once per month
    'datetime': month_ends.tolist() * len(treatment_ids),  # the 12 month-ends, cycled per id
    'observed': ts_rng.random(n_ts_rows),  # uniform random in [0, 1)
    'modeled': ts_rng.random(n_ts_rows)  # uniform random in [0, 1)
})

INFO:root:Caching objects to .cache


In [3]:
# Set data classes
# For the purposes of this tutorial, we'll set both loadshapes and features

data_settings = gm.Data_Settings(AGG_TYPE=None, LOADSHAPE_TYPE=None, time_period=None)


def make_data(loadshape_df, features_df):
    """Create a ``gm.Data`` from the shared ``data_settings`` and load both inputs.

    Parameters
    ----------
    loadshape_df : pandas.DataFrame
        Long-form loadshape frame passed to ``set_data`` as ``loadshape_df``.
    features_df : pandas.DataFrame
        Feature frame passed to ``set_data`` as ``features_df``.

    Returns
    -------
    gm.Data
        The object on which ``set_data`` was called.
    """
    data = gm.Data(data_settings)
    data.set_data(loadshape_df=loadshape_df, features_df=features_df)
    return data


# Ending the cell on assignments keeps the bare object repr out of the output
pool_data = make_data(df_ls_pool, df_features_pool)
treatment_data = make_data(df_ls_treatment, df_features_treatment)

<gridmeter._utils.data_processing.Data at 0x7faa4ff28250>

In [4]:
# Build a Clustering from default settings, then request the comparison group
# for the treatment data out of the pool data. The call returns two frames:
# the cluster assignment per id and the per-treatment-id coefficients.
clustering_settings = gm.Clustering_Settings()
clustering = gm.Clustering(clustering_settings)
comparison_result = clustering.get_comparison_group(treatment_data, pool_data)
df_cg, df_t_coeffs = comparison_result
df_cg

Unnamed: 0_level_0,cluster
id,Unnamed: 1_level_1
meter_540,-1
meter_884,-1
meter_526,-1
meter_528,-1
meter_529,-1
...,...
meter_843,17
meter_715,17
meter_1037,17
meter_966,17


In [5]:
# Tally how many entries carry each distinct cluster label in df_cg
cluster_labels, cluster_sizes = np.unique(df_cg.to_numpy(), return_counts=True)
(cluster_labels, cluster_sizes)

(array([-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17]),
 array([718,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,
         15,  15,  15,  15,  15,  15]))

In [6]:
# Display the second frame returned by get_comparison_group: one row per id
# and one pct_cluster_<k> column per cluster.
df_t_coeffs

Unnamed: 0_level_0,pct_cluster_0,pct_cluster_1,pct_cluster_2,pct_cluster_3,pct_cluster_4,pct_cluster_5,pct_cluster_6,pct_cluster_7,pct_cluster_8,pct_cluster_9,pct_cluster_10,pct_cluster_11,pct_cluster_12,pct_cluster_13,pct_cluster_14,pct_cluster_15,pct_cluster_16,pct_cluster_17
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
meter_0,0.000000,0.000000,0.000000,0.000000,0.000000,0.146114,0.699488,0.087208,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.067190,0.000000,0.000000,0.000000
meter_100,0.000000,0.000000,0.000000,0.000000,0.102607,0.185856,0.004853,0.000000,0.000000,0.000000,0.692002,0.000000,0.000000,0.007339,0.004280,0.003063,0.000000,0.000000
meter_1001,0.000007,0.741419,0.018993,0.000020,0.000000,0.000004,0.000000,0.086968,0.000055,0.000086,0.000185,0.000041,0.000049,0.000015,0.000000,0.000014,0.132058,0.020087
meter_1012,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.601734,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.398266,0.000000,0.000000,0.000000
meter_1016,0.000283,0.000000,0.000000,0.338514,0.277650,0.000000,0.021688,0.020005,0.000394,0.193261,0.000126,0.000035,0.000013,0.000000,0.147790,0.000000,0.000000,0.000242
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
meter_928,0.000000,0.375161,0.000000,0.000000,0.000000,0.000000,0.000000,0.487172,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.031563,0.000000,0.000000,0.106104
meter_94,0.000000,0.000000,0.133041,0.000000,0.184343,0.000000,0.013029,0.000000,0.000000,0.000000,0.000000,0.000000,0.000001,0.000001,0.511004,0.000000,0.158581,0.000000
meter_963,0.000000,0.054396,0.005232,0.000000,0.000000,0.165016,0.000000,0.046163,0.000000,0.000000,0.198056,0.160730,0.088183,0.000000,0.000000,0.282224,0.000000,0.000000
meter_98,0.000001,0.000000,0.000000,0.000000,0.222379,0.000000,0.204836,0.174262,0.000000,0.000933,0.000000,0.000000,0.220176,0.030730,0.068607,0.078075,0.000000,0.000000
