In [None]:
# Generate synthetic energy panel (2 years daily × 500 households)
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

In [None]:
def treat_effect(event_day):
    at = np.maximum(event_day,0)
    return -1.0*(at>=0) - 0.02*np.minimum(at,60)

In [None]:
rng = np.random.default_rng(123)

In [None]:
N_HOUSEHOLDS = 500
T_DAYS = 365*2
START_DATE = datetime(2023,1,1)

house_id = np.arange(1, N_HOUSEHOLDS+1)
region = rng.integers(1,6,size=N_HOUSEHOLDS)
dwelling_type = rng.choice(['apartment','detached','semi'], size=N_HOUSEHOLDS, p=[0.4,0.4,0.2])
income_class = rng.choice(['low','mid','high'], size=N_HOUSEHOLDS, p=[0.3,0.5,0.2])
baseline_kwh = rng.normal(25,5,size=N_HOUSEHOLDS)
efficiency = rng.beta(2,5,size=N_HOUSEHOLDS)
house_fe = rng.normal(0,2,size=N_HOUSEHOLDS)

treated_flag = rng.random(N_HOUSEHOLDS) < 0.5
adopt_day = np.full(N_HOUSEHOLDS, np.inf)
adopt_day[treated_flag] = rng.integers(30, T_DAYS-30, treated_flag.sum())

In [None]:
dates = [START_DATE + timedelta(days=t) for t in range(T_DAYS)]
day_idx = np.arange(T_DAYS)
temp = 20 + 10*np.sin(2*np.pi*day_idx/365) + rng.normal(0,2,size=T_DAYS)
price_index = 100 + 0.05*day_idx + 5*np.sin(2*np.pi*day_idx/180)
is_weekend = np.array([d.weekday()>=5 for d in dates], dtype=int)

rows=[]
cons_prev = rng.normal(25,5,size=N_HOUSEHOLDS)
for t in range(T_DAYS):
    treat_t = (t >= adopt_day).astype(float)
    event_t = t - adopt_day
    tau = treat_effect(event_t)
    eps = rng.normal(0,2,size=N_HOUSEHOLDS)
    cons = baseline_kwh + house_fe \
           + 0.5*temp[t] + (-0.01)*price_index[t] \
           - 2*efficiency*temp[t]/30.0 \
           -0.5*is_weekend[t] \
           + tau + 0.6*cons_prev + eps
    prob_high = 1/(1+np.exp(-(cons-30)/5))
    high_usage = rng.binomial(1, prob_high)
    mask = rng.random(N_HOUSEHOLDS)<0.02
    cons[mask] = np.nan
    for i in range(N_HOUSEHOLDS):
        rows.append((house_id[i],t,dates[t],region[i],dwelling_type[i],income_class[i],efficiency[i],
                     treat_t[i],adopt_day[i] if np.isfinite(adopt_day[i]) else -1,
                     temp[t],price_index[t],is_weekend[t],cons[i],high_usage[i]))
    cons_prev = np.nan_to_num(cons, nan=np.nanmean(cons_prev))

cols = ["house_id","t","date","region","dwelling_type","income_class","efficiency",
        "treatment","adopt_day","temperature","price_index","is_weekend",
        "kwh","high_usage"]

energy_df = pd.DataFrame(rows,columns=cols)
energy_df["post"] = (energy_df["t"] >= energy_df["adopt_day"]).astype(int)
energy_df.loc[energy_df["adopt_day"]<0,"post"]=0
energy_df["event_time"] = energy_df["t"]-energy_df["adopt_day"]
energy_df.loc[energy_df["adopt_day"]<0,"event_time"]=np.nan

# Ready to use
print(energy_df.head())


   house_id  t       date  region dwelling_type income_class  efficiency  \
0         1  0 2023-01-01       1      detached         high    0.377498   
1         2  0 2023-01-01       4      detached         high    0.112848   
2         3  0 2023-01-01       3      detached          low    0.138131   
3         4  0 2023-01-01       1          semi          mid    0.093936   
4         5  0 2023-01-01       5      detached          low    0.280438   

   treatment  adopt_day  temperature  price_index  is_weekend        kwh  \
0        0.0      397.0    21.095264        100.0           1  47.234640   
1        0.0       -1.0    21.095264        100.0           1  49.606964   
2        0.0      250.0    21.095264        100.0           1  41.784376   
3        0.0      338.0    21.095264        100.0           1  51.684636   
4        0.0       -1.0    21.095264        100.0           1        NaN   

   high_usage  post  event_time  
0           1     0      -397.0  
1           1     

In [3]:
energy_df.to_csv("energy_data_panel.csv", index=False)