# In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import subprocess

# Per-building data preparation for few-shot anomaly detection.
#
# For every distinct building_id in ./imputed_train.csv this script writes
# three CSV files into `dataset_path`:
#   train_<id>.csv       - training rows, 'meter_reading' renamed to 'feature_0'
#   test_<id>.csv        - test rows, same layout as the training file
#   test_label_<id>.csv  - test rows with the 'anomaly' label instead of the reading
# and then launches the UniTS few-shot fine-tuning shell script.

# Directory that receives the per-building CSV files
dataset_path = './dataset/LEAD_temp/'
df = pd.read_csv('./imputed_train.csv')

# Make sure the dataset path exists
os.makedirs(dataset_path, exist_ok=True)

# Shell script launched once per building
finetune_script = './scripts/few_shot_anomaly_detection/UniTS_finetune_few_shot_anomaly_detection.sh'

# Loop through each unique building_id
for bid in df['building_id'].unique():
    print("*" * 40 + ' ' + str(bid))

    # Filter the DataFrame for the current building_id
    df1 = df.loc[df['building_id'] == bid]

    # Define the target labels (anomaly column)
    y = df1['anomaly']

    # Stratification needs at least two rows of every class; a building with
    # a single row of some class would otherwise raise ValueError and abort
    # the whole loop, so fall back to a plain random split in that case.
    stratify_labels = y if y.value_counts().min() >= 2 else None
    if stratify_labels is None:
        print(f'Building {bid}: too few rows per class to stratify; using an unstratified split')

    # Split the data into 70% train / 30% test
    df_train, df_test = train_test_split(
        df1, test_size=0.3, stratify=stratify_labels, random_state=42
    )

    # Build the test labels from the raw split, *before* 'anomaly' is dropped
    # from the test features; otherwise the label file contains no labels.
    df_test_labels = df_test.drop(columns=['building_id', 'meter_reading'])

    # Prepare training data
    df_train = df_train.rename(columns={'meter_reading': 'feature_0'})
    df_train = df_train.drop(columns=['building_id', 'anomaly'])
    train_path = os.path.join(dataset_path, f'train_{bid}.csv')
    df_train.to_csv(train_path, index=False)

    # Prepare test data
    df_test = df_test.rename(columns={'meter_reading': 'feature_0'})
    df_test = df_test.drop(columns=['building_id', 'anomaly'])
    test_path = os.path.join(dataset_path, f'test_{bid}.csv')
    df_test.to_csv(test_path, index=False)

    # Write the test labels (same rows as the test file, reading removed)
    test_label_path = os.path.join(dataset_path, f'test_label_{bid}.csv')
    df_test_labels.to_csv(test_label_path, index=False)

    # Execute the shell script for anomaly detection.
    # subprocess is used instead of the IPython-only `!bash ...` escape so the
    # code runs both inside a notebook and as a plain Python script; output is
    # streamed line by line so progress stays visible during a long run.
    # NOTE(review): the script is invoked without the building id - confirm
    # how it locates the per-building files written above.
    with subprocess.Popen(
        ['bash', finetune_script],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors='replace',
    ) as proc:
        for line in proc.stdout:
            print(line, end='')
    # Like the shell escape, a failing run does not stop the loop, but it is
    # reported instead of passing silently.
    if proc.returncode != 0:
        print(f'WARNING: {finetune_script} exited with status {proc.returncode} for building {bid}')

    print("\n" * 3)  # Print newlines for separation between iterations