In [1]:
import dill
# Serialized agent object to inspect in the cells below.
fname = "agent_objects/gpt-4o/classification/Qtime_text_image_C3_B3/AtrialFibrillation.pkl"

# NOTE(review): dill.load (like pickle.load) can execute arbitrary code embedded in
# the file, so only open pickles that come from a trusted source.
with open(fname, mode="rb") as pkl_file:
    agent = dill.load(pkl_file)

2024-12-14 20:08:20.642954: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-12-14 20:08:20.665515: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-12-14 20:08:20.665553: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-14 20:08:20.680048: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Display the requirements dictionary stored on the loaded agent.
agent.user_requirements

{'task_description': 'The user wants to build a classification model to categorize ECG signals into three types of atrial fibrillation (AF) using a dataset of ECG records. The model should be deployable on wearable devices like Fitbit trackers.',
 'data_aspects': {'name': 'PhysioNet ECG Dataset',
  'description': 'The dataset consists of two-channel ECG recordings created from data used in the Computers in Cardiology Challenge 2004. It includes 5-second segments of atrial fibrillation, with each signal sampled at 128 samples per second.',
  'features': 'The dataset contains two 1-D ECG signals per instance. The class labels are: n (non-terminating AF), s (self-terminating AF after at least one minute), and t (terminating immediately within one second).',
  'context': 'The dataset was part of an open competition aimed at developing automated methods for predicting spontaneous termination of atrial fibrillation.',
  'patterns': "The time series plots show distinct patterns for each class

### Multimodal Time Series Generation and Understanding

- Plain numerical time series (TS) values
- Basic data description
- TS image-based description by self-analysis
- TS image (features combined into one figure if multivariate)

In [None]:
import numpy as np
import pandas as pd

from utils.data_desc import CLS_DATASETS, REG_DATASETS
from utils.data_desc import data_contexts, feature_descriptions, feature_names
from utils.data_loader import load_dataset

In [None]:
import os
import matplotlib.pyplot as plt

# For every regression dataset, summarise the training series of each label bin
# (lowest quartile, inter-quartile range, highest quartile) as mean and std arrays,
# save those arrays under ts_values/, and save a mean ± 2*std plot under ts_images/.
fsize = 1024 / 300  # figure side in inches: 1024 px at 300 dpi (before tight cropping)

for data_name in sorted(REG_DATASETS):
    task = "regression"
    X_train, y_train, _, _ = load_dataset(data_name, task)
    q75, q25 = np.percentile(y_train, [75, 25])
    print(data_name, X_train.shape, q25, q75)

    # Everything below depends only on the dataset, not on the bin, so do it once.
    os.makedirs(f"ts_values/{data_name}", exist_ok=True)
    os.makedirs(f"ts_images/{data_name}", exist_ok=True)
    t = np.arange(X_train.shape[1])
    n_features = X_train.shape[2]

    # image name -> (indices of the training samples in the bin, stem of the .npy files)
    # NOTE: the 'iqr' bin uses strict inequalities, so it is empty when q25 == q75;
    # np.mean / np.std then return NaN arrays (with a RuntimeWarning).
    label_bins = {
        "q1": (np.where(y_train <= q25)[0], f"lower-than-{q25:.2f}"),
        "iqr": (
            np.where((y_train > q25) & (y_train < q75))[0],
            f"between-{q25:.2f}-and-{q75:.2f}",
        ),
        "q3": (np.where(y_train >= q75)[0], f"higher-than-{q75:.2f}"),
    }

    for cname, (cidx, value_stem) in label_bins.items():
        # Collapse ALL training samples of the bin into ONE representative sample:
        # the per-timestep, per-feature mean with a ± 2 * std band.
        ts_mean = np.mean(X_train[cidx], axis=0)
        ts_std = np.std(X_train[cidx], axis=0)
        ts_upper = ts_mean + (2 * ts_std)
        ts_lower = ts_mean - (2 * ts_std)

        # Save values for the numerical modality.
        np.save(f"ts_values/{data_name}/{value_stem}_mean.npy", ts_mean)
        np.save(f"ts_values/{data_name}/{value_stem}_std.npy", ts_std)

        # squeeze=False always returns a 2-D array of axes, so univariate and
        # multivariate datasets share the same plotting code.
        fig, axs = plt.subplots(
            n_features, 1, figsize=(fsize, fsize), dpi=300, squeeze=False
        )
        for fidx, ax in enumerate(axs[:, 0]):
            ax.plot(t, ts_mean[:, fidx])
            ax.fill_between(t, ts_lower[:, fidx], ts_upper[:, fidx], color="b", alpha=0.1)
            ax.set_title(feature_names[data_name][fidx])

        fig.tight_layout()
        fig.savefig(f"ts_images/{data_name}/{cname}.png", bbox_inches="tight")
        # Close the figure explicitly; otherwise every iteration leaves one open
        # and memory grows with the number of datasets and bins.
        plt.close(fig)

In [None]:
import os
import matplotlib.pyplot as plt

# For every classification dataset, summarise the training series of each class as
# mean and std arrays, save those arrays under ts_values/, and save a
# mean ± 2*std plot under ts_images/.
fsize = 1024 / 300  # figure side in inches: 1024 px at 300 dpi (before tight cropping)

for data_name in sorted(CLS_DATASETS):
    task = "classification"
    X_train, y_train, _, _, class_names = load_dataset(data_name, task)
    class_numbers = np.unique(y_train)
    print(data_name, X_train.shape, class_numbers)

    # Everything below depends only on the dataset, not on the class, so do it once.
    os.makedirs(f"ts_values/{data_name}", exist_ok=True)
    os.makedirs(f"ts_images/{data_name}", exist_ok=True)
    t = np.arange(X_train.shape[1])
    n_features = X_train.shape[2]

    for cnum in class_numbers:
        cidx = np.where(y_train == cnum)[0]
        # NOTE(review): replace() removes every ".0" occurrence, not only a trailing
        # one — presumably meant to strip a float suffix from numeric labels; confirm.
        cname = class_names[cnum].replace(".0", "")

        # Collapse ALL training samples of the class into ONE representative sample:
        # the per-timestep, per-feature mean with a ± 2 * std band.
        ts_mean = np.mean(X_train[cidx], axis=0)
        ts_std = np.std(X_train[cidx], axis=0)
        ts_upper = ts_mean + (2 * ts_std)
        ts_lower = ts_mean - (2 * ts_std)

        # Save values for the numerical modality.
        np.save(f"ts_values/{data_name}/{cname}_mean.npy", ts_mean)
        np.save(f"ts_values/{data_name}/{cname}_std.npy", ts_std)

        # squeeze=False always returns a 2-D array of axes, so univariate and
        # multivariate datasets share the same plotting code.
        fig, axs = plt.subplots(
            n_features, 1, figsize=(fsize, fsize), dpi=300, squeeze=False
        )
        for fidx, ax in enumerate(axs[:, 0]):
            ax.plot(t, ts_mean[:, fidx])
            ax.fill_between(t, ts_lower[:, fidx], ts_upper[:, fidx], color="b", alpha=0.1)
            ax.set_title(feature_names[data_name][fidx])

        fig.tight_layout()
        fig.savefig(f"ts_images/{data_name}/{cname}.png", bbox_inches="tight")
        # Close the figure explicitly; otherwise every iteration leaves one open
        # and memory grows with the number of datasets and classes.
        plt.close(fig)

In [None]:
# Import locally so this cell also works on a fresh kernel (Restart & Run All);
# without it the name `glob` is undefined when this cell is reached.
from glob import glob

# Peek at how a saved "*_mean.npy" file name splits on "_".
# glob() returns matches in arbitrary order, so sort to always inspect the same file.
sorted(glob('ts_values/AppliancesEnergy/*_mean.npy'))[0].split('/')[-1].split('_')

In [None]:
from glob import glob
from exp_prompts import complete_by_values, complete_by_contexts, complete_by_images, complete_mm_prompt

# Build the multimodal prompt for the UCIHAR classification task with the
# 'time', 'text' and 'image' query types, then show it as plain text.
mm_prompt = complete_mm_prompt(
    task='classification',
    query_type=['time', 'text', 'image'],
    data_name='UCIHAR',
)
print(mm_prompt)

In [None]:
# import base64
# from configs import Configs
# from openai import OpenAI

# client = OpenAI(api_key=Configs.OPENAI_KEY)


# # Function to encode the image
# def encode_image(image_path):
#     with open(image_path, "rb") as image_file:
#         return base64.b64encode(image_file.read()).decode("utf-8")
    

In [None]:
# # Path to your image
# image_path = "path_to_your_image.jpg"
# # Getting the base64 string
# base64_image = encode_image(image_path)

# response = client.chat.completions.create(
#   model="gpt-4o-mini",
#   messages=[
#     {
#       "role": "user",
#       "content": [
#         {
#           "type": "text",
#           "text": "What is in this image?",
#         },
#         {
#           "type": "image_url",
#           "image_url": {
#             "url":  f"data:image/jpeg;base64,{base64_image}"
#           },
#         },
#       ],
#     }
#   ],
# )

# print(response.choices[0])
