In [1]:

#mount drive
from google.colab import drive
drive.mount('/content/MyDrive')
import seaborn as sns
sns.set_theme("paper")



Drive already mounted at /content/MyDrive; to attempt to forcibly remount, call drive.mount("/content/MyDrive", force_remount=True).


In [2]:
# @title Initialize Config

import torch
import numpy
class Config:
    def __init__(self, **kwargs):
        self.channels_imu_acc = kwargs.get('channels_imu_acc', [])
        self.channels_imu_gyr = kwargs.get('channels_imu_gyr', [])
        self.channels_joints = kwargs.get('channels_joints', [])
        self.channels_emg = kwargs.get('channels_emg', [])
        self.seed = kwargs.get('seed', 42)
        self.data_folder_name = kwargs.get('data_folder_name', 'default_data_folder_name')
        self.dataset_root = kwargs.get('dataset_root', 'default_dataset_root')
        self.imu_transforms = kwargs.get('imu_transforms', [])
        self.joint_transforms = kwargs.get('joint_transforms', [])
        self.emg_transforms = kwargs.get('emg_transforms', [])
        self.input_format = kwargs.get('input_format', 'csv')


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

config = Config(
    data_folder_name='/content/MyDrive/MyDrive/sd_datacollection_v4/all_subjects_data_final.h5',
    dataset_root='/content/datasets',
    input_format="csv",
    channels_imu_acc=['ACCX1', 'ACCY1', 'ACCZ1','ACCX2', 'ACCY2', 'ACCZ2', 'ACCX3', 'ACCY3', 'ACCZ3', 'ACCX4', 'ACCY4', 'ACCZ4', 'ACCX5', 'ACCY5', 'ACCZ5', 'ACCX6', 'ACCY6', 'ACCZ6'],
    channels_imu_gyr=['GYROX1', 'GYROY1', 'GYROZ1', 'GYROX2', 'GYROY2', 'GYROZ2', 'GYROX3', 'GYROY3', 'GYROZ3', 'GYROX4', 'GYROY4', 'GYROZ4', 'GYROX5', 'GYROY5', 'GYROZ5', 'GYROX6', 'GYROY6', 'GYROZ6'],
    channels_joints=['elbow_flex_r', 'arm_flex_r', 'arm_add_r'],
    channels_emg=['IM EMG4', 'IM EMG5', 'IM EMG6'],
)

#set seeds
torch.manual_seed(config.seed)
numpy.random.seed(config.seed)


In [3]:
class DataSharder:
    def __init__(self, config, split):
        self.config = config
        self.h5_file_path = config.data_folder_name  # Path to the HDF5 file
        self.split = split

    def load_data(self, subjects, window_length, window_overlap, dataset_name):
        print(f"Processing subjects: {subjects} with window length: {window_length}, overlap: {window_overlap}")

        self.window_length = window_length
        self.window_overlap = window_overlap

        # Process the data from the HDF5 file
        self._process_and_save_patients_h5(subjects, dataset_name)

    def _process_and_save_patients_h5(self, subjects, dataset_name):
        # Open the HDF5 file
        with h5py.File(self.h5_file_path, 'r') as h5_file:
            dataset_folder = os.path.join(self.config.dataset_root, dataset_name, self.split).replace("subject", "").replace("__", "_")
            print("Dataset folder:", dataset_folder)

            if os.path.exists(dataset_folder):
                print("Dataset Exists, Skipping...")
                return

            os.makedirs(dataset_folder, exist_ok=True)
            print("Dataset folder created: ", dataset_folder)

            for subject_id in tqdm(subjects, desc="Processing subjects"):
                subject_key = subject_id
                if subject_key not in h5_file:
                    print(f"Subject {subject_key} not found in the HDF5 file. Skipping.")
                    continue

                subject_data = h5_file[subject_key]
                session_keys = list(subject_data.keys())  # Sessions for this subject

                for session_id in session_keys:
                    session_data_group = subject_data[session_id]

                    for sessions_speed in session_data_group.keys():
                        session_data = session_data_group[sessions_speed]

                        # Extract IMU, EMG, and Joint data as numpy arrays
                        imu_data, imu_columns = self._extract_channel_data(session_data, self.config.channels_imu_acc + self.config.channels_imu_gyr)
                        emg_data, emg_columns = self._extract_channel_data(session_data, self.config.channels_emg)
                        joint_data, joint_columns = self._extract_channel_data(session_data, self.config.channels_joints)

                        # Shard the data into windows and save each window
                        self._save_windowed_data(imu_data, emg_data, joint_data, subject_key, session_id,sessions_speed, dataset_folder, imu_columns, emg_columns, joint_columns)

    def _save_windowed_data(self, imu_data, emg_data, joint_data, subject_key, session_id, session_speed, dataset_folder, imu_columns, emg_columns, joint_columns):
        window_size = self.window_length
        overlap = self.window_overlap
        step_size = window_size - overlap

        # Path to the CSV log file
        csv_file_path = os.path.join(dataset_folder, '..', f"{self.split}_info.csv")

        # Ensure the folder exists
        os.makedirs(dataset_folder, exist_ok=True)

        # Prepare CSV log headers (ensure the columns are 'file_name' and 'file_path')
        csv_headers = ['file_name', 'file_path']

        # Create or append to the CSV log file
        file_exists = os.path.isfile(csv_file_path)
        with open(csv_file_path, mode='a', newline='') as csv_file:
            writer = csv.writer(csv_file)

            # Write the headers only if the file is new
            if not file_exists:
                writer.writerow(csv_headers)

            # Determine the total data length based on the minimum length across the data sources
            total_data_length = min(imu_data.shape[1], emg_data.shape[1], joint_data.shape[1])

            # Adjust the starting point for windows based on total data length
            start = 2000 if total_data_length > 4000 else 0

            # Ensure that each window across imu_data, emg_data, and joint_data has the same shape before concatenation
            for i in range(start, total_data_length - window_size + 1, step_size):
                imu_window = imu_data[:, i:i + window_size]
                emg_window = emg_data[:, i:i + window_size]
                joint_window = joint_data[:, i:i + window_size]

                # Check if the window sizes are valid
                if imu_window.shape[1] == window_size and emg_window.shape[1] == window_size and joint_window.shape[1] == window_size:
                    # Convert windowed data to pandas DataFrame



                    imu_df = pd.DataFrame(imu_window.T, columns=imu_columns)
                    emg_df = pd.DataFrame(emg_window.T, columns=emg_columns)
                    joint_df = pd.DataFrame(joint_window.T, columns=joint_columns)



                    # Concatenate the data along the column axis
                    combined_df = pd.concat([imu_df, emg_df, joint_df], axis=1)

                    # Save the combined windowed data as a CSV file
                    file_name = f"{subject_key}_{session_id}_{session_speed}_win_{i}_ws{window_size}_ol{overlap}.csv"
                    file_path = os.path.join(dataset_folder, file_name)
                    combined_df.to_csv(file_path, index=False)

                    # Log the file name and path in the CSV (in the correct columns)
                    writer.writerow([file_name, file_path])
                else:
                    print(f"Skipping window {i} due to mismatched window sizes.")

    def _extract_channel_data(self, session_data, channels):
      extracted_data = []
      new_column_names = []  # Initialize here

      if isinstance(session_data, h5py.Dataset):
          if session_data.dtype.names:
              # Compound dataset
              column_names = session_data.dtype.names
              for channel in channels:
                  if channel in column_names:
                      channel_data = session_data[channel][:]
                      channel_data = pd.to_numeric(channel_data, errors='coerce')
                      df = pd.DataFrame(channel_data)
                      df_interpolated = df.interpolate(method='linear', axis=0, limit_direction='both')
                      extracted_data.append(df_interpolated.to_numpy().flatten())
                      new_column_names.append(channel)  # Populate here
                  else:
                      print(f"Channel {channel} not found in compound dataset.")
          else:
              # Simple dataset
              column_names = list(session_data.attrs.get('column_names', []))
              assert len(column_names) > 0, "column_names not found in dataset attributes"
              for channel in channels:
                  if channel in column_names:
                      col_idx = column_names.index(channel)
                      channel_data = session_data[:, col_idx]
                      channel_data = pd.to_numeric(channel_data, errors='coerce')
                      df = pd.DataFrame(channel_data)
                      df_interpolated = df.interpolate(method='linear', axis=0, limit_direction='both')
                      extracted_data.append(df_interpolated.to_numpy().flatten())
                      new_column_names.append(channel)
                  else:
                      print(f"Channel {channel} not found in session data.")

      return np.array(extracted_data), new_column_names


In [4]:
# @title Dataset creation
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from torch.utils.data import ConcatDataset
import random
from torch.utils.data import TensorDataset

class ImuJointPairDataset(Dataset):
    def __init__(self, config, subjects, window_length, window_overlap, split='train', dataset_train_name='train', dataset_test_name='test'):
        self.config = config
        self.split = split
        self.subjects = subjects
        self.window_length = window_length
        self.window_overlap = window_overlap if split == 'train' else 0
        self.input_format = config.input_format
        self.channels_imu_acc = config.channels_imu_acc
        self.channels_imu_gyr = config.channels_imu_gyr
        self.channels_joints = config.channels_joints
        self.channels_emg = config.channels_emg

        # Convert the list of subjects to a string that is path-safe
        subjects_str = "_".join(map(str, subjects)).replace('subject', '').replace('__', '_')

        # Use dataset_train_name or dataset_test_name based on split
        if split == 'train':
            dataset_name = f"dataset_wl{self.window_length}_ol{self.window_overlap}_train{subjects_str}"
        else:
            dataset_name = f"dataset_wl{self.window_length}_ol{self.window_overlap}_test{subjects_str}"

        self.dataset_name = dataset_name

        # Define the root directory based on dataset name
        self.root_dir = os.path.join(self.config.dataset_root, self.dataset_name)

        # Ensure sharded data exists, if not, reshard
        self.ensure_resharded(subjects, dataset_train_name if split == 'train' else dataset_test_name)

        info_path = os.path.join(self.root_dir, f"{split}_info.csv")
        self.data = pd.read_csv(info_path)

    def ensure_resharded(self, subjects, dataset_name):
        if not os.path.exists(self.root_dir):
            print(f"Sharded data not found at {self.root_dir}. Resharding...")
            data_sharder = DataSharder(self.config,self.split)
            # Pass dynamic parameters to sharder
            data_sharder.load_data(subjects, window_length=self.window_length, window_overlap=self.window_overlap, dataset_name=self.dataset_name)
        else:
            print(f"Sharded data found at {self.root_dir}. Skipping resharding.")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        file_path = os.path.join(self.root_dir,self.split, self.data.iloc[idx, 0])

        if self.input_format == "csv":
            combined_data = pd.read_csv(file_path)
        else:
            raise ValueError("Unsupported input format: {}".format(self.input_format))

        imu_data_acc, imu_data_gyr, joint_data, emg_data = self._extract_and_transform(combined_data)
        return imu_data_acc, imu_data_gyr, joint_data, emg_data

    def _extract_and_transform(self, combined_data):
        imu_data_acc = self._extract_channels(combined_data, self.channels_imu_acc)
        imu_data_gyr = self._extract_channels(combined_data, self.channels_imu_gyr)
        joint_data = self._extract_channels(combined_data, self.channels_joints)
        emg_data = self._extract_channels(combined_data, self.channels_emg)

        imu_data_acc = self.apply_transforms(imu_data_acc, self.config.imu_transforms)
        imu_data_gyr = self.apply_transforms(imu_data_gyr, self.config.imu_transforms)
        joint_data = self.apply_transforms(joint_data, self.config.joint_transforms)
        emg_data = self.apply_transforms(emg_data, self.config.emg_transforms)

        return imu_data_acc, imu_data_gyr, joint_data, emg_data

    def _extract_channels(self, combined_data, channels):
        return combined_data[channels].values if self.input_format == "csv" else combined_data[:, channels]

    def apply_transforms(self, data, transforms):
        for transform in transforms:
            data = transform(data)
        return torch.tensor(data, dtype=torch.float32)

def create_base_data_loaders(
    config,
    train_subjects,
    test_subjects,
    window_length=100,
    window_overlap=75,
    batch_size=64,
    dataset_train_name='train',
    dataset_test_name='test'
):
    # Create datasets with explicit parameters
    train_dataset = ImuJointPairDataset(
        config=config,
        subjects=train_subjects,
        window_length=window_length,
        window_overlap=window_overlap,
        split='train',
        dataset_train_name=dataset_train_name
    )

    test_dataset = ImuJointPairDataset(
        config=config,
        subjects=test_subjects,
        window_length=window_length,
        window_overlap=window_overlap,
        split='test',
        dataset_test_name=dataset_test_name
    )

    # Split train dataset into training and validation sets
    train_size = int(0.9 * len(train_dataset))
    val_size = len(train_dataset) - train_size
    train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader



In [5]:
# @title Kinematicsnet Architecture
import torch
import torch.nn as nn
import torch.nn.functional as F
import time
from scipy.signal import butter, filtfilt
from sklearn.metrics import mean_squared_error
import numpy as np
class Encoder_1(nn.Module):
    def __init__(self, input_dim, dropout):
        super(Encoder_1, self).__init__()
        self.lstm_1 = nn.LSTM(input_dim, 128, bidirectional=True, batch_first=True, dropout=0)
        self.lstm_2 = nn.LSTM(256, 64, bidirectional=True, batch_first=True, dropout=0)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(128, 32)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, x):
        out_1, (h_1, _) = self.lstm_1(x)
        out_1 = self.dropout_1(out_1)
        out_2, (h_2, _) = self.lstm_2(out_1)
        out_2 = self.dropout_2(out_2)
        return out_2, (h_1, h_2)

class Encoder_2(nn.Module):
    def __init__(self, input_dim, dropout):
        super(Encoder_2, self).__init__()
        self.gru_1 = nn.GRU(input_dim, 128, bidirectional=True, batch_first=True, dropout=0)
        self.gru_2 = nn.GRU(256, 64, bidirectional=True, batch_first=True, dropout=0)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(128, 32)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, x):
        out_1, h_1 = self.gru_1(x)
        out_1 = self.dropout_1(out_1)
        out_2, h_2 = self.gru_2(out_1)
        out_2 = self.dropout_2(out_2)
        return out_2, (h_1, h_2)


class GatingModule(nn.Module):
    def __init__(self, input_size):
        super(GatingModule, self).__init__()
        self.gate = nn.Sequential(
            nn.Linear(2*input_size, input_size),
            nn.Sigmoid()
        )

    def forward(self, input1, input2):
        # Apply gating mechanism
        gate_output = self.gate(torch.cat((input1,input2),dim=-1))

        # Scale the inputs based on the gate output
        gated_input1 = input1 * gate_output
        gated_input2 = input2 * (1 - gate_output)

        # Combine the gated inputs
        output = gated_input1 + gated_input2
        return output
#variable w needs to be checked for correct value, stand-in value used
class teacher(nn.Module):
    def __init__(self, input_acc, input_gyr, input_emg, drop_prob=0.25, w=100):
        """
        input_acc, input_gyr, input_emg: Dimensionalities per modality
        drop_prob: dropout probability
        w: sequence length processed by LSTM/GRU
        """
        super(teacher, self).__init__()
        self.w = w

        # ------------------
        # Encoders
        # ------------------
        self.encoder_1_acc = Encoder_1(input_acc, drop_prob)
        self.encoder_1_gyr = Encoder_1(input_gyr, drop_prob)
        self.encoder_1_emg = Encoder_1(input_emg, drop_prob)

        self.encoder_2_acc = Encoder_2(input_acc, drop_prob)
        self.encoder_2_gyr = Encoder_2(input_gyr, drop_prob)
        self.encoder_2_emg = Encoder_2(input_emg, drop_prob)

        # ------------------
        # BatchNorms
        # ------------------
        self.BN_acc = nn.BatchNorm1d(input_acc, affine=False)
        self.BN_gyr = nn.BatchNorm1d(input_gyr, affine=False)
        self.BN_emg = nn.BatchNorm1d(input_emg, affine=False)

        # ------------------
        # Gating Modules
        # ------------------
        self.gate_1 = GatingModule(128)
        self.gate_2 = GatingModule(128)
        self.gate_3 = GatingModule(128)

        # ------------------
        # Additional Layers
        # ------------------
        self.dropout = nn.Dropout(p=0.05)
        self.fc_kd = nn.Linear(3 * 128, 2 * 128)  # Some knowledge-distillation head
        self.weighted_feat = nn.Sequential(nn.Linear(128, 1), nn.Sigmoid())
        self.attention = nn.MultiheadAttention(3 * 128, 4, batch_first=True)

        # Gating
        self.gating_net = nn.Sequential(
            nn.Linear(128 * 3, 3 * 128),
            nn.Sigmoid()
        )
        self.gating_net_1 = nn.Sequential(
            nn.Linear(2 * 3 * 128 + 128, 2 * 3 * 128 + 128),
            nn.Sigmoid()
        )

        # Final output head: shape [batch, w, 3]
        # Interpreted as "predicted noise" for each modality dimension
        self.fc = nn.Linear(2 * 3 * 128 + 128, 3)

    def forward(self, X_acc, X_gyr, X_emg, timestep):
        """
        Predict noise.
        - X_acc, X_gyr, X_emg: [batch, seq_len, feat_dim] each
        - timestep: [batch, seq_len, 1] or broadcast shape,
                    we won't use it inside the LSTM/GRUs directly here,
                    but we keep the signature consistent with the snippet.
        Return: predicted noise, shape [batch, seq_len, 3]
        """
        out, _, _ = self._base_forward(X_acc, X_gyr, X_emg)
        return out

    def _base_forward(self, x_acc, x_gyr, x_emg):
        """
        The core teacher architecture, originally returning (out, x_kd, hidden_states).
        We'll keep it intact. 'out' is used as the final predicted noise.
        """
        # Flatten to [b*w, feats]
        b, w_, f_acc = x_acc.size()
        b2, w2_, f_gyr = x_gyr.size()
        b3, w3_, f_emg = x_emg.size()

        x_acc_1 = x_acc.view(b * w_, f_acc)
        x_gyr_1 = x_gyr.view(b2 * w2_, f_gyr)
        x_emg_1 = x_emg.view(b3 * w3_, f_emg)

        # ------------------
        # BatchNorm each
        # ------------------
        x_acc_1 = self.BN_acc(x_acc_1)
        x_gyr_1 = self.BN_gyr(x_gyr_1)
        x_emg_1 = self.BN_emg(x_emg_1)

        # Reshape back
        x_acc_2 = x_acc_1.view(b, w_, f_acc)
        x_gyr_2 = x_gyr_1.view(b2, w2_, f_gyr)
        x_emg_2 = x_emg_1.view(b3, w3_, f_emg)

        # ------------------
        # Encoder 1
        # ------------------
        x_acc_1, (h_acc_1, _) = self.encoder_1_acc(x_acc_2)
        x_gyr_1, (h_gyr_1, _) = self.encoder_1_gyr(x_gyr_2)
        x_emg_1, (h_emg_1, _) = self.encoder_1_emg(x_emg_2)

        # ------------------
        # Encoder 2
        # ------------------
        x_acc_2, (h_acc_2, _) = self.encoder_2_acc(x_acc_2)
        x_gyr_2, (h_gyr_2, _) = self.encoder_2_gyr(x_gyr_2)
        x_emg_2, (h_emg_2, _) = self.encoder_2_emg(x_emg_2)

        # ------------------
        # Gating
        # ------------------
        x_acc = self.gate_1(x_acc_1, x_acc_2)  # [b, w, 128]
        x_gyr = self.gate_2(x_gyr_1, x_gyr_2)
        x_emg = self.gate_3(x_emg_1, x_emg_2)

        # Merge
        x = torch.cat((x_acc, x_gyr, x_emg), dim=-1)  # [b, w, 3*128]
        x_kd = self.fc_kd(x)  # [b, w, 2*128]

        # Multihead Attention
        out_1, attn_weights = self.attention(x, x, x)  # [b, w, 3*128]

        gating_weights = self.gating_net(x)
        out_2 = gating_weights * x

        # Weighted features
        w1 = self.weighted_feat(x[:, :, :128])    # [b, w, 1]
        w2 = self.weighted_feat(x[:, :, 128:256]) # [b, w, 1]
        w3 = self.weighted_feat(x[:, :, 256:384]) # [b, w, 1]

        x_1 = w1 * x[:, :, 0:128]
        x_2 = w2 * x[:, :, 128:256]
        x_3 = w3 * x[:, :, 256:384]
        out_3 = x_1 + x_2 + x_3

        out = torch.cat((out_1, out_2, out_3), dim=-1)  # [b, w, 9*128]
        gating_weights_1 = self.gating_net_1(out)
        out = gating_weights_1 * out

        # Final linear: produce shape [b, w, 3] = predicted noise
        out = self.fc(out)
        return out, x_kd, (h_acc_1, h_acc_2, h_gyr_1, h_gyr_2, h_emg_1, h_emg_2)



In [6]:
# @title Loss Functions
import statistics

class RMSELoss(nn.Module):
    def __init__(self):
        super(RMSELoss, self).__init__()
    def forward(self, output, target):
        loss = torch.sqrt(torch.mean((output - target) ** 2))
        return loss

#prediction function
def RMSE_prediction(yhat_4,test_y, output_dim,print_losses=True):

  s1=yhat_4.shape[0]*yhat_4.shape[1]

  test_o=test_y.reshape((s1,output_dim))
  yhat=yhat_4.reshape((s1,output_dim))




  y_1_no=yhat[:,0]
  y_2_no=yhat[:,1]
  y_3_no=yhat[:,2]

  y_1=y_1_no
  y_2=y_2_no
  y_3=y_3_no


  y_test_1=test_o[:,0]
  y_test_2=test_o[:,1]
  y_test_3=test_o[:,2]



  cutoff=6
  fs=200
  order=4

  nyq = 0.5 * fs
  ## filtering data ##
  def butter_lowpass_filter(data, cutoff, fs, order):
      normal_cutoff = cutoff / nyq
      # Get the filter coefficients
      b, a = butter(order, normal_cutoff, btype='low', analog=False)
      y = filtfilt(b, a, data)
      return y



  Z_1=y_1
  Z_2=y_2
  Z_3=y_3



  ###calculate RMSE

  rmse_1 =((np.sqrt(mean_squared_error(y_test_1,y_1))))
  rmse_2 =((np.sqrt(mean_squared_error(y_test_2,y_2))))
  rmse_3 =((np.sqrt(mean_squared_error(y_test_3,y_3))))





  p_1=np.corrcoef(y_1, y_test_1)[0, 1]
  p_2=np.corrcoef(y_2, y_test_2)[0, 1]
  p_3=np.corrcoef(y_3, y_test_3)[0, 1]




              ### Correlation ###
  p=np.array([p_1,p_2,p_3])
  #,p_4,p_5,p_6,p_7])




      #### Mean and standard deviation ####

  rmse=np.array([rmse_1,rmse_2,rmse_3])
  #,rmse_4,rmse_5,rmse_6,rmse_7])

      #### Mean and standard deviation ####
  m=statistics.mean(rmse)
  SD=statistics.stdev(rmse)


  m_c=statistics.mean(p)
  SD_c=statistics.stdev(p)


  if print_losses:
    print(rmse_1)
    print(rmse_2)
    print(rmse_3)
    print("\n")
    print(p_1)
    print(p_2)
    print(p_3)
    print('Mean: %.3f' % m,'+/- %.3f' %SD)
    print('Mean: %.3f' % m_c,'+/- %.3f' %SD_c)

  return rmse, p, Z_1,Z_2,Z_3
  #,Z_4,Z_5,Z_6,Z_7

In [7]:





# @title Model Utils

def reverse_diffusion(
    model,
    data_acc,   # [B, seq_len, feats_acc]
    data_gyr,   # [B, seq_len, feats_gyr]
    data_EMG,   # [B, seq_len, feats_emg]
    betas,      # beta_schedule_vals (1D tensor)
    num_steps,  # total diffusion steps
    device,
    batch_size, # data_acc.size(0)
    seq_len     # data_acc.size(1)
):
    """
    Start from pure noise Y_T and iteratively denoise back to Y_0.
    Returns final Y_0 estimate of shape [B, seq_len].
    This uses the model(...) to predict noise at each step.
    """
    model.eval()

    # Y_T ~ N(0,I) shape [B, seq_len]
    # We expand it to [B, seq_len, 1] to match model's expected target shape
    Y_t = torch.randn(batch_size, seq_len, 1, device=device)

    with torch.no_grad():
        for t in reversed(range(num_steps)):
            beta_t = betas[t]
            # 1) Build a timestep embedding: t / num_steps
            #    shape [B, seq_len, 1]
            time_embed = torch.full((batch_size, seq_len, 1), t/num_steps, device=device)

            # 2) Predict noise using the model
            #    The model signature: model(acc, Y_noisy, time_embed) => predicted_noise
            predicted_noise = model(data_acc, data_gyr, data_EMG,time_embed)

            # 3) Approximate Y_{t-1}
            #    If Y_t = sqrt(1-beta_t)*Y_{t-1} + sqrt(beta_t)*noise,
            #    => Y_{t-1} = (Y_t - sqrt(beta_t)*predicted_noise) / sqrt(1 - beta_t)
            Y_t_minus_1 = (Y_t - torch.sqrt(beta_t)*predicted_noise) / torch.sqrt(1 - beta_t)

            # 4) Optionally add random noise if t > 0
            #    For purely deterministic sampling, omit this noise addition
            if t > 0:
                z = torch.randn_like(Y_t)
                Y_t_minus_1 += torch.sqrt(beta_t)*z

            # Update
            Y_t = Y_t_minus_1

    # Now Y_t is Y_0 => shape [B, seq_len, 1]. Convert to [B, seq_len].
    return Y_t.squeeze(-1)

def evaluate_model(
    device,
    model,
    loader,
    criterion,            # Typically MSELoss or similar
    beta_schedule_vals,   # 1D tensor of betas
    num_steps
):
    """
    During evaluation, we do reverse diffusion to get final Y_0,
    then measure RMSE/PCC between Y_0 and the true target Y.
    """
    model.eval()
    total_loss = 0.0
    total_pcc = np.zeros(len(config.channels_joints))
    total_rmse = np.zeros(len(config.channels_joints))

    with torch.no_grad():
        for (data_acc, data_gyr, target, data_EMG) in loader:
            # Move to device
            data_acc = data_acc.to(device).float()
            data_gyr = data_gyr.to(device).float()
            data_EMG = data_EMG.to(device).float()
            target   = target.to(device).float()  # [B, seq_len]

            batch_size = data_acc.size(0)
            seq_len    = data_acc.size(1)

            # ---------------------------
            # 1) Reverse diffusion
            # ---------------------------
            Y_hat = reverse_diffusion(
                model   = model,
                data_acc= data_acc,
                data_gyr= data_gyr,
                data_EMG= data_EMG,
                betas   = beta_schedule_vals,
                num_steps= num_steps,
                device  = device,
                batch_size= batch_size,
                seq_len= seq_len
            )
            # Y_hat shape: [B, seq_len]

            # ---------------------------
            # 2) Compute MSE loss
            #    Now that we have a final guess for Y_0, we can do
            #    typical reconstruction metrics
            # ---------------------------
            # "criterion" is e.g. MSELoss, but it expects [B, seq_len, *] => might need unsqueeze
            # If your criterion is a standard MSELoss over 2D, just flatten the dims or do some reshape.
            # We'll do a simple approach: flatten both
            loss = criterion(Y_hat, target)
            total_loss += loss.item()

            # ---------------------------
            # 3) Compute RMSE/PCC channel-wise
            #    If your target is shaped [B, seq_len, #channels], adjust accordingly.
            #    If each "channel_joints" is just 1 dimension of seq_len, adapt the slicing.
            # ---------------------------
            # We'll assume 'RMSE_prediction' is a function that can handle [B, seq_len] => [channels].
            # If your real data has multiple channels, you might have shape [B, seq_len, C].
            # For demonstration, let's assume we keep it at [B, seq_len] = 1 channel only, or we map it.
            batch_rmse, batch_pcc, _, _, _ = RMSE_prediction(
                Y_hat.detach().cpu().numpy(),
                target.detach().cpu().numpy(),
                len(config.channels_joints),
                print_losses=False
            )
            total_pcc += batch_pcc
            total_rmse += batch_rmse

    avg_loss = total_loss / len(loader)
    avg_pcc  = total_pcc / len(loader)
    avg_rmse = total_rmse / len(loader)
    return avg_loss, avg_pcc, avg_rmse


def save_checkpoint(model, optimizer, epoch, filename, train_loss, val_loss, test_loss=None,
                    channelwise_metrics=None, history=None, curriculum_schedule=None):

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_loss,
        'val_loss': val_loss,
        'train_channelwise_metrics': channelwise_metrics['train'],
        'val_channelwise_metrics': channelwise_metrics['val'],
    }

    if test_loss is not None:
        checkpoint['test_loss'] = test_loss
        checkpoint['test_channelwise_metrics'] = channelwise_metrics['test']

    # Save the history (losses, PCCs, RMSEs, channel-wise metrics)
    if history:
        checkpoint['history'] = history

    # Save curriculum schedule
    if curriculum_schedule:
        checkpoint['curriculum_schedule'] = curriculum_schedule

    torch.save(checkpoint, filename)
    print(f"Checkpoint saved for epoch {epoch + 1}")


def beta_schedule(num_steps, start=1e-4, end=2e-2):
    """
    Generates a linear beta schedule from `start` to `end`
    over `num_steps` diffusion steps.
    Returns: 1D tensor of shape (num_steps,)
    """
    return torch.linspace(start, end, steps=num_steps)

def train_teacher(device,
                  train_loader,
                  val_loader,
                  test_loader,
                  learn_rate,
                  epochs,
                  model,
                  filename,
                  loss_function,
                  optimizer=None,
                  l1_lambda=None,
                  train_from_last_epoch=False,
                  curriculum_loader=None,
                  beta_schedule_vals=beta_schedule(50, start=1e-4, end=2e-2),
                  num_steps=50):
    """
    Train a diffusion-style model (noise prediction) but preserve
    the same function signature and basic structure. We'll accumulate
    MSE over timesteps for noise prediction during training.
    We'll then evaluate with the evaluate_model function above,
    which also measures noise-prediction MSE. If you want final
    signal metrics (RMSE/PCC), you must implement a reverse-diffusion
    pass in your evaluation.

    Arguments:
      device: torch device
      train_loader, val_loader, test_loader: DataLoaders
      learn_rate: float learning rate
      epochs: number of epochs
      model: diffusion teacher model
      filename: checkpoint filename
      loss_function: typically nn.MSELoss
      optimizer: optional, if not provided we build an Adam
      l1_lambda: optional L1 regularization term
      train_from_last_epoch: whether to continue from last checkpoint
      curriculum_loader: optional curriculum logic
      beta_schedule_vals: 1D tensor for betas, must be provided for diffusion
      num_steps: total diffusion steps
    """
    import time
    import os
    from tqdm import tqdm

    if beta_schedule_vals is None:
        raise ValueError("beta_schedule_vals must be provided for diffusion training.")

    model.to(device)
    criterion = loss_function

    if optimizer is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)

    train_losses = []
    val_losses = []
    test_losses = []

    train_pccs = []
    val_pccs = []
    test_pccs = []

    train_rmses = []
    val_rmses = []
    test_rmses = []

    train_pccs_channelwise = []
    val_pccs_channelwise = []
    test_pccs_channelwise = []

    train_rmses_channelwise = []
    val_rmses_channelwise = []
    test_rmses_channelwise = []

    # Check for existing checkpoint
    last_epoch = 0
    checkpoint_path = f"/content/MyDrive/MyDrive/models/{filename}/"
    if train_from_last_epoch and os.path.exists(checkpoint_path):
        checkpoints = [f for f in os.listdir(checkpoint_path) if f.endswith('.pth')]
        if checkpoints:
            checkpoints.sort(key=lambda x: int(x.split('_')[-1].split('.')[0]))
            latest_checkpoint = checkpoints[-1]
            print(f"Loading model from checkpoint: {latest_checkpoint}")
            checkpoint = torch.load(os.path.join(checkpoint_path, latest_checkpoint))
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            last_epoch = checkpoint['epoch']

            # Load the history
            train_losses = checkpoint['history']['train_losses']
            val_losses = checkpoint['history']['val_losses']
            test_losses = checkpoint['history']['test_losses']
            train_pccs = checkpoint['history']['train_pccs']
            val_pccs = checkpoint['history']['val_pccs']
            test_pccs = checkpoint['history']['test_pccs']
            train_rmses = checkpoint['history']['train_rmses']
            val_rmses = checkpoint['history']['val_rmses']
            test_rmses = checkpoint['history']['test_rmses']
            train_pccs_channelwise = checkpoint['history']['train_pccs_channelwise']
            val_pccs_channelwise = checkpoint['history']['val_pccs_channelwise']
            test_pccs_channelwise = checkpoint['history']['test_pccs_channelwise']
            train_rmses_channelwise = checkpoint['history']['train_rmses_channelwise']
            val_rmses_channelwise = checkpoint['history']['val_rmses_channelwise']
            test_rmses_channelwise = checkpoint['history']['test_rmses_channelwise']
            if 'curriculum_schedule' in checkpoint:
                curriculum_loader.curriculum_schedule = checkpoint['curriculum_schedule']
        else:
            print("No checkpoints found, starting from scratch.")
    else:
        print("Starting from scratch.")

    start_time = time.time()
    best_val_loss = float('inf')
    patience = 10
    patience_counter = 0

    for epoch in range(last_epoch, epochs):
        epoch_start_time = time.time()
        model.train()

        if curriculum_loader:
            curriculum_loader.update_epoch(epoch)
            train_loader, val_loader, test_loader = curriculum_loader.get_loaders()

        # For channelwise metrics
        epoch_train_loss = np.zeros(len(config.channels_joints))
        epoch_train_pcc = np.zeros(len(config.channels_joints))
        epoch_train_rmse = np.zeros(len(config.channels_joints))

        # ------------------------
        # TRAIN (Noise Prediction)
        # ------------------------
        for i, (data_acc, data_gyr, target, data_EMG) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs} Training")):
            optimizer.zero_grad()

            # Move inputs and targets to device
            data_acc = data_acc.to(device).float()  # [batch, seq_len, feat_dim]
            data_gyr = data_gyr.to(device).float()
            data_EMG = data_EMG.to(device).float()
            target = target.to(device).float()      # [batch, seq_len]

            total_loss = 0.0
            for t in range(num_steps - 1, 0, -1):
                beta = beta_schedule_vals[t]
                noise = torch.randn_like(target)  # [batch, seq_len]
                y_noisy = torch.sqrt(1 - beta) * target + torch.sqrt(beta) * noise
                y_noisy_3d = y_noisy.unsqueeze(-1)  # Add last dim for compatibility: [batch, seq_len, 1]

                # Timestep embedding, broadcasted across all modalities
                timestep = torch.full(
                    (data_acc.size(0), data_acc.size(1), 1),  # [batch, seq_len, 1]
                    t / num_steps,
                    device=device
                )

                # Predict noise for all modalities
                noise_pred = model(data_acc, data_gyr, data_EMG, timestep)

                # Loss computation
                step_loss = criterion(noise_pred, noise)
                total_loss += step_loss


            # L1 regularization if needed
            if l1_lambda is not None:
                l1_norm = sum(p.abs().sum() for p in model.parameters())
                total_loss += l1_lambda * l1_norm

          # Backpropagation
            total_loss.backward()
            optimizer.step()

            # For logging, we track the final 'total_loss' of the batch.
            # In standard diffusion, there's no direct final "RMSE" or "PCC" unless you do reconstruction.
            epoch_train_loss += total_loss.detach().cpu().numpy()
            # We keep placeholders for pcc and rmse
            epoch_train_pcc += 0.0
            epoch_train_rmse += 0.0

        # Average train loss
        avg_train_loss = epoch_train_loss / len(train_loader)
        avg_train_pcc  = epoch_train_pcc  / len(train_loader)
        avg_train_rmse = epoch_train_rmse / len(train_loader)

        train_losses.append(avg_train_loss)
        train_pccs.append(np.mean(avg_train_pcc))
        train_rmses.append(np.mean(avg_train_rmse))

        train_pccs_channelwise.append(avg_train_pcc)
        train_rmses_channelwise.append(avg_train_rmse)

        # ------------------------
        # VALIDATION
        # ------------------------
        avg_val_loss, avg_val_pcc, avg_val_rmse = evaluate_model(
            device,
            model,
            val_loader,
            criterion,
            beta_schedule_vals=beta_schedule_vals,  # pass to evaluate
            num_steps=num_steps
        )
        val_losses.append(avg_val_loss)
        val_pccs.append(np.mean(avg_val_pcc))
        val_rmses.append(np.mean(avg_val_rmse))

        val_pccs_channelwise.append(avg_val_pcc)
        val_rmses_channelwise.append(avg_val_rmse)

        # ------------------------
        # TEST
        # ------------------------
        avg_test_loss, avg_test_pcc, avg_test_rmse = evaluate_model(
            device,
            model,
            test_loader,
            criterion,
            beta_schedule_vals=beta_schedule_vals,  # pass to evaluate
            num_steps=num_steps
        )
        test_losses.append(avg_test_loss)
        test_pccs.append(np.mean(avg_test_pcc))
        test_rmses.append(np.mean(avg_test_rmse))

        test_pccs_channelwise.append(avg_test_pcc)
        test_rmses_channelwise.append(avg_test_rmse)

        print(f"Epoch: {epoch + 1}, "
              f"Training Loss: {np.mean(avg_train_loss):.4f}, "
              f"Validation Loss: {np.mean(avg_val_loss):.4f}, "
              f"Test Loss: {np.mean(avg_test_loss):.4f}")
        print(f"Training RMSE: {np.mean(avg_train_rmse):.4f}, "
              f"Validation RMSE: {np.mean(avg_val_rmse):.4f}, "
              f"Test RMSE: {np.mean(avg_test_rmse):.4f}")
        print(f"Training PCC: {np.mean(avg_train_pcc):.4f}, "
              f"Validation PCC: {np.mean(avg_val_pcc):.4f}, "
              f"Test PCC: {np.mean(avg_test_pcc):.4f}")

        # Save checkpoint
        if not os.path.exists(checkpoint_path):
            os.makedirs(checkpoint_path)

        history = {
            'train_losses': train_losses,
            'val_losses': val_losses,
            'test_losses': test_losses,
            'train_pccs': train_pccs,
            'val_pccs': val_pccs,
            'test_pccs': test_pccs,
            'train_rmses': train_rmses,
            'val_rmses': val_rmses,
            'test_rmses': test_rmses,
            'train_pccs_channelwise': train_pccs_channelwise,
            'val_pccs_channelwise': val_pccs_channelwise,
            'test_pccs_channelwise': test_pccs_channelwise,
            'train_rmses_channelwise': train_rmses_channelwise,
            'val_rmses_channelwise': val_rmses_channelwise,
            'test_rmses_channelwise': test_rmses_channelwise
        }

        save_checkpoint(
            model,
            optimizer,
            epoch,
            f"{checkpoint_path}/{filename}_epoch_{epoch + 1}.pth",
            train_loss=avg_train_loss,
            val_loss=avg_val_loss,
            test_loss=avg_test_loss,
            channelwise_metrics={
                'train': {'pcc': avg_train_pcc, 'rmse': avg_train_rmse},
                'val':   {'pcc': avg_val_pcc,   'rmse': avg_val_rmse},
                'test':  {'pcc': avg_test_pcc,  'rmse': avg_test_rmse},
            },
            history=history,
            curriculum_schedule=curriculum_loader.curriculum_schedule if curriculum_loader else None
        )

        # Early stopping
        if np.mean(avg_val_loss) < best_val_loss:
            best_val_loss = np.mean(avg_val_loss)
            torch.save(model.state_dict(), filename)
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Stopping early after {epoch + 1} epochs")
            break

    end_time = time.time()
    print(f"Total training time: {end_time - start_time:.2f} seconds")

    print(f"loading best model from {filename}")
    model.load_state_dict(torch.load(filename))
    model.eval()

    return (model,
            train_losses, val_losses, test_losses,
            train_pccs, val_pccs, test_pccs,
            train_rmses, val_rmses, test_rmses)





In [8]:
# @title Helper Functions


# Function to create the teacher model with defaults from config
def create_teacher_model(input_acc, input_gyr, input_emg, base_weights_path=None, drop_prob=0.25, w=100):
    model = teacher(input_acc, input_gyr, input_emg, drop_prob=drop_prob, w=w)

    if base_weights_path:
        # Load the initial weights from the base model
        model.load_state_dict(torch.load(base_weights_path))

    return model




In [9]:
import matplotlib.pyplot as plt
import numpy as np
import os
import h5py
from tqdm.notebook import tqdm
import pandas as pd
import csv

all_subjects= [f"subject_{x}" for x in range(1,14)]
input_acc, input_gyr, input_emg = 18,18,3
batch_size = 16

# Placeholder for storing best RMSEs
best_rmse_per_subject = []
best_pcc_per_subject = []

train_flag = True

for test_subject in all_subjects:



    print(f"Running training with {test_subject} as the test subject.")

    # Set up the training subjects (all except the test subject)
    train_subjects = [subject for subject in all_subjects if subject != test_subject]

    model_name = f'TeacherModel_RMSELoss_test_{test_subject}_wl{100}_ol{75}_diffusion'
    print(f"Model: {model_name}")

    # Load the model configuration and data loaders
    model_config = {
        'model': create_teacher_model(input_acc, input_gyr, input_emg, w=100),
        'loss': RMSELoss(),
        'loaders': create_base_data_loaders(
            config=config,
            train_subjects=train_subjects,
            test_subjects=[test_subject],
            window_length=100,
            window_overlap=75,
            batch_size=batch_size
        ),
        'epochs': 10,
        'use_curriculum': False
    }

    model = model_config['model']
    loss_function = model_config['loss']
    epochs = model_config.get("epochs", 10)
    device = model_config.get("device", torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    learn_rate = model_config.get("learn_rate", 0.001)
    use_curriculum = model_config.get("use_curriculum", False)

    optimizer = model_config.get("optimizer", None)
    l1_lambda = model_config.get("l1_lambda", None)

    print(f"Running model: {model_name}")

    # Unpack the static loaders tuple (train_loader, val_loader, test_loader)
    train_loader, val_loader, test_loader = model_config['loaders']
    if train_flag:
    # Train the model and save only the best based on validation loss
      model, train_losses, val_losses, test_losses, train_pccs, val_pccs, test_pccs, train_rmses, val_rmses, test_rmses = train_teacher(
          device=device,
          train_loader=train_loader,
          val_loader=val_loader,
          test_loader=test_loader,
          learn_rate=learn_rate,
          epochs=epochs,
          model=model,
          filename=model_name,
          loss_function=loss_function,
          optimizer=optimizer,
          l1_lambda=l1_lambda,
          train_from_last_epoch=False
      )
    else:
      #load filename as model
      model.load_state_dict(torch.load(f"{model_name}"))
      model.to(device)
      model.eval()

     #run model on test set and record result
    test_loss, test_pcc, test_rmse = evaluate_model(device, model, test_loader, loss_function)
    print(f"Test Loss: {test_loss:.4f}, Test PCC: {np.mean(test_pcc):.4f}, Test RMSE: {np.mean(test_rmse):.4f}")
    best_rmse_per_subject.append(np.mean(test_rmse))
    best_pcc_per_subject.append(np.mean(test_pcc))


# Compute the average of the best RMSEs across all subjects



Running training with subject_1 as the test subject.
Model: TeacherModel_RMSELoss_test_subject_1_wl100_ol75_diffusion
Sharded data found at /content/datasets/dataset_wl100_ol75_train_2_3_4_5_6_7_8_9_10_11_12_13. Skipping resharding.
Sharded data found at /content/datasets/dataset_wl100_ol0_test_1. Skipping resharding.
Running model: TeacherModel_RMSELoss_test_subject_1_wl100_ol75_diffusion
Starting from scratch.


Epoch 1/10 Training: 100%|██████████| 1305/1305 [27:19<00:00,  1.26s/it]


Epoch: 1, Training Loss: 49.0024, Validation Loss: 53.8429, Test Loss: 54.1862
Training RMSE: 0.0000, Validation RMSE: 51.7754, Test RMSE: 47.7363
Training PCC: 0.0000, Validation PCC: -0.0001, Test PCC: 0.0009
Checkpoint saved for epoch 1


Epoch 2/10 Training: 100%|██████████| 1305/1305 [27:16<00:00,  1.25s/it]


Epoch: 2, Training Loss: 49.0027, Validation Loss: 53.8567, Test Loss: 54.1983
Training RMSE: 0.0000, Validation RMSE: 51.7892, Test RMSE: 47.7513
Training PCC: 0.0000, Validation PCC: 0.0012, Test PCC: 0.0035
Checkpoint saved for epoch 2


Epoch 3/10 Training: 100%|██████████| 1305/1305 [27:21<00:00,  1.26s/it]


Epoch: 3, Training Loss: 48.9975, Validation Loss: 53.8453, Test Loss: 54.1791
Training RMSE: 0.0000, Validation RMSE: 51.7769, Test RMSE: 47.7297
Training PCC: 0.0000, Validation PCC: 0.0018, Test PCC: 0.0084
Checkpoint saved for epoch 3


Epoch 4/10 Training: 100%|██████████| 1305/1305 [27:18<00:00,  1.26s/it]


Epoch: 4, Training Loss: 48.9996, Validation Loss: 53.8468, Test Loss: 54.1879
Training RMSE: 0.0000, Validation RMSE: 51.7792, Test RMSE: 47.7382
Training PCC: 0.0000, Validation PCC: 0.0016, Test PCC: -0.0020
Checkpoint saved for epoch 4


Epoch 5/10 Training: 100%|██████████| 1305/1305 [27:27<00:00,  1.26s/it]


Epoch: 5, Training Loss: 48.9968, Validation Loss: 53.8514, Test Loss: 54.1967
Training RMSE: 0.0000, Validation RMSE: 51.7844, Test RMSE: 47.7456
Training PCC: 0.0000, Validation PCC: -0.0006, Test PCC: -0.0015
Checkpoint saved for epoch 5


Epoch 6/10 Training: 100%|██████████| 1305/1305 [27:38<00:00,  1.27s/it]


Epoch: 6, Training Loss: 48.9954, Validation Loss: 53.8484, Test Loss: 54.1967
Training RMSE: 0.0000, Validation RMSE: 51.7812, Test RMSE: 47.7428
Training PCC: 0.0000, Validation PCC: 0.0020, Test PCC: 0.0023
Checkpoint saved for epoch 6


Epoch 7/10 Training: 100%|██████████| 1305/1305 [27:34<00:00,  1.27s/it]


Epoch: 7, Training Loss: 48.9977, Validation Loss: 53.8536, Test Loss: 54.1879
Training RMSE: 0.0000, Validation RMSE: 51.7872, Test RMSE: 47.7415
Training PCC: 0.0000, Validation PCC: 0.0005, Test PCC: 0.0025
Checkpoint saved for epoch 7


Epoch 8/10 Training: 100%|██████████| 1305/1305 [27:44<00:00,  1.28s/it]


Epoch: 8, Training Loss: 48.9982, Validation Loss: 53.8537, Test Loss: 54.1886
Training RMSE: 0.0000, Validation RMSE: 51.7846, Test RMSE: 47.7404
Training PCC: 0.0000, Validation PCC: -0.0016, Test PCC: -0.0016
Checkpoint saved for epoch 8


Epoch 9/10 Training: 100%|██████████| 1305/1305 [27:36<00:00,  1.27s/it]


Epoch: 9, Training Loss: 48.9957, Validation Loss: 53.8509, Test Loss: 54.1858
Training RMSE: 0.0000, Validation RMSE: 51.7853, Test RMSE: 47.7395
Training PCC: 0.0000, Validation PCC: 0.0001, Test PCC: 0.0006
Checkpoint saved for epoch 9


Epoch 10/10 Training: 100%|██████████| 1305/1305 [27:37<00:00,  1.27s/it]


Epoch: 10, Training Loss: 49.0009, Validation Loss: 53.8562, Test Loss: 54.1914
Training RMSE: 0.0000, Validation RMSE: 51.7893, Test RMSE: 47.7403
Training PCC: 0.0000, Validation PCC: -0.0027, Test PCC: -0.0004
Checkpoint saved for epoch 10
Total training time: 17205.23 seconds
loading best model from TeacherModel_RMSELoss_test_subject_1_wl100_ol75_diffusion


  model.load_state_dict(torch.load(filename))


TypeError: evaluate_model() missing 2 required positional arguments: 'beta_schedule_vals' and 'num_steps'

In [None]:

average_best_rmse = np.mean(best_rmse_per_subject)
average_best_pcc = np.mean(best_pcc_per_subject)
print(f"Average of best RMSEs across all subjects: {average_best_rmse:.4f}")
print(f"Average of best PCCs across all subjects: {average_best_pcc:.4f}")
print(best_rmse_per_subject)
print(best_pcc_per_subject)

# subjects = [f'Subject {i+1}' for i in range(len(best_rmse_per_subject))]

# print(best_rmse_per_subject)
# # Plot a bar chart with subject labels on the x-axis
# plt.figure(figsize=(10, 6))
# plt.bar(subjects, best_rmse_per_subject, color='blue', edgecolor='black')
# plt.title('Best RMSEs for Each Subject')
# plt.xlabel('Subjects')
# plt.ylabel('Best RMSE')
# plt.xticks(rotation=45, ha='right')
# plt.grid(True, axis='y')
# plt.tight_layout()
# plt.show()

In [None]:
import os
import zipfile
from datetime import datetime

notebook_name = 'regression_benchmark_diffusion'

# Create a timestamped folder name based on the notebook name
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
folder_name = f"{notebook_name}_checkpoints_{timestamp}"

# Make sure the folder exists
os.makedirs(folder_name, exist_ok=True)

checkpoint_dir = '.'

# Zip all checkpoint files and save in the new folder
zip_filename = f"{folder_name}.zip"
with zipfile.ZipFile(zip_filename, 'w') as zipf:
    # List files only in the current directory (no subfolders)
    for file in os.listdir(checkpoint_dir):
        if "TeacherModel" in str(file):
          file_path = os.path.join(checkpoint_dir, file)
          zipf.write(file_path, os.path.relpath(file_path, checkpoint_dir))
          print(f"Checkpoint {file} has been added to the zip file.")
print(f"All checkpoints have been zipped and saved as {zip_filename}.")




In [None]:
# Download the zip file to your local machine
from google.colab import files
files.download(zip_filename)