In [1]:

#mount drive
from google.colab import drive
drive.mount('/content/MyDrive')
import seaborn as sns
sns.set_theme("paper")



Drive already mounted at /content/MyDrive; to attempt to forcibly remount, call drive.mount("/content/MyDrive", force_remount=True).


In [2]:
# @title Initialize Config
import copy
import torch
import numpy
class Config:
    def __init__(self, **kwargs):
        self.channels_imu_acc = kwargs.get('channels_imu_acc', [])
        self.channels_imu_gyr = kwargs.get('channels_imu_gyr', [])
        self.channels_joints = kwargs.get('channels_joints', [])
        self.channels_emg = kwargs.get('channels_emg', [])
        self.seed = kwargs.get('seed', 42)
        self.data_folder_name = kwargs.get('data_folder_name', 'default_data_folder_name')
        self.dataset_root = kwargs.get('dataset_root', 'default_dataset_root')
        self.imu_transforms = kwargs.get('imu_transforms', [])
        self.joint_transforms = kwargs.get('joint_transforms', [])
        self.emg_transforms = kwargs.get('emg_transforms', [])
        self.input_format = kwargs.get('input_format', 'csv')


    def copy(self):
        return copy.deepcopy(self)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

config = Config(
    data_folder_name='/content/MyDrive/MyDrive/sd_datacollection_v4/all_subjects_data_final.h5',
    dataset_root='/content/datasets',
    input_format="csv",
    channels_imu_acc=['ACCX1', 'ACCY1', 'ACCZ1','ACCX2', 'ACCY2', 'ACCZ2', 'ACCX3', 'ACCY3', 'ACCZ3', 'ACCX4', 'ACCY4', 'ACCZ4', 'ACCX5', 'ACCY5', 'ACCZ5', 'ACCX6', 'ACCY6', 'ACCZ6'],
    channels_imu_gyr=['GYROX1', 'GYROY1', 'GYROZ1', 'GYROX2', 'GYROY2', 'GYROZ2', 'GYROX3', 'GYROY3', 'GYROZ3', 'GYROX4', 'GYROY4', 'GYROZ4', 'GYROX5', 'GYROY5', 'GYROZ5', 'GYROX6', 'GYROY6', 'GYROZ6'],
    channels_joints=['elbow_flex_r', 'arm_flex_r', 'arm_add_r'],
    channels_emg=['IM EMG4', 'IM EMG5', 'IM EMG6'],
)

#set seeds
torch.manual_seed(config.seed)
numpy.random.seed(config.seed)


In [3]:
class DataSharder:
    def __init__(self, config, split):
        self.config = config
        self.h5_file_path = config.data_folder_name  # Path to the HDF5 file
        self.split = split

    def load_data(self, subjects, window_length, window_overlap, dataset_name):
        print(f"Processing subjects: {subjects} with window length: {window_length}, overlap: {window_overlap}")

        self.window_length = window_length
        self.window_overlap = window_overlap

        # Process the data from the HDF5 file
        self._process_and_save_patients_h5(subjects, dataset_name)

    def _process_and_save_patients_h5(self, subjects, dataset_name):
        # Open the HDF5 file
        with h5py.File(self.h5_file_path, 'r') as h5_file:
            dataset_folder = os.path.join(self.config.dataset_root, dataset_name, self.split).replace("subject", "").replace("__", "_")
            print("Dataset folder:", dataset_folder)

            if os.path.exists(dataset_folder):
                print("Dataset Exists, Skipping...")
                return

            os.makedirs(dataset_folder, exist_ok=True)
            print("Dataset folder created: ", dataset_folder)

            for subject_id in tqdm(subjects, desc="Processing subjects"):
                subject_key = subject_id
                if subject_key not in h5_file:
                    print(f"Subject {subject_key} not found in the HDF5 file. Skipping.")
                    continue

                subject_data = h5_file[subject_key]
                session_keys = list(subject_data.keys())  # Sessions for this subject

                for session_id in session_keys:
                    session_data_group = subject_data[session_id]

                    for sessions_speed in session_data_group.keys():
                        session_data = session_data_group[sessions_speed]

                        # Extract IMU, EMG, and Joint data as numpy arrays
                        imu_data, imu_columns = self._extract_channel_data(session_data, self.config.channels_imu_acc + self.config.channels_imu_gyr)
                        emg_data, emg_columns = self._extract_channel_data(session_data, self.config.channels_emg)
                        joint_data, joint_columns = self._extract_channel_data(session_data, self.config.channels_joints)

                        # Shard the data into windows and save each window
                        self._save_windowed_data(imu_data, emg_data, joint_data, subject_key, session_id,sessions_speed, dataset_folder, imu_columns, emg_columns, joint_columns)

    def _save_windowed_data(self, imu_data, emg_data, joint_data, subject_key, session_id, session_speed, dataset_folder, imu_columns, emg_columns, joint_columns):
        window_size = self.window_length
        overlap = self.window_overlap
        step_size = window_size - overlap

        # Path to the CSV log file
        csv_file_path = os.path.join(dataset_folder, '..', f"{self.split}_info.csv")

        # Ensure the folder exists
        os.makedirs(dataset_folder, exist_ok=True)

        # Prepare CSV log headers (ensure the columns are 'file_name' and 'file_path')
        csv_headers = ['file_name', 'file_path']

        # Create or append to the CSV log file
        file_exists = os.path.isfile(csv_file_path)
        with open(csv_file_path, mode='a', newline='') as csv_file:
            writer = csv.writer(csv_file)

            # Write the headers only if the file is new
            if not file_exists:
                writer.writerow(csv_headers)

            # Determine the total data length based on the minimum length across the data sources
            total_data_length = min(imu_data.shape[1], emg_data.shape[1], joint_data.shape[1])

            # Adjust the starting point for windows based on total data length
            start = 2000 if total_data_length > 4000 else 0

            # Ensure that each window across imu_data, emg_data, and joint_data has the same shape before concatenation
            for i in range(start, total_data_length - window_size + 1, step_size):
                imu_window = imu_data[:, i:i + window_size]
                emg_window = emg_data[:, i:i + window_size]
                joint_window = joint_data[:, i:i + window_size]

                # Check if the window sizes are valid
                if imu_window.shape[1] == window_size and emg_window.shape[1] == window_size and joint_window.shape[1] == window_size:
                    # Convert windowed data to pandas DataFrame



                    imu_df = pd.DataFrame(imu_window.T, columns=imu_columns)
                    emg_df = pd.DataFrame(emg_window.T, columns=emg_columns)
                    joint_df = pd.DataFrame(joint_window.T, columns=joint_columns)



                    # Concatenate the data along the column axis
                    combined_df = pd.concat([imu_df, emg_df, joint_df], axis=1)

                    # Save the combined windowed data as a CSV file
                    file_name = f"{subject_key}_{session_id}_{session_speed}_win_{i}_ws{window_size}_ol{overlap}.csv"
                    file_path = os.path.join(dataset_folder, file_name)
                    combined_df.to_csv(file_path, index=False)

                    # Log the file name and path in the CSV (in the correct columns)
                    writer.writerow([file_name, file_path])
                else:
                    print(f"Skipping window {i} due to mismatched window sizes.")

    def _extract_channel_data(self, session_data, channels):
      extracted_data = []
      new_column_names = []  # Initialize here

      if isinstance(session_data, h5py.Dataset):
          if session_data.dtype.names:
              # Compound dataset
              column_names = session_data.dtype.names
              for channel in channels:
                  if channel in column_names:
                      channel_data = session_data[channel][:]
                      channel_data = pd.to_numeric(channel_data, errors='coerce')
                      df = pd.DataFrame(channel_data)
                      df_interpolated = df.interpolate(method='linear', axis=0, limit_direction='both')
                      extracted_data.append(df_interpolated.to_numpy().flatten())
                      new_column_names.append(channel)  # Populate here
                  else:
                      print(f"Channel {channel} not found in compound dataset.")
          else:
              # Simple dataset
              column_names = list(session_data.attrs.get('column_names', []))
              assert len(column_names) > 0, "column_names not found in dataset attributes"
              for channel in channels:
                  if channel in column_names:
                      col_idx = column_names.index(channel)
                      channel_data = session_data[:, col_idx]
                      channel_data = pd.to_numeric(channel_data, errors='coerce')
                      df = pd.DataFrame(channel_data)
                      df_interpolated = df.interpolate(method='linear', axis=0, limit_direction='both')
                      extracted_data.append(df_interpolated.to_numpy().flatten())
                      new_column_names.append(channel)
                  else:
                      print(f"Channel {channel} not found in session data.")

      return np.array(extracted_data), new_column_names


In [4]:
# @title Dataset creation
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from torch.utils.data import ConcatDataset
import random
from torch.utils.data import TensorDataset

class ImuJointPairDataset(Dataset):
    def __init__(self, config, subjects, window_length, window_overlap, split='train', dataset_train_name='train', dataset_test_name='test'):
        self.config = config
        self.split = split
        self.subjects = subjects
        self.window_length = window_length
        self.window_overlap = window_overlap if split == 'train' else 0
        self.input_format = config.input_format
        self.channels_imu_acc = config.channels_imu_acc
        self.channels_imu_gyr = config.channels_imu_gyr
        self.channels_joints = config.channels_joints
        self.channels_emg = config.channels_emg

        # Convert the list of subjects to a string that is path-safe
        subjects_str = "_".join(map(str, subjects)).replace('subject', '').replace('__', '_')

        # Use dataset_train_name or dataset_test_name based on split
        if split == 'train':
            dataset_name = f"dataset_wl{self.window_length}_ol{self.window_overlap}_train{subjects_str}"
        else:
            dataset_name = f"dataset_wl{self.window_length}_ol{self.window_overlap}_test{subjects_str}"

        self.dataset_name = dataset_name

        # Define the root directory based on dataset name
        self.root_dir = os.path.join(self.config.dataset_root, self.dataset_name)

        # Ensure sharded data exists, if not, reshard
        self.ensure_resharded(subjects, dataset_train_name if split == 'train' else dataset_test_name)

        info_path = os.path.join(self.root_dir, f"{split}_info.csv")
        self.data = pd.read_csv(info_path)

    def ensure_resharded(self, subjects, dataset_name):
        if not os.path.exists(self.root_dir):
            print(f"Sharded data not found at {self.root_dir}. Resharding...")
            data_sharder = DataSharder(self.config,self.split)
            # Pass dynamic parameters to sharder
            data_sharder.load_data(subjects, window_length=self.window_length, window_overlap=self.window_overlap, dataset_name=self.dataset_name)
        else:
            print(f"Sharded data found at {self.root_dir}. Skipping resharding.")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        file_path = os.path.join(self.root_dir,self.split, self.data.iloc[idx, 0])

        if self.input_format == "csv":
            combined_data = pd.read_csv(file_path)
        else:
            raise ValueError("Unsupported input format: {}".format(self.input_format))

        imu_data_acc, imu_data_gyr, joint_data, emg_data = self._extract_and_transform(combined_data)
        return imu_data_acc, imu_data_gyr, joint_data, emg_data

    def _extract_and_transform(self, combined_data):
        imu_data_acc = self._extract_channels(combined_data, self.channels_imu_acc)
        imu_data_gyr = self._extract_channels(combined_data, self.channels_imu_gyr)
        joint_data = self._extract_channels(combined_data, self.channels_joints)
        emg_data = self._extract_channels(combined_data, self.channels_emg)

        imu_data_acc = self.apply_transforms(imu_data_acc, self.config.imu_transforms)
        imu_data_gyr = self.apply_transforms(imu_data_gyr, self.config.imu_transforms)
        joint_data = self.apply_transforms(joint_data, self.config.joint_transforms)
        emg_data = self.apply_transforms(emg_data, self.config.emg_transforms)

        return imu_data_acc, imu_data_gyr, joint_data, emg_data

    def _extract_channels(self, combined_data, channels):
        return combined_data[channels].values if self.input_format == "csv" else combined_data[:, channels]

    def apply_transforms(self, data, transforms):
        for transform in transforms:
            data = transform(data)
        return torch.tensor(data, dtype=torch.float32)

class ImuJointPairSubjectDataset(ImuJointPairDataset):
    def __init__(self, config, subjects, window_length, window_overlap, split='train', dataset_train_name='train', dataset_test_name='test'):
        super().__init__(config, subjects, window_length, window_overlap, split, dataset_train_name, dataset_test_name)

        # Create a mapping from subject strings (e.g., 'subject_1') to class indices
        self.subject_mapping = {subject: i for i, subject in enumerate(sorted(subjects))}

    def __getitem__(self, idx):
        # Retrieve the original data from the parent class
        imu_data_acc, imu_data_gyr, joint_data, emg_data = super().__getitem__(idx)

        # Get the filename from the data index
        filename = self.data.iloc[idx, 0]

        # Extract the subject ID from the filename
        filename_base = os.path.basename(filename)
        filename_without_ext = os.path.splitext(filename_base)[0]
        parts = filename_without_ext.split('_')

        # Construct subject string in the format 'subject_x'
        try:
            subject_index = parts.index('subject')
            subject_str = f"subject_{parts[subject_index + 1]}"  # Create string like 'subject_1'
        except ValueError:
            raise ValueError(f"'subject' not found in filename: {filename}")
        except IndexError:
            raise ValueError(f"Subject ID not found after 'subject' in filename: {filename}")

        # Map subject_str to class index
        if subject_str not in self.subject_mapping:
            raise ValueError(f"Subject ID {subject_str} not found in training set.")

        mapped_class = self.subject_mapping[subject_str]

        # Return class index instead of one-hot encoding
        return imu_data_acc, imu_data_gyr, joint_data, emg_data, mapped_class

class ImuJointPairSubjectNormalizedDataset(ImuJointPairSubjectDataset):
    def __init__(self, config, subjects, window_length, window_overlap, split='train', dataset_train_name='train', dataset_test_name='test'):
        super().__init__(config, subjects, window_length, window_overlap, split, dataset_train_name, dataset_test_name)

        # Compute normalization statistics per subject for IMU and EMG data
        self.normalization_stats = {subject: {'imu_acc': [], 'imu_gyr': [], 'emg': []} for subject in subjects}

        # Loop through the dataset once to gather data for each subject
        for idx in range(len(self.data)):
            filename = self.data.iloc[idx]['file_name']
            subject_str = next((subject for subject in subjects if subject in filename), None)
            if subject_str:
                imu_data_acc, imu_data_gyr, joint_data, emg_data, mapped_class = super().__getitem__(idx)
                self.normalization_stats[subject_str]['imu_acc'].append(imu_data_acc)
                self.normalization_stats[subject_str]['imu_gyr'].append(imu_data_gyr)
                self.normalization_stats[subject_str]['emg'].append(emg_data)

        # Compute mean and std for each subject
        for subject, data in self.normalization_stats.items():
            if data['imu_acc']:
                imu_acc_data = torch.stack(data['imu_acc'])
                imu_gyr_data = torch.stack(data['imu_gyr'])
                emg_data = torch.stack(data['emg'])

                self.normalization_stats[subject] = {
                    'imu_acc_mean': imu_acc_data.mean(dim=0),
                    'imu_acc_std': imu_acc_data.std(dim=0),
                    'imu_gyr_mean': imu_gyr_data.mean(dim=0),
                    'imu_gyr_std': imu_gyr_data.std(dim=0),
                    'emg_mean': emg_data.mean(dim=0),
                    'emg_std': emg_data.std(dim=0)
                }

    def __getitem__(self, idx):
        # Retrieve the original data from the parent class
        imu_data_acc, imu_data_gyr, joint_data, emg_data, mapped_class = super().__getitem__(idx)

        # Get the filename from the data index
        filename = self.data.iloc[idx]['file_name']

        # Extract the subject ID from the filename
        subject_str = next((subject for subject in self.normalization_stats.keys() if subject in filename), None)
        if not subject_str:
            raise ValueError(f"Normalization stats not found for subject in filename: {filename}")

        # Apply normalization for each subject separately
        stats = self.normalization_stats[subject_str]
        imu_data_acc = (imu_data_acc - stats['imu_acc_mean']) / (stats['imu_acc_std'] + 1e-8)
        imu_data_gyr = (imu_data_gyr - stats['imu_gyr_mean']) / (stats['imu_gyr_std'] + 1e-8)
        emg_data = (emg_data - stats['emg_mean']) / (stats['emg_std'] + 1e-8)

        # Return normalized IMU data, joint data, EMG data, and class index
        return imu_data_acc, imu_data_gyr, joint_data, emg_data

def create_base_data_loaders(
    config,
    train_subjects,
    test_subjects,
    window_length=100,
    window_overlap=75,
    batch_size=64,
    dataset_train_name='train',
    dataset_test_name='test'
):
    # Create datasets with explicit parameters
    train_dataset = ImuJointPairDataset(
        config=config,
        subjects=train_subjects,
        window_length=window_length,
        window_overlap=window_overlap,
        split='train',
        dataset_train_name=dataset_train_name
    )

    test_dataset = ImuJointPairDataset(
        config=config,
        subjects=test_subjects,
        window_length=window_length,
        window_overlap=window_overlap,
        split='test',
        dataset_test_name=dataset_test_name
    )

    # Split train dataset into training and validation sets
    train_size = int(0.9 * len(train_dataset))
    val_size = len(train_dataset) - train_size
    train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader

def create_normbysub_data_loaders(
    config,
    train_subjects,
    test_subjects,
    window_length=100,
    window_overlap=75,
    batch_size=64,
    dataset_train_name='train',
    dataset_test_name='test'
):
    # Create datasets with explicit parameters
    train_dataset = ImuJointPairSubjectNormalizedDataset(
        config=config,
        subjects=train_subjects,
        window_length=window_length,
        window_overlap=window_overlap,
        split='train',
        dataset_train_name=dataset_train_name
    )

    test_dataset = ImuJointPairSubjectNormalizedDataset(
        config=config,
        subjects=test_subjects,
        window_length=window_length,
        window_overlap=window_overlap,
        split='test',
        dataset_test_name=dataset_test_name
    )

    # Split train dataset into training and validation sets
    train_size = int(0.9 * len(train_dataset))
    val_size = len(train_dataset) - train_size
    train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader




In [5]:
# @title Kinematicsnet Architecture
import torch
import torch.nn as nn
import torch.nn.functional as F
import time
from scipy.signal import butter, filtfilt
from sklearn.metrics import mean_squared_error
import numpy as np
class Encoder_1(nn.Module):
    def __init__(self, input_dim, dropout):
        super(Encoder_1, self).__init__()
        self.lstm_1 = nn.LSTM(input_dim, 128, bidirectional=True, batch_first=True, dropout=0)
        self.lstm_2 = nn.LSTM(256, 64, bidirectional=True, batch_first=True, dropout=0)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(128, 32)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, x):
        out_1, (h_1, _) = self.lstm_1(x)
        out_1 = self.dropout_1(out_1)
        out_2, (h_2, _) = self.lstm_2(out_1)
        out_2 = self.dropout_2(out_2)
        return out_2, (h_1, h_2)

class Encoder_2(nn.Module):
    def __init__(self, input_dim, dropout):
        super(Encoder_2, self).__init__()
        self.gru_1 = nn.GRU(input_dim, 128, bidirectional=True, batch_first=True, dropout=0)
        self.gru_2 = nn.GRU(256, 64, bidirectional=True, batch_first=True, dropout=0)
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(128, 32)
        self.dropout_1 = nn.Dropout(dropout)
        self.dropout_2 = nn.Dropout(dropout)

    def forward(self, x):
        out_1, h_1 = self.gru_1(x)
        out_1 = self.dropout_1(out_1)
        out_2, h_2 = self.gru_2(out_1)
        out_2 = self.dropout_2(out_2)
        return out_2, (h_1, h_2)


class GatingModule(nn.Module):
    def __init__(self, input_size):
        super(GatingModule, self).__init__()
        self.gate = nn.Sequential(
            nn.Linear(2*input_size, input_size),
            nn.Sigmoid()
        )

    def forward(self, input1, input2):
        # Apply gating mechanism
        gate_output = self.gate(torch.cat((input1,input2),dim=-1))

        # Scale the inputs based on the gate output
        gated_input1 = input1 * gate_output
        gated_input2 = input2 * (1 - gate_output)

        # Combine the gated inputs
        output = gated_input1 + gated_input2
        return output
#variable w needs to be checked for correct value, stand-in value used
class teacher(nn.Module):
    def __init__(self, input_acc, input_gyr, input_emg, drop_prob=0.25, w=100):
        super(teacher, self).__init__()

        self.w=w
        self.encoder_1_acc=Encoder_1(input_acc, drop_prob)
        self.encoder_1_gyr=Encoder_1(input_gyr, drop_prob)
        self.encoder_1_emg=Encoder_1(input_emg, drop_prob)

        self.encoder_2_acc=Encoder_2(input_acc, drop_prob)
        self.encoder_2_gyr=Encoder_2(input_gyr, drop_prob)
        self.encoder_2_emg=Encoder_2(input_emg, drop_prob)

        self.BN_acc= nn.BatchNorm1d(input_acc, affine=False)
        self.BN_gyr= nn.BatchNorm1d(input_gyr, affine=False)
        self.BN_emg= nn.BatchNorm1d(input_emg, affine=False)


        self.fc = nn.Linear(2*3*128+128,3)
        self.dropout=nn.Dropout(p=.05)

        self.gate_1=GatingModule(128)
        self.gate_2=GatingModule(128)
        self.gate_3=GatingModule(128)

        self.fc_kd = nn.Linear(3*128, 2*128)

               # Define the gating network
        self.weighted_feat = nn.Sequential(
            nn.Linear(128, 1),
            nn.Sigmoid())

        self.attention=nn.MultiheadAttention(3*128,4,batch_first=True)
        self.gating_net = nn.Sequential(nn.Linear(128*3, 3*128), nn.Sigmoid())
        self.gating_net_1 = nn.Sequential(nn.Linear(2*3*128+128, 2*3*128+128), nn.Sigmoid())

        self.pool = nn.MaxPool1d(kernel_size=2)


    def forward(self, x_acc, x_gyr, x_emg):

        x_acc_1=x_acc.view(x_acc.size(0)*x_acc.size(1),x_acc.size(-1))
        x_gyr_1=x_gyr.view(x_gyr.size(0)*x_gyr.size(1),x_gyr.size(-1))
        x_emg_1=x_emg.view(x_emg.size(0)*x_emg.size(1),x_emg.size(-1))

        x_acc_1=self.BN_acc(x_acc_1)
        x_gyr_1=self.BN_gyr(x_gyr_1)
        x_emg_1=self.BN_emg(x_emg_1)

        x_acc_2=x_acc_1.view(-1, self.w, x_acc_1.size(-1))
        x_gyr_2=x_gyr_1.view(-1, self.w, x_gyr_1.size(-1))
        x_emg_2=x_emg_1.view(-1, self.w, x_emg_1.size(-1))

        # Pass through Encoder 1 for each modality and capture hidden states
        x_acc_1, (h_acc_1, _) = self.encoder_1_acc(x_acc_2)
        x_gyr_1, (h_gyr_1, _) = self.encoder_1_gyr(x_gyr_2)
        x_emg_1, (h_emg_1, _) = self.encoder_1_emg(x_emg_2)

        # Pass through Encoder 2 for each modality and capture hidden states
        x_acc_2, (h_acc_2, _) = self.encoder_2_acc(x_acc_2)
        x_gyr_2, (h_gyr_2, _) = self.encoder_2_gyr(x_gyr_2)
        x_emg_2, (h_emg_2, _) = self.encoder_2_emg(x_emg_2)

        # x_acc=torch.cat((x_acc_1,x_acc_2),dim=-1)
        # x_gyr=torch.cat((x_gyr_1,x_gyr_2),dim=-1)
        # x_emg=torch.cat((x_emg_1,x_emg_2),dim=-1)

        x_acc=self.gate_1(x_acc_1,x_acc_2)
        x_gyr=self.gate_2(x_gyr_1,x_gyr_2)
        x_emg=self.gate_3(x_emg_1,x_emg_2)

        x=torch.cat((x_acc,x_gyr,x_emg),dim=-1)
        x_kd=self.fc_kd(x)


        out_1, attn_output_weights=self.attention(x,x,x)

        gating_weights = self.gating_net(x)
        out_2=gating_weights*x

        weights_1 = self.weighted_feat(x[:,:,0:128])
        weights_2 = self.weighted_feat(x[:,:,128:2*128])
        weights_3 = self.weighted_feat(x[:,:,2*128:3*128])
        x_1=weights_1*x[:,:,0:128]
        x_2=weights_2*x[:,:,128:2*128]
        x_3=weights_3*x[:,:,2*128:3*128]
        out_3=x_1+x_2+x_3

        out=torch.cat((out_1,out_2,out_3),dim=-1)

        gating_weights_1 = self.gating_net_1(out)
        out=gating_weights_1*out

        out=self.fc(out)

        #print(out.shape)
        return out, x_kd



class student_KD(nn.Module):
    def __init__(self, input_acc, input_gyr, drop_prob=0.25, w=100):
        super(student_KD, self).__init__()
        self.w=w

        self.encoder_1_acc=Encoder_1(input_acc, drop_prob)
        self.encoder_1_gyr=Encoder_1(input_gyr, drop_prob)

        self.encoder_2_acc=Encoder_2(input_acc, drop_prob)
        self.encoder_2_gyr=Encoder_2(input_gyr, drop_prob)

        self.BN_acc= nn.BatchNorm1d(input_acc, affine=False)
        self.BN_gyr= nn.BatchNorm1d(input_gyr, affine=False)

        self.fc_kd=nn.Linear(2*128,2*128)

        self.fc = nn.Linear(2*2*128+128,3)
        self.dropout=nn.Dropout(p=0.05)

        self.gate_1=GatingModule(128)
        self.gate_2=GatingModule(128)



               # Define the gating network
        self.weighted_feat = nn.Sequential(
            nn.Linear(128, 1),
            nn.Sigmoid())

        self.attention=nn.MultiheadAttention(2*128,4,batch_first=True)
        self.gating_net = nn.Sequential(nn.Linear(128*2, 2*128), nn.Sigmoid())
        self.gating_net_1 = nn.Sequential(nn.Linear(2*2*128+128, 2*2*128+128), nn.Sigmoid())


    def forward(self, x_acc, x_gyr):

        x_acc_1=x_acc.view(x_acc.size(0)*x_acc.size(1),x_acc.size(-1))
        x_gyr_1=x_gyr.view(x_gyr.size(0)*x_gyr.size(1),x_gyr.size(-1))

        x_acc_1=self.BN_acc(x_acc_1)
        x_gyr_1=self.BN_gyr(x_gyr_1)

        x_acc_2=x_acc_1.view(-1, self.w, x_acc_1.size(-1))
        x_gyr_2=x_gyr_1.view(-1, self.w, x_gyr_1.size(-1))

        x_acc_1,_=self.encoder_1_acc(x_acc_2)
        x_gyr_1,_=self.encoder_1_gyr(x_gyr_2)

        x_acc_2,_=self.encoder_2_acc(x_acc_2)
        x_gyr_2,_=self.encoder_2_gyr(x_gyr_2)


        x_acc=self.gate_1(x_acc_1,x_acc_2)
        x_gyr=self.gate_2(x_gyr_1,x_gyr_2)

        x=torch.cat((x_acc,x_gyr),dim=-1)

        x_KD=self.fc_kd(x)

        out_1, attn_output_weights=self.attention(x,x,x)

        gating_weights = self.gating_net(x)
        out_2=gating_weights*x

        weights_1 = self.weighted_feat(x[:,:,0:128])
        weights_2 = self.weighted_feat(x[:,:,128:2*128])
        x_1=weights_1*x[:,:,0:128]
        x_2=weights_2*x[:,:,128:2*128]
        out_3=x_1+x_2


        out=torch.cat((out_1,out_2,out_3),dim=-1)

        gating_weights_1 = self.gating_net_1(out)
        out=gating_weights_1*out

        out=self.fc(out)

        return out,x_KD




In [6]:
# @title Loss Functions
import statistics

class RMSELoss(nn.Module):
    def __init__(self):
        super(RMSELoss, self).__init__()
    def forward(self, output, target):
        loss = torch.sqrt(torch.mean((output - target) ** 2))
        return loss

#prediction function
def RMSE_prediction(yhat_4,test_y, output_dim,print_losses=True):

  s1=yhat_4.shape[0]*yhat_4.shape[1]

  test_o=test_y.reshape((s1,output_dim))
  yhat=yhat_4.reshape((s1,output_dim))




  y_1_no=yhat[:,0]
  y_2_no=yhat[:,1]
  y_3_no=yhat[:,2]

  y_1=y_1_no
  y_2=y_2_no
  y_3=y_3_no


  y_test_1=test_o[:,0]
  y_test_2=test_o[:,1]
  y_test_3=test_o[:,2]



  cutoff=6
  fs=200
  order=4

  nyq = 0.5 * fs
  ## filtering data ##
  def butter_lowpass_filter(data, cutoff, fs, order):
      normal_cutoff = cutoff / nyq
      # Get the filter coefficients
      b, a = butter(order, normal_cutoff, btype='low', analog=False)
      y = filtfilt(b, a, data)
      return y



  Z_1=y_1
  Z_2=y_2
  Z_3=y_3



  ###calculate RMSE

  rmse_1 =((np.sqrt(mean_squared_error(y_test_1,y_1))))
  rmse_2 =((np.sqrt(mean_squared_error(y_test_2,y_2))))
  rmse_3 =((np.sqrt(mean_squared_error(y_test_3,y_3))))





  p_1=np.corrcoef(y_1, y_test_1)[0, 1]
  p_2=np.corrcoef(y_2, y_test_2)[0, 1]
  p_3=np.corrcoef(y_3, y_test_3)[0, 1]




              ### Correlation ###
  p=np.array([p_1,p_2,p_3])
  #,p_4,p_5,p_6,p_7])




      #### Mean and standard deviation ####

  rmse=np.array([rmse_1,rmse_2,rmse_3])
  #,rmse_4,rmse_5,rmse_6,rmse_7])

      #### Mean and standard deviation ####
  m=statistics.mean(rmse)
  SD=statistics.stdev(rmse)


  m_c=statistics.mean(p)
  SD_c=statistics.stdev(p)


  if print_losses:
    print(rmse_1)
    print(rmse_2)
    print(rmse_3)
    print("\n")
    print(p_1)
    print(p_2)
    print(p_3)
    print('Mean: %.3f' % m,'+/- %.3f' %SD)
    print('Mean: %.3f' % m_c,'+/- %.3f' %SD_c)

  return rmse, p, Z_1,Z_2,Z_3
  #,Z_4,Z_5,Z_6,Z_7

In [7]:





# @title Model Utils

# Evaluation function
def evaluate_model(device, model, loader, criterion,imu_channels=None,gyro_channels=None):
    """Runs evaluation on the validation or test set."""
    model.eval()
    total_loss = 0.0
    total_pcc = np.zeros(len(config.channels_joints))
    total_rmse = np.zeros(len(config.channels_joints))

    with torch.no_grad():
        for i, (data_acc, data_gyr, target, data_EMG) in enumerate(loader):


            #check which model it is
            if isinstance(model, teacher):
              output= model(data_acc.to(device).float(), data_gyr.to(device).float(), data_EMG.to(device).float())
            elif isinstance(model, student_KD):
              student_data_acc = data_acc[:, :, imu_channels].to(device)
              student_data_gyr = data_gyr[:, :, gyro_channels].to(device)
              output= model(student_data_acc.to(device).float(), student_data_gyr.to(device).float())


            output,knowledge_distillation = output
            loss = criterion(output, target.to(device).float())


            batch_rmse, batch_pcc, _, _, _ = RMSE_prediction(output.detach().cpu().numpy(), target.detach().cpu().numpy(), len(config.channels_joints), print_losses=False)
            total_loss += loss.item()
            total_pcc += batch_pcc
            total_rmse += batch_rmse

    avg_loss = total_loss / len(loader)
    avg_pcc = total_pcc / len(loader)
    avg_rmse = total_rmse / len(loader)

    return avg_loss, avg_pcc, avg_rmse



def save_checkpoint(model, optimizer, epoch, filename, train_loss, val_loss, test_loss=None,
                    channelwise_metrics=None, history=None, curriculum_schedule=None):

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_loss,
        'val_loss': val_loss,
        'train_channelwise_metrics': channelwise_metrics['train'],
        'val_channelwise_metrics': channelwise_metrics['val'],
    }

    if test_loss is not None:
        checkpoint['test_loss'] = test_loss
        checkpoint['test_channelwise_metrics'] = channelwise_metrics['test']

    # Save the history (losses, PCCs, RMSEs, channel-wise metrics)
    if history:
        checkpoint['history'] = history

    # Save curriculum schedule
    if curriculum_schedule:
        checkpoint['curriculum_schedule'] = curriculum_schedule

    torch.save(checkpoint, filename)
    print(f"Checkpoint saved for epoch {epoch + 1}")



def train_teacher(device, train_loader, val_loader, test_loader, learn_rate, epochs, model, filename, loss_function, optimizer=None, l1_lambda=None, train_from_last_epoch=False, curriculum_loader=None):
    model.to(device)
    criterion = loss_function

    if optimizer is None:
        # Create a default Adam optimizer if none is passed
        optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)

    train_losses = []
    val_losses = []
    test_losses = []

    train_pccs = []
    val_pccs = []
    test_pccs = []

    train_rmses = []
    val_rmses = []
    test_rmses = []

    train_pccs_channelwise = []
    val_pccs_channelwise = []
    test_pccs_channelwise = []

    train_rmses_channelwise = []
    val_rmses_channelwise = []
    test_rmses_channelwise = []

    # Check for existing checkpoint to resume training
    last_epoch = 0
    checkpoint_path = f"/content/MyDrive/MyDrive/models/{filename}/"

    if train_from_last_epoch and os.path.exists(checkpoint_path):
        # Scan for the latest saved checkpoint
        checkpoints = [f for f in os.listdir(checkpoint_path) if f.endswith('.pth')]
        if checkpoints:
            checkpoints.sort(key=lambda x: int(x.split('_')[-1].split('.')[0]))  # Sort by epoch number
            latest_checkpoint = checkpoints[-1]
            print(f"Loading model from checkpoint: {latest_checkpoint}")
            checkpoint = torch.load(os.path.join(checkpoint_path, latest_checkpoint))
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            last_epoch = checkpoint['epoch']  # Continue from the next epoch

            # Load the history from checkpoint
            train_losses = checkpoint['history']['train_losses']
            val_losses = checkpoint['history']['val_losses']
            test_losses = checkpoint['history']['test_losses']
            train_pccs = checkpoint['history']['train_pccs']
            val_pccs = checkpoint['history']['val_pccs']
            test_pccs = checkpoint['history']['test_pccs']
            train_rmses = checkpoint['history']['train_rmses']
            val_rmses = checkpoint['history']['val_rmses']
            test_rmses = checkpoint['history']['test_rmses']
            train_pccs_channelwise = checkpoint['history']['train_pccs_channelwise']
            val_pccs_channelwise = checkpoint['history']['val_pccs_channelwise']
            test_pccs_channelwise = checkpoint['history']['test_pccs_channelwise']
            train_rmses_channelwise = checkpoint['history']['train_rmses_channelwise']
            val_rmses_channelwise = checkpoint['history']['val_rmses_channelwise']
            test_rmses_channelwise = checkpoint['history']['test_rmses_channelwise']
            if 'curriculum_schedule' in checkpoint:
                curriculum_loader.curriculum_schedule = checkpoint['curriculum_schedule']  # Load saved curriculum schedule
        else:
            print("No checkpoints found, starting from scratch.")
    else:
        print("Starting from scratch.")

    start_time = time.time()
    best_val_loss = float('inf')
    patience = 10
    patience_counter = 0

    for epoch in range(last_epoch, epochs):
        epoch_start_time = time.time()
        model.train()

        if curriculum_loader:
            curriculum_loader.update_epoch(epoch)
            train_loader, val_loader, test_loader = curriculum_loader.get_loaders()
        # Track metrics per channel
        epoch_train_loss = np.zeros(len(config.channels_joints))
        epoch_train_pcc = np.zeros(len(config.channels_joints))
        epoch_train_rmse = np.zeros(len(config.channels_joints))

        # Use epoch starting from `epoch + 1` since we want to reflect actual starting epoch correctly
        for i, (data_acc, data_gyr, target, data_EMG) in enumerate(tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs} Training")):
            optimizer.zero_grad()

            # Ensure inputs are properly sent to device and are of correct type
            output = model(data_acc.to(device).float(), data_gyr.to(device).float(), data_EMG.to(device).float())


            # Check if output is a tuple, take the first element if true
            if isinstance(model, teacher):
                output,knowledge_distillation = output
                loss = criterion(output, target.to(device).float())

            else:
                loss = criterion(output, target.to(device).float())

            # Compute loss



            # Apply L1 regularization if specified
            if l1_lambda is not None:
                l1_norm = sum(p.abs().sum() for p in model.parameters())
                total_loss = loss + l1_lambda * l1_norm
            else:
                total_loss = loss

            # Backpropagate the gradients for total_loss
            total_loss.backward()
            optimizer.step()

            # Detach tensors and move to CPU to prevent issues with gradient computation
            batch_rmse, batch_pcc, _, _, _ = RMSE_prediction(output.detach().cpu().numpy(), target.detach().cpu().numpy(), len(config.channels_joints), print_losses=False)

            # Accumulate loss, pcc, and rmse without modifying in-place
            epoch_train_loss += loss.detach().cpu().numpy()
            epoch_train_pcc += batch_pcc
            epoch_train_rmse += batch_rmse



        avg_train_loss = epoch_train_loss / len(train_loader)
        avg_train_pcc = epoch_train_pcc / len(train_loader)
        avg_train_rmse = epoch_train_rmse / len(train_loader)

        train_losses.append(avg_train_loss)
        train_pccs.append(np.mean(avg_train_pcc))  # Overall average PCC
        train_rmses.append(np.mean(avg_train_rmse))  # Overall average RMSE

        # Save channel-wise metrics
        train_pccs_channelwise.append(avg_train_pcc)  # Per channel
        train_rmses_channelwise.append(avg_train_rmse)  # Per channel

        # Evaluate on validation set every epoch
        avg_val_loss, avg_val_pcc, avg_val_rmse = evaluate_model(device, model, val_loader, criterion)
        val_losses.append(avg_val_loss)
        val_pccs.append(np.mean(avg_val_pcc))  # Overall average PCC
        val_rmses.append(np.mean(avg_val_rmse))  # Overall average RMSE

        # Save channel-wise metrics
        val_pccs_channelwise.append(avg_val_pcc)  # Per channel
        val_rmses_channelwise.append(avg_val_rmse)  # Per channel

        # Evaluate on test set and checkpoint every epoch
        avg_test_loss, avg_test_pcc, avg_test_rmse = evaluate_model(device, model, test_loader, criterion)
        test_losses.append(avg_test_loss)
        test_pccs.append(np.mean(avg_test_pcc))  # Overall average PCC
        test_rmses.append(np.mean(avg_test_rmse))  # Overall average RMSE

        # Save channel-wise metrics
        test_pccs_channelwise.append(avg_test_pcc)  # Per channel
        test_rmses_channelwise.append(avg_test_rmse)  # Per channel

        print(f"Epoch: {epoch + 1}, Training Loss: {np.mean(avg_train_loss):.4f}, Validation Loss: {np.mean(avg_val_loss):.4f}, Test Loss: {np.mean(avg_test_loss):.4f}")
        print(f"Training RMSE: {np.mean(avg_train_rmse)}, Validation RMSE: {np.mean(avg_val_rmse):.4f}, Test RMSE: {np.mean(avg_test_rmse):.4f}")
        print(f"Training PCC: {np.mean(avg_train_pcc)}, Validation PCC: {np.mean(avg_val_pcc):.4f}, Test PCC: {np.mean(avg_test_pcc):.4f}")

        # Save checkpoint, including curriculum schedule
        if not os.path.exists(checkpoint_path):
            os.makedirs(checkpoint_path)

        # Save checkpoint with the curriculum schedule
        history = {
            'train_losses': train_losses,
            'val_losses': val_losses,
            'test_losses': test_losses,
            'train_pccs': train_pccs,
            'val_pccs': val_pccs,
            'test_pccs': test_pccs,
            'train_rmses': train_rmses,
            'val_rmses': val_rmses,
            'test_rmses': test_rmses,
            'train_pccs_channelwise': train_pccs_channelwise,
            'val_pccs_channelwise': val_pccs_channelwise,
            'test_pccs_channelwise': test_pccs_channelwise,
            'train_rmses_channelwise': train_rmses_channelwise,
            'val_rmses_channelwise': val_rmses_channelwise,
            'test_rmses_channelwise': test_rmses_channelwise
        }

        save_checkpoint(
            model,
            optimizer,
            epoch,
            f"{checkpoint_path}/{filename}_epoch_{epoch + 1}.pth",
            train_loss=avg_train_loss,
            val_loss=avg_val_loss,
            test_loss=avg_test_loss,
            channelwise_metrics={
                'train': {'pcc': avg_train_pcc, 'rmse': avg_train_rmse},
                'val': {'pcc': avg_val_pcc, 'rmse': avg_val_rmse},
                'test': {'pcc': avg_test_pcc, 'rmse': avg_test_rmse},
            },
            history=history,  # Save history in the checkpoint
            curriculum_schedule=curriculum_loader.curriculum_schedule if curriculum_loader else None # Save curriculum schedule
        )

        # Early stopping logic
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), filename)
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Stopping early after {epoch + 1} epochs")
            break

    end_time = time.time()
    print(f"Total training time: {end_time - start_time:.2f} seconds")

    print(f"loading best model from {filename}")
    model.load_state_dict(torch.load(filename))
    model.eval()
    return model, train_losses, val_losses, test_losses, train_pccs, val_pccs, test_pccs, train_rmses, val_rmses, test_rmses






In [8]:
# @title Helper Functions


# Function to create the teacher model with defaults from config
def create_teacher_model(input_acc, input_gyr, input_emg, base_weights_path=None, drop_prob=0.25, w=100):
    model = teacher(input_acc, input_gyr, input_emg, drop_prob=drop_prob, w=w)

    if base_weights_path:
        # Load the initial weights from the base model
        model.load_state_dict(torch.load(base_weights_path))

    return model




In [9]:
import copy

def expand_indices(indices):
    """
    Given a list of indices, expand each index into its corresponding 3-channel range.
    """
    expanded = []
    for idx in indices:
        expanded.extend([idx * 3, idx * 3 + 1, idx * 3 + 2])  # Expand each index into 3 consecutive channels
    return expanded

def train_students(device, students_imu_gyro_channels, teacher_model, train_loader, val_loader, test_loader, learn_rate, epochs, student_model_creator, filename, loss_function, optimizer=None, l1_lambda=None, alpha=0.5, beta=0.5):
    for student_idx, (imu_indices, gyro_indices) in enumerate(students_imu_gyro_channels):
        print(f"Training for student {student_idx + 1} with IMU indices {imu_indices} and Gyro indices {gyro_indices}")

        # Expand the indices into the corresponding 3 channels per index
        imu_channels = expand_indices(imu_indices)  # Automatically expand indices for IMU
        gyro_channels = expand_indices(gyro_indices)  # Automatically expand indices for Gyroscope

        print(f"Expanded IMU channels: {imu_channels}, Expanded Gyro channels: {gyro_channels}")

        # Create a new student model for each student
        student_model = student_model_creator(imu_channels, gyro_channels)
        student_model = student_model.to(device)
        student_optimizer = torch.optim.Adam(student_model.parameters(), lr=learn_rate) if optimizer is None else copy.deepcopy(optimizer)

        # Train the student model
        student_filename = f"{filename}_student_{student_idx + 1}"
        student_model, train_losses, val_losses = train_student(
            device=device,
            alpha=alpha,
            beta=beta,
            train_loader=train_loader,
            val_loader=val_loader,
            learn_rate=learn_rate,
            epochs=epochs,
            student_model=student_model,
            filename=student_filename,
            teacher_model=teacher_model,
            optimizer=student_optimizer,
            loss_function=loss_function,
            imu_channels=imu_channels,
            gyro_channels=gyro_channels,
            l1_lambda=l1_lambda
        )

        print(f"Completed training for student {student_idx + 1}")


def train_student(device, alpha, beta, train_loader, val_loader, learn_rate, epochs, student_model, filename, teacher_model, optimizer, loss_function, imu_channels, gyro_channels, l1_lambda=None):
    criterion_2 = loss_function

    train_losses = []
    val_losses = []
    running_loss = 0
    best_val_loss = float('inf')
    patience = 10
    patience_counter = 0

    for epoch in range(epochs):
        epoch_start_time = time.time()
        student_model.train()

        for i, (data_acc, data_gyr, target, data_EMG) in enumerate(train_loader):
            optimizer.zero_grad()

            # Extract the relevant channels for this student
            student_data_acc = data_acc[:, :, imu_channels].to(device)  # Select channels based on expanded imu_channels
            student_data_gyr = data_gyr[:, :, gyro_channels].to(device)  # Select channels based on expanded gyro_channels

            # Forward pass for student
            student_output, student_features = student_model(student_data_acc, student_data_gyr)

            # Forward pass for teacher
            with torch.no_grad():
                teacher_output, teacher_features = teacher_model(data_acc.to(device), data_gyr.to(device), data_EMG.to(device))

            # Calculate loss
            loss = criterion_2(student_output, target.to(device).float()) \
                   + alpha * criterion_2(student_output, teacher_output) \
                   + beta * criterion_2(student_features, teacher_features)

            # Backpropagation
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Calculate average train loss for this epoch
        train_loss = running_loss / len(train_loader)
        train_losses.append(train_loss)

        # Validation
        student_model.eval()
        val_loss = 0
        with torch.no_grad():
            for data_acc, data_gyr, target, data_EMG in val_loader:
                # Extract the relevant channels for validation
                student_data_acc = data_acc[:, :, imu_channels].to(device)
                student_data_gyr = data_gyr[:, :, gyro_channels].to(device)

                output, _ = student_model(student_data_acc, student_data_gyr)
                val_loss += criterion_2(output, target.to(device)).item()

        val_loss /= len(val_loader)
        val_losses.append(val_loss)

        epoch_end_time = time.time()

        print(f"Epoch {epoch + 1}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

        # Reset running loss
        running_loss = 0

        # Save the best model based on validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(student_model.state_dict(), filename)
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Stopping early after {epoch + 1} epochs")
            break

    print(f"Finished training. Model saved at {filename}")
    return student_model, train_losses, val_losses


In [10]:
import matplotlib.pyplot as plt
import numpy as np
import os
import h5py
from tqdm.notebook import tqdm
import pandas as pd
import csv

all_subjects= [f"subject_{x}" for x in range(1,14)]
input_acc, input_gyr, input_emg = 18,18,3
batch_size = 64

# Placeholder for storing best RMSEs and PCCs
best_rmse_per_subject = []
best_pcc_per_subject = []

# Store metrics per teacher and student
metrics_per_teacher_student = {}

students_imu_gyro_channels = [
    ([1], [1]),  # Student 1's IMU and Gyro channels (first 3 sensors)
    ([1,2], [1,2]),  # Student 2's IMU and Gyro channels (next 3 sensors)
    ([1,2,3], [1,2,3]),
    ([1,2,3,4], [1,2,3,4]),
    # Add more students
]


train_flag = True

for test_subject in all_subjects:
    print(f"Running training with {test_subject} as the test subject.")

    # Set up the training subjects (all except the test subject)
    train_subjects = [subject for subject in all_subjects if subject != test_subject]

    model_name = f'TeacherModel_RMSELoss_test_{test_subject}_wl{100}_ol{75}_nbs'
    print(f"Model: {model_name}")

    # Load the model configuration and data loaders for the teacher model
    model_config = {
        'model': create_teacher_model(input_acc, input_gyr, input_emg, w=100),
        'loss': RMSELoss(),
        'loaders': create_normbysub_data_loaders(
            config=config,
            train_subjects=train_subjects,
            test_subjects=[test_subject],
            window_length=100,
            window_overlap=75,
            batch_size=batch_size
        ),
        'epochs': 10,
        'use_curriculum': False
    }

    model = model_config['model']
    loss_function = model_config['loss']
    epochs = model_config.get("epochs", 10)
    device = model_config.get("device", torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    learn_rate = model_config.get("learn_rate", 0.001)
    use_curriculum = model_config.get("use_curriculum", False)

    optimizer = model_config.get("optimizer", None)
    l1_lambda = model_config.get("l1_lambda", None)

    print(f"Running model: {model_name}")

    # Unpack the static loaders tuple (train_loader, val_loader, test_loader)
    train_loader, val_loader, test_loader = model_config['loaders']

    if train_flag:
        # Train the teacher model and save only the best based on validation loss
        model, train_losses, val_losses, test_losses, train_pccs, val_pccs, test_pccs, train_rmses, val_rmses, test_rmses = train_teacher(
            device=device,
            train_loader=train_loader,
            val_loader=val_loader,
            test_loader=test_loader,
            learn_rate=learn_rate,
            epochs=epochs,
            model=model,
            filename=model_name,
            loss_function=loss_function,
            optimizer=optimizer,
            l1_lambda=l1_lambda,
            train_from_last_epoch=False
        )
    else:
        # Load the trained teacher model
        model.load_state_dict(torch.load(f"{model_name}"))
        model.to(device)
        model.eval()

    # Evaluate the teacher model on the test set and record results
    test_loss, test_pcc, test_rmse = evaluate_model(device, model, test_loader, loss_function)
    print(f"Teacher Test Loss: {test_loss:.4f}, Test PCC: {np.mean(test_pcc):.4f}, Test RMSE: {np.mean(test_rmse):.4f}")

    # Store teacher metrics
    metrics_per_teacher_student[f'teacher_{test_subject}'] = {
        'model': model,
        'train_losses': train_losses,
        'val_losses': val_losses,
        'test_losses': test_losses,
        'train_pccs': train_pccs,
        'val_pccs': val_pccs,
        'test_pccs': test_pccs,
        'train_rmses': train_rmses,
        'val_rmses': val_rmses,
        'test_rmses': test_rmses
    }

    # Now, train the student models both with and without knowledge distillation
    for student_idx, (imu_indices, gyro_indices) in enumerate(students_imu_gyro_channels):
        print(f"Training Student {student_idx + 1} for test subject {test_subject}.")

        # Expand IMU and gyro indices to 3 channels each
        imu_channels_expanded = expand_indices(imu_indices)
        gyro_channels_expanded = expand_indices(gyro_indices)

        # Create student models
        student_model_kd = student_KD(len(imu_channels_expanded), len(gyro_channels_expanded)).to(device)
        student_model_no_kd = student_KD(len(imu_channels_expanded), len(gyro_channels_expanded)).to(device) # Deep copy for no KD model

        student_optimizer_kd = torch.optim.Adam(student_model_kd.parameters(), lr=learn_rate)
        student_optimizer_no_kd = torch.optim.Adam(student_model_no_kd.parameters(), lr=learn_rate)

        # Placeholder to store individual student metrics
        student_metrics = {
            'with_kd': {'model': student_model_kd, 'train_losses': [], 'val_losses': [], 'test_losses': [], 'train_pccs': [], 'val_pccs': [], 'test_pccs': [], 'train_rmses': [], 'val_rmses': [], 'test_rmses': []},
            'without_kd': {'model': student_model_no_kd, 'train_losses': [], 'val_losses': [], 'test_losses': [], 'train_pccs': [], 'val_pccs': [], 'test_pccs': [], 'train_rmses': [], 'val_rmses': [], 'test_rmses': []}
        }

        # Train with knowledge distillation
        student_model_kd, student_train_losses_kd, student_val_losses_kd = train_student(
            device=device,
            alpha=0.5,  # Alpha controls the weight for distillation from the teacher's output
            beta=0.5,   # Beta controls the weight for feature matching
            train_loader=train_loader,
            val_loader=val_loader,
            learn_rate=learn_rate,
            epochs=epochs,
            student_model=student_model_kd,
            filename=f'{model_name}_SKD_test_{test_subject}_student_{student_idx+1}',
            teacher_model=model,
            optimizer=student_optimizer_kd,
            loss_function=loss_function,
            imu_channels=imu_channels_expanded,
            gyro_channels=gyro_channels_expanded,
            l1_lambda=l1_lambda
        )

        # Save metrics for student with KD
        student_test_loss_kd, student_test_pcc_kd, student_test_rmse_kd = evaluate_model(device, student_model_kd, test_loader, loss_function,imu_channels_expanded,gyro_channels_expanded)
        student_metrics['with_kd']['train_losses'] = student_train_losses_kd
        student_metrics['with_kd']['val_losses'] = student_val_losses_kd
        student_metrics['with_kd']['test_losses'] = student_test_loss_kd
        student_metrics['with_kd']['train_rmses'] = student_test_rmse_kd
        student_metrics['with_kd']['test_pccs'] = student_test_pcc_kd

        print(f"Student {student_idx + 1} KD Test Loss: {student_test_loss_kd:.4f}, Test PCC: {np.mean(student_test_pcc_kd):.4f}, Test RMSE: {np.mean(student_test_rmse_kd):.4f}")

        # Train without knowledge distillation
        student_model_no_kd, student_train_losses_no_kd, student_val_losses_no_kd = train_student(
            device=device,
            alpha=0.0,  # No distillation from teacher
            beta=0.0,   # No feature matching
            train_loader=train_loader,
            val_loader=val_loader,
            learn_rate=learn_rate,
            epochs=epochs,
            student_model=student_model_no_kd,
            filename=f'StudentModel_no_KD_test_{test_subject}_student_{student_idx+1}',
            teacher_model=model,
            optimizer=student_optimizer_no_kd,
            loss_function=loss_function,
            imu_channels=imu_channels_expanded,
            gyro_channels=gyro_channels_expanded,
            l1_lambda=l1_lambda
        )

        # Save metrics for student without KD
        student_test_loss_no_kd, student_test_pcc_no_kd, student_test_rmse_no_kd = evaluate_model(device, student_model_no_kd, test_loader, loss_function,imu_channels_expanded,gyro_channels_expanded)
        student_metrics['without_kd']['train_losses'] = student_train_losses_no_kd
        student_metrics['without_kd']['val_losses'] = student_val_losses_no_kd
        student_metrics['without_kd']['test_losses'] = student_test_loss_no_kd
        student_metrics['without_kd']['train_rmses'] = student_test_rmse_no_kd
        student_metrics['without_kd']['test_pccs'] = student_test_pcc_no_kd

        print(f"Student {student_idx + 1} No KD Test Loss: {student_test_loss_no_kd:.4f}, Test PCC: {np.mean(student_test_pcc_no_kd):.4f}, Test RMSE: {np.mean(student_test_rmse_no_kd):.4f}")

        # Add student metrics to the main dictionary
        metrics_per_teacher_student[f'teacher_{test_subject}_student_{student_idx+1}'] = student_metrics

    print(f"Finished training for test subject {test_subject}.")



Running training with subject_1 as the test subject.
Model: TeacherModel_RMSELoss_test_subject_1_wl100_ol75_nbs
Sharded data found at /content/datasets/dataset_wl100_ol75_train_2_3_4_5_6_7_8_9_10_11_12_13. Skipping resharding.
Sharded data found at /content/datasets/dataset_wl100_ol0_test_1. Skipping resharding.
Running model: TeacherModel_RMSELoss_test_subject_1_wl100_ol75_nbs
Starting from scratch.


Epoch 1/10 Training:   0%|          | 0/327 [00:00<?, ?it/s]

Epoch: 1, Training Loss: 18.1764, Validation Loss: 10.6894, Test Loss: 17.0923
Training RMSE: 17.694606410132515, Validation RMSE: 10.4181, Test RMSE: 16.3254
Training PCC: 0.7860801328025845, Validation PCC: 0.9419, Test PCC: 0.6899
Checkpoint saved for epoch 1


Epoch 2/10 Training:   0%|          | 0/327 [00:00<?, ?it/s]

Epoch: 2, Training Loss: 10.4374, Validation Loss: 8.5986, Test Loss: 16.6219
Training RMSE: 10.05404296003958, Validation RMSE: 8.3199, Test RMSE: 15.8302
Training PCC: 0.9501034294357483, Validation PCC: 0.9651, Test PCC: 0.7152
Checkpoint saved for epoch 2


Epoch 3/10 Training:   0%|          | 0/327 [00:00<?, ?it/s]

Epoch: 3, Training Loss: 9.0535, Validation Loss: 7.8706, Test Loss: 16.8880
Training RMSE: 8.673678356330084, Validation RMSE: 7.5252, Test RMSE: 16.2529
Training PCC: 0.9641621050575448, Validation PCC: 0.9714, Test PCC: 0.7123
Checkpoint saved for epoch 3


Epoch 4/10 Training:   0%|          | 0/327 [00:00<?, ?it/s]

Epoch: 4, Training Loss: 8.3457, Validation Loss: 7.6371, Test Loss: 18.3592
Training RMSE: 7.987794307879838, Validation RMSE: 7.3770, Test RMSE: 17.1128
Training PCC: 0.9695664817202179, Validation PCC: 0.9752, Test PCC: 0.6979
Checkpoint saved for epoch 4


Epoch 5/10 Training:   0%|          | 0/327 [00:00<?, ?it/s]

Epoch: 5, Training Loss: 7.7698, Validation Loss: 7.2621, Test Loss: 16.3370
Training RMSE: 7.432592129974676, Validation RMSE: 7.0028, Test RMSE: 15.7902
Training PCC: 0.9733849095121565, Validation PCC: 0.9772, Test PCC: 0.7101
Checkpoint saved for epoch 5


Epoch 6/10 Training:   0%|          | 0/327 [00:00<?, ?it/s]

Epoch: 6, Training Loss: 7.3777, Validation Loss: 6.6888, Test Loss: 17.5618
Training RMSE: 7.068099114265404, Validation RMSE: 6.4003, Test RMSE: 16.5897
Training PCC: 0.9757616144711404, Validation PCC: 0.9795, Test PCC: 0.6819
Checkpoint saved for epoch 6


Epoch 7/10 Training:   0%|          | 0/327 [00:00<?, ?it/s]

Epoch: 7, Training Loss: 6.9185, Validation Loss: 6.5706, Test Loss: 17.7941
Training RMSE: 6.657101433576064, Validation RMSE: 6.3235, Test RMSE: 16.9429
Training PCC: 0.9781071046187914, Validation PCC: 0.9798, Test PCC: 0.6779
Checkpoint saved for epoch 7


Epoch 8/10 Training:   0%|          | 0/327 [00:00<?, ?it/s]

Epoch: 8, Training Loss: 6.6413, Validation Loss: 6.3155, Test Loss: 19.8130
Training RMSE: 6.404641700204117, Validation RMSE: 6.1439, Test RMSE: 18.3708
Training PCC: 0.9796323418026862, Validation PCC: 0.9827, Test PCC: 0.6859
Checkpoint saved for epoch 8


Epoch 9/10 Training:   0%|          | 0/327 [00:00<?, ?it/s]

Epoch: 9, Training Loss: 6.4346, Validation Loss: 5.7093, Test Loss: 18.0899
Training RMSE: 6.219590530239965, Validation RMSE: 5.5201, Test RMSE: 17.0820
Training PCC: 0.980899607345343, Validation PCC: 0.9844, Test PCC: 0.6909
Checkpoint saved for epoch 9


Epoch 10/10 Training:   0%|          | 0/327 [00:00<?, ?it/s]

Epoch: 10, Training Loss: 6.1263, Validation Loss: 5.5831, Test Loss: 19.5019
Training RMSE: 5.9292009107684995, Validation RMSE: 5.4312, Test RMSE: 18.4316
Training PCC: 0.9822841279581093, Validation PCC: 0.9856, Test PCC: 0.7172
Checkpoint saved for epoch 10
Total training time: 1255.35 seconds
loading best model from TeacherModel_RMSELoss_test_subject_1_wl100_ol75_nbs


  model.load_state_dict(torch.load(filename))


Teacher Test Loss: 19.5019, Test PCC: 0.7172, Test RMSE: 18.4316
Training Student 1 for test subject subject_1.
Epoch 1, Training Loss: 44.2141, Validation Loss: 21.3041
Epoch 2, Training Loss: 29.2529, Validation Loss: 16.3771
Epoch 3, Training Loss: 24.5822, Validation Loss: 14.1466
Epoch 4, Training Loss: 22.0945, Validation Loss: 14.0474
Epoch 5, Training Loss: 20.4688, Validation Loss: 12.5145
Epoch 6, Training Loss: 19.5848, Validation Loss: 11.4150
Epoch 7, Training Loss: 18.0296, Validation Loss: 11.1813
Epoch 8, Training Loss: 17.3036, Validation Loss: 11.8643
Epoch 9, Training Loss: 16.7350, Validation Loss: 10.0529
Epoch 10, Training Loss: 15.8020, Validation Loss: 9.8162
Finished training. Model saved at TeacherModel_RMSELoss_test_subject_1_wl100_ol75_nbs_SKD_test_subject_1_student_1
Student 1 KD Test Loss: 25.2018, Test PCC: 0.5349, Test RMSE: 23.9801
Epoch 1, Training Loss: 29.7654, Validation Loss: 21.5198
Epoch 2, Training Loss: 19.6518, Validation Loss: 16.2257
Epoch 3

Processing subjects:   0%|          | 0/12 [00:00<?, ?it/s]

Sharded data not found at /content/datasets/dataset_wl100_ol0_test_2. Resharding...
Processing subjects: ['subject_2'] with window length: 100, overlap: 0
Dataset folder: /content/datasets/dataset_wl100_ol0_test_2/test
Dataset folder created:  /content/datasets/dataset_wl100_ol0_test_2/test


Processing subjects:   0%|          | 0/1 [00:00<?, ?it/s]

Running model: TeacherModel_RMSELoss_test_subject_2_wl100_ol75_nbs
Starting from scratch.


Epoch 1/10 Training:   0%|          | 0/328 [00:00<?, ?it/s]

Epoch: 1, Training Loss: 17.4897, Validation Loss: 9.7122, Test Loss: 21.6197
Training RMSE: 17.01839024361556, Validation RMSE: 9.3167, Test RMSE: 19.8339
Training PCC: 0.7978459158258707, Validation PCC: 0.9529, Test PCC: 0.6696
Checkpoint saved for epoch 1


Epoch 2/10 Training:   0%|          | 0/328 [00:00<?, ?it/s]

Epoch: 2, Training Loss: 9.1805, Validation Loss: 7.4130, Test Loss: 22.2873
Training RMSE: 8.859599885901785, Validation RMSE: 7.1065, Test RMSE: 20.5057
Training PCC: 0.9594911839276236, Validation PCC: 0.9720, Test PCC: 0.6801
Checkpoint saved for epoch 2


Epoch 3/10 Training:   0%|          | 0/328 [00:00<?, ?it/s]

Epoch: 3, Training Loss: 7.9130, Validation Loss: 6.7094, Test Loss: 21.9314
Training RMSE: 7.6038662437501, Validation RMSE: 6.3724, Test RMSE: 20.2656
Training PCC: 0.9707861436657335, Validation PCC: 0.9775, Test PCC: 0.6782
Checkpoint saved for epoch 3


Epoch 4/10 Training:   0%|          | 0/328 [00:00<?, ?it/s]

Epoch: 4, Training Loss: 7.0855, Validation Loss: 6.5339, Test Loss: 21.6507
Training RMSE: 6.794724139256206, Validation RMSE: 6.2545, Test RMSE: 20.0632
Training PCC: 0.9765536799969275, Validation PCC: 0.9800, Test PCC: 0.6751
Checkpoint saved for epoch 4


Epoch 5/10 Training:   0%|          | 0/328 [00:00<?, ?it/s]

Epoch: 5, Training Loss: 6.6196, Validation Loss: 6.2343, Test Loss: 22.1801
Training RMSE: 6.3446637379929305, Validation RMSE: 6.0064, Test RMSE: 20.6411
Training PCC: 0.9796201511634011, Validation PCC: 0.9830, Test PCC: 0.6787
Checkpoint saved for epoch 5


Epoch 6/10 Training:   0%|          | 0/328 [00:00<?, ?it/s]

Epoch: 6, Training Loss: 6.4563, Validation Loss: 5.6434, Test Loss: 22.5757
Training RMSE: 6.1824682866170155, Validation RMSE: 5.3547, Test RMSE: 20.9550
Training PCC: 0.9806647642786226, Validation PCC: 0.9848, Test PCC: 0.6661
Checkpoint saved for epoch 6


Epoch 7/10 Training:   0%|          | 0/328 [00:00<?, ?it/s]

Epoch: 7, Training Loss: 5.9176, Validation Loss: 5.5816, Test Loss: 22.1071
Training RMSE: 5.6884978898172465, Validation RMSE: 5.2547, Test RMSE: 20.5149
Training PCC: 0.9835057214208502, Validation PCC: 0.9854, Test PCC: 0.6709
Checkpoint saved for epoch 7


Epoch 8/10 Training:   0%|          | 0/328 [00:00<?, ?it/s]

Epoch: 8, Training Loss: 5.6715, Validation Loss: 5.2267, Test Loss: 21.7744
Training RMSE: 5.450728775524511, Validation RMSE: 4.9513, Test RMSE: 20.1346
Training PCC: 0.9847467569966614, Validation PCC: 0.9872, Test PCC: 0.6814
Checkpoint saved for epoch 8


Epoch 9/10 Training:   0%|          | 0/328 [00:00<?, ?it/s]

Epoch: 9, Training Loss: 5.4989, Validation Loss: 5.3927, Test Loss: 21.3634
Training RMSE: 5.304114878904529, Validation RMSE: 5.1182, Test RMSE: 19.7223
Training PCC: 0.9854049478736947, Validation PCC: 0.9865, Test PCC: 0.6791
Checkpoint saved for epoch 9


Epoch 10/10 Training:   0%|          | 0/328 [00:00<?, ?it/s]

Epoch: 10, Training Loss: 5.3608, Validation Loss: 5.4220, Test Loss: 21.4894
Training RMSE: 5.172516094717553, Validation RMSE: 5.0544, Test RMSE: 19.8506
Training PCC: 0.9861923198467192, Validation PCC: 0.9881, Test PCC: 0.6654
Checkpoint saved for epoch 10
Total training time: 1307.10 seconds
loading best model from TeacherModel_RMSELoss_test_subject_2_wl100_ol75_nbs


  model.load_state_dict(torch.load(filename))


Teacher Test Loss: 21.7744, Test PCC: 0.6814, Test RMSE: 20.1346
Training Student 1 for test subject subject_2.
Epoch 1, Training Loss: 43.0080, Validation Loss: 20.6187
Epoch 2, Training Loss: 28.5050, Validation Loss: 16.8231
Epoch 3, Training Loss: 23.7943, Validation Loss: 13.8835
Epoch 4, Training Loss: 20.9422, Validation Loss: 12.7940
Epoch 5, Training Loss: 19.2398, Validation Loss: 11.8230
Epoch 6, Training Loss: 17.9843, Validation Loss: 11.0043
Epoch 7, Training Loss: 17.3145, Validation Loss: 10.9618
Epoch 8, Training Loss: 16.3626, Validation Loss: 10.0257
Epoch 9, Training Loss: 15.7505, Validation Loss: 9.8220
Epoch 10, Training Loss: 15.1740, Validation Loss: 10.0354
Finished training. Model saved at TeacherModel_RMSELoss_test_subject_2_wl100_ol75_nbs_SKD_test_subject_2_student_1
Student 1 KD Test Loss: 26.1785, Test PCC: 0.5777, Test RMSE: 23.8957
Epoch 1, Training Loss: 28.9310, Validation Loss: 21.3722
Epoch 2, Training Loss: 19.1891, Validation Loss: 16.3438
Epoch 3


KeyboardInterrupt



In [None]:

average_best_rmse = np.mean(best_rmse_per_subject)
average_best_pcc = np.mean(best_pcc_per_subject)
print(f"Average of best RMSEs across all subjects: {average_best_rmse:.4f}")
print(f"Average of best PCCs across all subjects: {average_best_pcc:.4f}")
print(best_rmse_per_subject)
print(best_pcc_per_subject)

# subjects = [f'Subject {i+1}' for i in range(len(best_rmse_per_subject))]

# print(best_rmse_per_subject)
# # Plot a bar chart with subject labels on the x-axis
# plt.figure(figsize=(10, 6))
# plt.bar(subjects, best_rmse_per_subject, color='blue', edgecolor='black')
# plt.title('Best RMSEs for Each Subject')
# plt.xlabel('Subjects')
# plt.ylabel('Best RMSE')
# plt.xticks(rotation=45, ha='right')
# plt.grid(True, axis='y')
# plt.tight_layout()
# plt.show()

In [None]:
import os
import zipfile
from datetime import datetime

notebook_name = 'regression_benchmark_normalizebysubject_withstudents'

# Create a timestamped folder name based on the notebook name
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
folder_name = f"{notebook_name}_checkpoints_{timestamp}"

# Make sure the folder exists
os.makedirs(folder_name, exist_ok=True)

checkpoint_dir = '.'

# Zip all checkpoint files and save in the new folder
zip_filename = f"{folder_name}.zip"
with zipfile.ZipFile(zip_filename, 'w') as zipf:
    # List files only in the current directory (no subfolders)
    for file in os.listdir(checkpoint_dir):
        if "TeacherModel" in str(file):
          file_path = os.path.join(checkpoint_dir, file)
          zipf.write(file_path, os.path.relpath(file_path, checkpoint_dir))
          print(f"Checkpoint {file} has been added to the zip file.")
print(f"All checkpoints have been zipped and saved as {zip_filename}.")




In [None]:
#copy zip file into google drive
import shutil

destination_path = '/content/MyDrive/MyDrive/models'

shutil.copy(zip_filename, destination_path)

In [None]:
import pickle
from google.colab import files
# Save the dictionary to a file
with open(f"{notebook_name}.pkl", 'wb') as f:
    pickle.dump(metrics_per_teacher_student, f)

print(f"Dictionary saved to {notebook_name}.pkl")

# Download the file
files.download(f"{notebook_name}.pkl")
