In [1]:
from google.colab import drive
import os

# Mount Google Drive so the project folder is reachable from the Colab runtime.
drive.mount('/content/drive')
# Every relative path used later in the notebook (e.g. 'Protocol', 'attack_results_noDP/...')
# is resolved against this folder because it becomes the working directory.
active_directory = '/content/drive/MyDrive/Desktop/DP_Finetuning_Harnet_Submission'
os.chdir(active_directory)

Mounted at /content/drive


## IMPORT LIBRARIES

In [2]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import json
import pickle
from warnings import filterwarnings
from pandas.errors import SettingWithCopyWarning
import copy

filterwarnings("ignore", category=SettingWithCopyWarning)
filterwarnings('ignore', category=UserWarning)

## READ SUBJECT FILES

Each of the data-files contains 54 columns per row, the columns contain the following data:

-  1 timestamp (s)
- 2 activityID (see II.2. for the mapping to the activities)
- 3 heart rate (bpm)
- 4-20 IMU hand
- 21-37 IMU chest
- 38-54 IMU ankle

The IMU sensory data contains the following columns:

- 1 temperature (°C)
- 2-4 3D-acceleration data (m/s²), scale: ±16g, resolution: 13-bit
- 5-7 3D-acceleration data (m/s²), scale: ±6g, resolution: 13-bit
- 8-10 3D-gyroscope data (rad/s)
- 11-13 3D-magnetometer data (μT)
- 14-17 orientation (invalid in this data collection)

±16g hand acceleration (x, y, z) -> zero-based column indexes [4, 5, 6]

timestamp -> [0]

activity_id -> [1]

In [3]:
def read_subject_file(file_path:str)->tuple:
    """
    Read one whitespace-separated subject file.

    Parameters
    ----------
    file_path : str
        Path to the subject file (e.g. 'Protocol/subject101.dat').

    Returns
    -------
    (subject, subject_name) : tuple
        `subject` is the header-less DataFrame read from the file and
        `subject_name` is the file name without its '.dat' extension.
    """
    file_name = os.path.basename(file_path)
    # Slice the suffix off explicitly: str.rstrip('.dat') strips any trailing run of the
    # characters '.', 'd', 'a', 't' and would mangle names such as 'subject_data.dat'.
    if file_name.endswith('.dat'):
        subject_name = file_name[:-len('.dat')]
    else:
        subject_name = file_name
    subject = pd.read_table(file_path, header=None, sep=r'\s+')
    return subject, subject_name

class DataProcessor:
  def __init__(self, subject_dataframe:pd.DataFrame, subject_name:str):
    self.subject_dataframe = subject_dataframe
    self.subject_name = subject_name
    self.acc_x_col = 'acc_x'
    self.acc_y_col = 'acc_y'
    self.acc_z_col = 'acc_z'
    self.timestamp = 'timestamp'
    self.activity_id = 'activity_id'

  def _extract_data(self)->pd.DataFrame:
    """
    Extract the relevant columns from the data
    """
    # timestamp, activity_id, 16g_acc_x, 16g_acc_y, 16g_acc_z
    raw_data = self.subject_dataframe.iloc[:, [0, 1, 4, 5, 6]]
    raw_data.columns = [self.timestamp , self.activity_id , self.acc_x_col, self.acc_y_col, self.acc_z_col]
    return raw_data

  def _handle_missing_values(self, data:pd.DataFrame)->pd.DataFrame:
    """
    Used linear interpolation for the acceleration data
    """
    data[self.acc_x_col] = data[self.acc_x_col].interpolate(method='linear', limit_direction='both')
    data[self.acc_y_col] = data[self.acc_y_col].interpolate(method='linear', limit_direction='both')
    data[self.acc_z_col] = data[self.acc_z_col].interpolate(method='linear', limit_direction='both')
    return data

  def sorted_timestamps(self, data:pd.DataFrame)->pd.DataFrame:
    """
    Sort the timestamps in ascending order
    """
    data = data.sort_values(by=self.timestamp, ascending=True).reset_index(drop=True)
    return data


  def downsample_from_100_to_30hz(self, data:pd.DataFrame)->pd.DataFrame:
    """
    Downsample the data from 100Hz to 30Hz
    """

    data_copy = data.copy()
    data_copy.set_index(self.timestamp, inplace=True)
    data_copy.index = pd.to_timedelta(data_copy.index, unit="s")
    data_copy = data_copy.resample('0.0333s').agg({self.activity_id: lambda x: x.mode()[0] if len(x.mode()) > 0 else x.iloc[0],
                                        self.acc_x_col:"mean",
                                        self.acc_y_col:"mean",
                                        self.acc_z_col:"mean"}).reset_index()
    return data_copy


def activity_mapping(value:int):
  """
  Translate a numeric activity id into its activity name.
  Ids that are not listed below map to 'Unknown'.
  """
  id_name_pairs = (
      (0, "other"),
      (1, "lying"),
      (2, "sitting"),
      (3, "standing"),
      (4, "walking"),
      (5, "running"),
      (6, "cycling"),
      (7, "Nordic walking"),
      (9, "watching TV"),
      (10, "computer work"),
      (11, "car driving"),
      (12, "ascending stairs"),
      (13, "descending stairs"),
      (16, "vacuum cleaning"),
      (17, "ironing"),
      (18, "folding laundry"),
      (19, "house cleaning"),
      (20, "playing soccer"),
      (24, "rope jumping"),
  )
  names_by_id = dict(id_name_pairs)
  if value in names_by_id:
    return names_by_id[value]
  return 'Unknown'


def create_sliding_windows(dataframe, window_length_sec, sampling_rate_hz, step_duration_sec):
  """
  Cut the acceleration signal into fixed-length, possibly overlapping windows.

  Parameters
  ----------
  dataframe : DataFrame with columns 'acc_x', 'acc_y', 'acc_z' and 'activity_id'.
  window_length_sec : window length in seconds.
  sampling_rate_hz : number of rows per second in `dataframe`.
  step_duration_sec : shift between the starts of consecutive windows, in seconds.

  Returns
  -------
  windows : array of shape (n_windows, 3, window_length_samples) holding x, y, z per window.
  labels : array of shape (n_windows, window_length_samples) holding the per-sample activity ids
           (an empty 1-D array when no window fits).
  """
  N_FEATURES = 3
  window_length_samples = int(window_length_sec * sampling_rate_hz)
  step_duration_samples = int(step_duration_sec * sampling_rate_hz)

  # Pull the columns out once instead of on every iteration.
  acc_x = dataframe['acc_x'].values
  acc_y = dataframe['acc_y'].values
  acc_z = dataframe['acc_z'].values
  activity = dataframe['activity_id'].values

  windows = []
  labels = []
  # "+ 1" so that the last start position whose window still fits entirely is included;
  # without it a frame of exactly one window length would produce no window at all.
  last_start_exclusive = len(dataframe) - window_length_samples + 1
  for start in range(0, last_start_exclusive, step_duration_samples):
    stop = start + window_length_samples
    windows.append(np.array([acc_x[start:stop], acc_y[start:stop], acc_z[start:stop]]))
    labels.append(activity[start:stop])

  windows = np.asarray(windows).reshape(-1, N_FEATURES, window_length_samples)
  labels = np.asarray(labels)

  return windows, labels



def clean_window_labels(window_X, window_y):
  """
  Keep only the windows in which activity id 0 makes up less than half of the
  per-sample labels; windows with a share of 0.5 or more are dropped.

  Returns the surviving windows and their label rows as NumPy arrays.
  """
  kept_positions = [
      position
      for position in range(len(window_y))
      if np.sum(window_y[position] == 0) / len(window_y[position]) < 0.5
  ]

  clean_window_X = np.array([window_X[position] for position in kept_positions])
  clean_window_y = np.array([window_y[position] for position in kept_positions])

  return clean_window_X, clean_window_y


def majority_voting(window_y):
  """
  For every window (row of `window_y`), return the activity id that occurs most often.
  """
  from scipy.stats import mode

  vote = mode(window_y, axis=1)
  return vote.mode


def reshaped_windows(window_X, window_y):
  """
  Bring the windows into the layout expected by the model:
  (n_windows, n_timestamps, n_features) -> (n_windows, n_features, n_timestamps).

  The last two axes are swapped with a transpose. A plain `reshape` to the target shape
  would keep the flat element order and therefore mix samples of different features
  into the same row.

  `window_y` is flattened to shape (n_windows,).
  """
  window_X = np.transpose(window_X, (0, 2, 1))
  window_y = window_y.reshape(window_y.shape[0], )
  return window_X, window_y


def activity_filter(window_X, window_y):
  """
  Keep only the windows whose label is one of the activities performed by
  every subject: 1, 2, 3, 4, 12, 13, 16, 17.
  """
  shared_activity_ids = (1, 2, 3, 4, 12, 13, 16, 17)

  # One boolean mask per allowed id, OR-ed together element-wise.
  valid_indices = np.logical_or.reduce([window_y == activity_id for activity_id in shared_activity_ids])

  return window_X[valid_indices], window_y[valid_indices]



In [4]:
# Folder (relative to the working directory) that holds one raw file per subject.
file_dir = 'Protocol'
# os.listdir returns entries in arbitrary order; sort them so the subjects are always
# processed (and later concatenated) in the same order on every run.
subject_raw_files = sorted(os.listdir(file_dir))

In [5]:
# Per-subject results, collected in the order the files are processed.
subject_arr_Xs = []
subject_arr_ys = []
subject_names = []

# Windowing configuration: 10 s windows shifted by 5 s on the downsampled (30 Hz) signal.
window_length_sec = 10
step_duration_sec = 5
sampling_rate_hz = 30
# Share of a window (in percent) that it has in common with the next one.
overlap_ratio = round((100*(window_length_sec-step_duration_sec)/window_length_sec), 2)


print(f'WINDOW LENGTH in SAMPLES: {int(window_length_sec * sampling_rate_hz)}')
print(f'STEP DURATION in SAMPLES: {int(step_duration_sec * sampling_rate_hz)}')
print(f'OVERLAPPING WINDOW RATIO: {overlap_ratio}%')


for f in subject_raw_files:
  file_path = os.path.join(file_dir, f)
  # NOTE: the file is read before the name check below, so subject109 is loaded and then discarded.
  subject, subject_name = read_subject_file(file_path)
  # subject109 is excluded from preprocessing (reason not stated in this notebook).
  if subject_name != 'subject109':
    print('-----------------------------------------------------------')
    print(f'Data preprocessing is starting for {subject_name}...')
    processor = DataProcessor(subject, subject_name)
    raw_data = processor._extract_data()

    subject_df = processor._handle_missing_values(raw_data)   # Missing axes values are filled by applying linear interpolation
    subject_df = processor.sorted_timestamps(subject_df)    # Update if the timestamps is not ascending
    downsampled_subject_df = processor.downsample_from_100_to_30hz(subject_df)    # Downsample the data from 100Hz to 30Hz

    win_X, win_y = create_sliding_windows(downsampled_subject_df, window_length_sec=window_length_sec, sampling_rate_hz=sampling_rate_hz, step_duration_sec=step_duration_sec)
    clean_win_X, clean_win_y = clean_window_labels(win_X, win_y)    # Keep only windows where activity id 0 covers less than half of the samples
    major_win_y = majority_voting(clean_win_y)    # Majority voting for the labels in a window
    #reshaped_windows_X, reshaped_window_y = reshaped_windows(clean_win_X, major_win_y)    # Eliminated
    filtered_window_X, filtered_window_y = activity_filter(clean_win_X, major_win_y)   # Filtering the activities that everybody does. (1, 2, 3, 4, 12, 13, 16, 17)

    print(f'Final remaining shapes X: {filtered_window_X.shape}, y: {filtered_window_y.shape}')

    subject_arr_Xs.append(filtered_window_X)
    subject_arr_ys.append(filtered_window_y)
    subject_names.append(subject_name)


# One (subject_name, windows, labels) triple per processed subject.
all_subject_infos = list(zip(subject_names, subject_arr_Xs, subject_arr_ys))

WINDOW LENGTH in SAMPLES: 300
STEP DURATION in SAMPLES: 150
OVERLAPPING WINDOW RATIO: 50.0%
-----------------------------------------------------------
Data preprocessing is starting for subject101...
Final remaining shapes X: (345, 3, 300), y: (345,)
-----------------------------------------------------------
Data preprocessing is starting for subject102...
Final remaining shapes X: (373, 3, 300), y: (373,)
-----------------------------------------------------------
Data preprocessing is starting for subject103...
Final remaining shapes X: (348, 3, 300), y: (348,)
-----------------------------------------------------------
Data preprocessing is starting for subject104...
Final remaining shapes X: (364, 3, 300), y: (364,)
-----------------------------------------------------------
Data preprocessing is starting for subject105...
Final remaining shapes X: (379, 3, 300), y: (379,)
-----------------------------------------------------------
Data preprocessing is starting for subject106...

In [6]:
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

def get_pretrained_harnet(class_num, model_name = 'harnet10', repo = 'OxWearables/ssl-wearables', force_reload = True):
    """
    Load a pretrained model through `torch.hub`.

    Parameters
    ----------
    class_num : number of output classes, forwarded to the hub entry point.
    model_name : name of the hub entry point (default 'harnet10').
    repo : hub repository to load from (default 'OxWearables/ssl-wearables').
    force_reload : forwarded to `torch.hub.load`. The default True keeps the original
        behaviour; pass False to let `torch.hub` reuse what it already has.

    Returns
    -------
    Whatever `torch.hub.load` returns for the requested entry point.
    """
    model = torch.hub.load(repo, model_name, class_num=class_num, pretrained=True, force_reload=force_reload)
    return model

def train(model, train_loader, optimizer, criterion, epoch, device, is_dp=False, privacy_engine=None, delta=None):
  """
  Run one training epoch over `train_loader`.

  Every batch is moved to `device`, a forward/backward pass is done and the optimizer is
  stepped. Progress is printed every 10th batch; when `is_dp` is set and a `privacy_engine`
  is given, the current epsilon (for `delta`) is printed as well.

  Returns (avg_loss, accuracy): the sample-weighted mean loss and the accuracy in percent
  over all samples seen during the epoch.
  """
  model.train()
  running_loss = 0
  n_correct = 0
  n_seen = 0
  for batch_idx, (data, target) in enumerate(train_loader):
    data = data.to(device)
    target = target.to(device)

    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()

    # Weight the batch loss by the batch size so the epoch average is per sample.
    n_in_batch = data.size(0)
    running_loss += loss.item() * n_in_batch
    pred = output.argmax(dim=1, keepdim=True)
    n_correct += pred.eq(target.view_as(pred)).sum().item()
    n_seen += n_in_batch

    if batch_idx % 10 != 0:
      continue

    if is_dp and privacy_engine:
      epsilon = privacy_engine.get_epsilon(delta)
      print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} "
            f"({100. * batch_idx / len(train_loader):.0f}%)]\t"
            f"Loss: {loss.item():.6f}\tEpsilon: {epsilon:.2f}")
    else:
      print(f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} "
            f"({100. * batch_idx / len(train_loader):.0f}%)]\t"
            f"Loss: {loss.item():.6f}")

  avg_loss = running_loss / n_seen
  accuracy = 100. * n_correct / n_seen
  print(f"Epoch {epoch} - Training: Average loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
  return avg_loss, accuracy


def evaluate(model, test_loader, criterion, device):
  """
  Evaluate `model` on `test_loader` without tracking gradients.

  Prints and returns (test_loss, accuracy, f1): the mean loss per sample, the accuracy in
  percent and the macro-averaged F1 score (zero_division=0).
  """
  model.eval()
  summed_loss = 0
  correct = 0
  all_preds, all_targets = [], []
  with torch.no_grad():
    for data, target in test_loader:
      data = data.to(device)
      target = target.to(device)
      output = model(data)
      # Batch loss weighted by batch size; divided by the dataset size below.
      summed_loss += criterion(output, target).item() * data.size(0)
      pred = output.argmax(dim=1, keepdim=True)
      correct += pred.eq(target.view_as(pred)).sum().item()
      all_preds.extend(pred.cpu().numpy())
      all_targets.extend(target.cpu().numpy())

  test_loss = summed_loss / len(test_loader.dataset)
  accuracy = 100. * correct / len(test_loader.dataset)
  f1 = f1_score(all_targets, all_preds, average='macro', zero_division=0)
  print(f"Evaluation set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} "
        f"({accuracy:.2f}%), F1-score: {f1:.4f}\n")
  return test_loss, accuracy, f1

def predict(model, data_loader, device):
  """
  Return the predicted class index for every sample in `data_loader`.

  The result is a list with one length-1 NumPy array per sample (the argmax is
  taken with keepdim=True); the labels yielded by the loader are ignored.
  """
  model.eval()
  collected = []
  with torch.no_grad():
    for batch, _ in data_loader:
      logits = model(batch.to(device))
      top_class = logits.argmax(dim=1, keepdim=True)
      collected.extend(top_class.cpu().numpy())
  return collected

def predict_proba(model, data_loader, device):
    """
    Return the class probabilities for every sample in `data_loader`.

    The model outputs are passed through a softmax over dim 1; the result is a list
    with one NumPy probability vector per sample. Loader labels are ignored.
    """
    model.eval()
    collected = []
    with torch.no_grad():
        for batch, _ in data_loader:
            logits = model(batch.to(device))
            # Softmax turns the raw outputs into probabilities.
            collected.extend(torch.softmax(logits, dim=1).cpu().numpy())
    return collected

class CustomScaler:
  """
  Thin wrapper around scikit-learn's StandardScaler that accepts 2D and 3D NumPy arrays.
  A 3D array is flattened to (dim0 * dim1, dim2) for the scaler, so the last axis is what
  gets standardised, and the result is reshaped back to the input shape.
  """
  def __init__(self):
    self._scaler = StandardScaler()
    self._is_fitted = False
    self._original_input_dims = None

  @staticmethod
  def _as_2d(data: np.ndarray, ndim: int, error_message: str) -> np.ndarray:
    """Return `data` unchanged (2D) or with its first two axes merged (3D); otherwise raise."""
    if ndim == 2:
      return data
    if ndim == 3:
      n_samples, n_timesteps, n_features = data.shape
      return data.reshape((n_samples * n_timesteps, n_features))
    raise ValueError(error_message)

  @staticmethod
  def _restore_shape(scaled_2d: np.ndarray, data: np.ndarray, ndim: int) -> np.ndarray:
    """Undo `_as_2d` for 3D input; 2D results are returned as they are."""
    if ndim == 3:
      n_samples, n_timesteps, n_features = data.shape
      return scaled_2d.reshape((n_samples, n_timesteps, n_features))
    return scaled_2d

  def fit(self, data: np.ndarray):
    """Fit the wrapped scaler on `data` and remember its dimensionality. Returns self."""
    self._original_input_dims = data.ndim
    flat = self._as_2d(data, self._original_input_dims,
                       "Input data must have 2 or 3 dimensions for scaling.")
    self._scaler.fit(flat)
    self._is_fitted = True
    return self

  def fit_transform(self, data: np.ndarray) -> np.ndarray:
    """Fit on `data` and return the scaled data in the shape of the input."""
    self._original_input_dims = data.ndim
    flat = self._as_2d(data, self._original_input_dims,
                       "Input data must have 2 or 3 dimensions for scaling.")
    scaled_flat = self._scaler.fit_transform(flat)
    scaled_data = self._restore_shape(scaled_flat, data, self._original_input_dims)
    self._is_fitted = True
    return scaled_data

  def transform(self, data: np.ndarray) -> np.ndarray:
    """
    Scale `data` with the already fitted scaler.

    Raises RuntimeError when the scaler has not been fitted and ValueError when the
    dimensionality of `data` differs from the data the scaler was fitted on.
    """
    if not self._is_fitted:
      raise RuntimeError("Scaler has not been fitted. Call fit_transform first.")
    if data.ndim != self._original_input_dims:
      raise ValueError(f"Input data has {data.ndim} dimensions, but scaler was fitted on "
                            f"data with {self._original_input_dims} dimensions.")
    flat = self._as_2d(data, self._original_input_dims,
                       "Input data must have 2 or 3 dimensions.")
    scaled_flat = self._scaler.transform(flat)
    return self._restore_shape(scaled_flat, data, self._original_input_dims)

def get_data_loader(X_test, y_test, BATCH_SIZE, shuffle=False):
  """
  Wrap features and labels in a DataLoader.

  Features are converted to float32 tensors and labels to int64 (long) tensors;
  batches of `BATCH_SIZE` are served, shuffled only when `shuffle` is True.
  """
  features = torch.tensor(X_test, dtype=torch.float32)
  labels = torch.tensor(y_test, dtype=torch.long)
  return DataLoader(TensorDataset(features, labels), batch_size=BATCH_SIZE, shuffle=shuffle)

class EarlyStopping:
  """
  Track the validation loss across epochs, remember the best model state and
  raise the `early_stop` flag after `patience` consecutive epochs without improvement.
  """
  def __init__(self, patience=5, delta=0, verbose=False):
    # Configuration
    self.patience = patience
    self.delta = delta
    self.verbose = verbose
    # Running state
    self.best_score = None
    self.best_epoch = 0
    self.best_model_state = None
    self.counter = 0
    self.early_stop = False

  def __call__(self, val_loss, model, epoch):
    """Register the validation loss of `epoch` and update the best state / patience counter."""
    score = -val_loss   # higher score == lower loss
    first_call = self.best_score is None

    if not first_call and score < self.best_score + self.delta:
      # Not better than the best score (plus delta): use up one unit of patience.
      self.counter += 1
      if self.verbose:
        print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
      if self.counter >= self.patience:
        self.early_stop = True
      return

    # New best (or very first) score: snapshot the weights.
    self.best_score = score
    self.best_model_state = copy.deepcopy(model.state_dict())
    self.best_epoch = epoch
    if not first_call:
      self.counter = 0

  def load_best_model(self, model):
    """Load the remembered best state into `model`, if one has been stored."""
    if self.best_model_state:
      model.load_state_dict(self.best_model_state)



def plot_distribution(arr:np.ndarray, title:str):
  """Show a bar chart of the relative frequency of each distinct value in `arr`."""
  relative_frequencies = pd.Series(arr).value_counts(normalize=True)
  relative_frequencies.plot(kind='bar')
  plt.title(title)
  plt.show()


In [7]:
import pandas as pd
import plotly.graph_objects as go

def fold_plot(fold_train_accuracies, fold_val_accuracies, val_subjects):
  """
  Draw one train-vs-validation accuracy subplot per fold on a two-column grid.

  The three arguments are parallel sequences (one entry per fold). Returns the
  matplotlib figure.
  """
  import matplotlib.pyplot as plt
  import math

  # One (train curve, validation curve, validation subjects) triple per fold.
  folds = list(zip(fold_train_accuracies, fold_val_accuracies, val_subjects))
  num_folds = len(folds)

  # Grid layout: two columns, as many rows as needed.
  ncols = 2
  nrows = math.ceil(num_folds / ncols)

  fig, axes = plt.subplots(nrows, ncols, figsize=(14, 5 * nrows))
  fig.suptitle('Training vs. Validation Accuracy Across Folds', fontsize=16, y=1.02)
  axes = axes.flatten()

  for i, fold in enumerate(folds):
      tr_acc, vl_acc, vl_subjects = fold
      ax = axes[i]
      ax.plot(tr_acc, label='Train Accuracy', color='royalblue')
      ax.plot(vl_acc, label='Validation Accuracy', color='darkorange')
      ax.set_title(f'Fold: {i + 1} for  Validation Subjects {vl_subjects}')
      ax.set_xlabel('Epoch')
      ax.set_ylabel('Accuracy')
      ax.legend()
      ax.grid(True, linestyle='--', alpha=0.6)

  # With an odd number of folds the last grid cell stays empty; hide it.
  for spare_ax in axes[num_folds:]:
      spare_ax.axis('off')

  # Leave a little room at the top for the figure title.
  fig.tight_layout(rect=[0, 0, 1, 0.98])

  return fig


def plot_epochs(train_accs, val_accs, val_subj):
  """
  Plot training and validation accuracy against the 1-based epoch number
  for a single run and return the matplotlib figure.
  """
  import matplotlib.pyplot as plt

  epochs = np.arange(1, len(train_accs) + 1)
  fig, ax = plt.subplots()

  curves = (
      (train_accs, 'Train Accuracy', 'royalblue'),
      (val_accs, 'Validation Accuracy', 'darkorange'),
  )
  for values, curve_label, curve_color in curves:
    ax.plot(epochs, values, label=curve_label, color=curve_color)

  ax.set_title(f'Training vs. Validation Accuracy for Validation Subjects {val_subj}')
  ax.set_xlabel('Epoch')
  ax.set_ylabel('Accuracy')
  ax.set_xticks(epochs)
  ax.legend()
  ax.grid(True, linestyle='--', alpha=0.6)
  return fig


def plot_roc_curve(y_score, y_true, noise_scale=None):
  """
  Plot the ROC curve of the attack model together with the chance diagonal.

  A truthy `noise_scale` is shown in the title as the DP-SGD noise scale; otherwise
  the plot is titled as the non-private baseline.
  """
  from sklearn.metrics import roc_curve
  import matplotlib.pyplot as plt

  fpr, tpr, _ = roc_curve(y_score=y_score, y_true=y_true)

  if noise_scale:
    figure_title = f"Attack Model ROC Curve for DP-SGD Model with Noise Scale: {noise_scale}"
  else:
    figure_title = "Attack Model ROC Curve for Non-Private Baseline"

  plt.figure()
  plt.plot(fpr, tpr, color="darkorange", linewidth =2, label="ROC curve")
  # Diagonal = classifier without any skill.
  plt.plot([0, 1], [0, 1], color="navy", linewidth =2, linestyle="--", label='No skills')
  plt.xlim([0.0, 1.0])
  plt.ylim([0.0, 1.0])
  plt.xlabel("False Positive Rate")
  plt.ylabel("True Positive Rate")
  plt.title(figure_title)
  plt.legend(loc="lower right")
  plt.grid(True)
  plt.show()


def plot_confusion_heatmap(
    df: pd.DataFrame,
    true_col: str = 'True',
    pred_col: str = 'Preds',
    normalize: str = 'true',     # 'true', 'pred', or 'all'
    title: str = 'Confusion Matrix'
) -> go.Figure:
    """
    Compute and plot a (possibly normalized) confusion matrix as a Plotly heatmap.

    Rows and columns share one label set (the sorted union of the true and the predicted
    labels), so the matrix is always square and the diagonal always holds the correct
    predictions, even when some class is never predicted or never occurs as ground truth.

    Parameters
    ----------
    df : pd.DataFrame
      Contains the columns `true_col` and `pred_col`.
    true_col, pred_col : str
      Names of the ground truth and prediction columns.
    normalize : {'true','pred','all', None}
      How to normalize the counts.
      - 'true' divides each row by its sum (per-class recall).
      - 'pred' divides each column by its sum (per-class precision).
      - 'all' divides by grand total (global frequency).
      - None (or any other value) means raw counts.
      Rows/columns whose sum is zero are shown as 0 instead of NaN.
    title : str
      The plot title.

    Returns
    -------
    fig : go.Figure
    """
    # 1) build raw confusion matrix on a label set shared by both axes
    labels = sorted(set(df[true_col].unique()) | set(df[pred_col].unique()))

    cm = (
      pd.crosstab(df[true_col], df[pred_col])
        .reindex(index=labels, columns=labels, fill_value=0)
    )

    # 2) normalize if requested; anything unrecognised is treated as "raw counts"
    show_counts = normalize not in ('true', 'pred', 'all')
    if normalize == 'true':
        # a label that never occurs as ground truth has a zero row sum -> 0/0 -> NaN
        cm_norm = cm.divide(cm.sum(axis=1), axis=0).fillna(0.0)
    elif normalize == 'pred':
        # a label that is never predicted has a zero column sum -> 0/0 -> NaN
        cm_norm = cm.divide(cm.sum(axis=0), axis=1).fillna(0.0)
    elif normalize == 'all':
        cm_norm = cm / cm.values.sum()
    else:
        cm_norm = cm.astype(float)

    # 3) decide annotation text and hover info
    if show_counts:
        z = cm.values
        text = cm.values
        hover = 'Count: %{z}'
        colorbar_title = 'Count'
        texttemplate = '%{text}'
    else:
        z = cm_norm.values
        # show percentages with one decimal, e.g. "17.3 %"
        text = (cm_norm.values * 100).round(1)
        hover = 'Fraction: %{z:.2f}' if normalize=='all' else 'Pct: %{z:.2%}'
        colorbar_title = 'Fraction' if normalize=='all' else 'Pct'
        texttemplate = '%{text:.1f}%'

    # 4) build the heatmap
    hm = go.Heatmap(
        z=z,
        x=labels,
        y=labels,
        colorscale='Blues',
        colorbar=dict(title=colorbar_title),
        text=text,
        texttemplate=texttemplate,
        hovertemplate=(
            'True: %{y}<br>'
            'Pred: %{x}<br>' + hover +
            '<extra></extra>'
        )
    )

    fig = go.Figure(hm)
    fig.update_layout(
        title=title,
        xaxis_title='Predicted activities',
        yaxis_title='True activities',
        xaxis_tickangle=45,
        width=700, height=700,
    )

    return fig


def remove_module_prefix(state_dict):
    """
    Return a copy of `state_dict` whose keys have any leading "_module." prefix removed.

    Only the start of a key is touched (repeated leading prefixes are all removed), so a
    key that merely contains "_module." further inside, e.g. "block_module.weight", is
    kept intact. Values are passed through unchanged.
    """
    prefix = "_module."
    cleaned = {}
    for key, value in state_dict.items():
        while key.startswith(prefix):
            key = key[len(prefix):]
        cleaned[key] = value
    return cleaned


## GETTING THE TEST SUBJECTS (Subject102, Subject103)

In [8]:
import datetime as dt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedGroupKFold, LeaveOneGroupOut
import torch.nn as nn
import copy
from itertools import product

# Fix the torch seed and ask cuDNN for deterministic behaviour so repeated runs are comparable.
torch.manual_seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# NOTE(review): not read anywhere in the visible part of the notebook — presumably toggles report saving; confirm.
reports_save = True

# Windows, labels and the owning subject name of every held-out test window.
Test_Subjects_X = []
Test_Subjects_y = []
Test_Subjects_subject_groups = []


for sb_name, Xs, ys in all_subject_infos:
  if sb_name in ['subject102','subject103']:           # Test subject for predictions
    # extend() adds the individual windows/labels, so the lists stay flat across subjects.
    Test_Subjects_X.extend(Xs)
    Test_Subjects_y.extend(ys)
    Test_Subjects_subject_groups.extend([sb_name] * len(ys))

Test_Subjects_X = np.array(Test_Subjects_X)
Test_Subjects_y = np.array(Test_Subjects_y)
Test_Subjects_subject_groups = np.array(Test_Subjects_subject_groups)

print(f'Test Set X shape: {Test_Subjects_X.shape}, y shape: {Test_Subjects_y.shape}, Subjects : {", ".join(np.unique(Test_Subjects_subject_groups))}')


Test Set X shape: (721, 3, 300), y shape: (721,), Subjects : subject102, subject103


In [9]:
# Loading Original Pre-trained Model
# The number of output classes is taken from the distinct labels present in the test set;
# the resulting model is deep-copied below as the skeleton into which saved weights are loaded.

N_CLASSES = len(np.unique(Test_Subjects_y))
original_model = get_pretrained_harnet(class_num=N_CLASSES)

Downloading: "https://github.com/OxWearables/ssl-wearables/zipball/main" to /root/.cache/torch/hub/main.zip


131 Weights loaded


### Non-Private Baseline Model

In [10]:
saved_model_dir = 'attack_results_noDP/final_models/'

# Newest artefacts = lexicographically largest trailing "_<version>.pth" token among the saved models.
# The extension is cut off by slicing: str.rstrip('.pth') strips a *set of characters*, not a
# suffix, and would also remove trailing 'p', 't', 'h' or '.' characters of the version tag itself.
last_vers = max([f.split('_')[-1] for f in os.listdir(saved_model_dir) if f.startswith('final_model_') and f.endswith('.pth')])[:-len('.pth')]
final_base_model_path = [m for m in os.listdir(saved_model_dir) if m.startswith('final_model_') and m.split('_')[-1] == f'{last_vers}.pth'][0]
final_base_scaler_path = [m for m in os.listdir(saved_model_dir) if m.startswith('final_scaler_') and m.split('_')[-1] == f'{last_vers}.pkl'][0]
final_base_label_encoder_path = [m for m in os.listdir(saved_model_dir) if m.startswith('final_label_encoder_') and m.split('_')[-1] == f'{last_vers}.pkl'][0]

BATCH_SIZE = 32

# NOTE: pickle.load executes code embedded in the file — only load artefacts produced by this project.
# The context managers close the file handles (the bare open() calls used before never did).
with open(os.path.join(saved_model_dir, final_base_scaler_path), 'rb') as scaler_file:
    final_scaler_base = pickle.load(scaler_file)
with open(os.path.join(saved_model_dir, final_base_label_encoder_path), 'rb') as encoder_file:
    final_le_base = pickle.load(encoder_file)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Start from a copy of the pretrained architecture and load the fine-tuned weights into it.
final_base_model = copy.deepcopy(original_model)
base_state_dict = torch.load(os.path.join(saved_model_dir, final_base_model_path), weights_only=True, map_location='cpu')
final_base_model.load_state_dict(remove_module_prefix(base_state_dict))
final_base_model.to(device)

# Apply the scaler / label encoder that were saved together with the model.
scaled_test_X = final_scaler_base.transform(Test_Subjects_X)
encoded_test_y = final_le_base.transform(Test_Subjects_y)

test_data_loader = get_data_loader(scaled_test_X, encoded_test_y, BATCH_SIZE=BATCH_SIZE, shuffle=False)

preds_base = predict(final_base_model, test_data_loader, device)

# predict() yields one length-1 array per sample; flatten to 1-D before decoding the labels.
preds_base = np.array(preds_base).ravel()
preds_base = final_le_base.inverse_transform(preds_base)

test_loss_base, test_acc_base, test_f1_base = evaluate(final_base_model, test_data_loader, nn.CrossEntropyLoss(), device)

# Human-readable activity names for the confusion-matrix plot.
pred_acts_base = pd.Series(preds_base).apply(activity_mapping)
true_acts_base = pd.Series(Test_Subjects_y).apply(activity_mapping)

comparison_base_df = pd.DataFrame({
    "Preds": pred_acts_base,
    "True": true_acts_base
})


Evaluation set: Average loss: 1.3445, Accuracy: 523/721 (72.54%), F1-score: 0.7021



#### DP-SGD (Noise Scale=1)

In [11]:
saved_model_dir = 'attack_results_DP/final_models/'

# Newest artefacts of the noise-scale-1 run = lexicographically largest trailing "_<version>.pth" token.
# The extension is cut off by slicing: str.rstrip('.pth') strips a *set of characters*, not a
# suffix, and would also remove trailing 'p', 't', 'h' or '.' characters of the version tag itself.
last_vers = max([f.split('_')[-1] for f in os.listdir(saved_model_dir) if f.startswith('final_model_1_') and f.endswith('.pth')])[:-len('.pth')]
final_dp_model_path = [m for m in os.listdir(saved_model_dir) if m.startswith('final_model_1_') and m.split('_')[-1] == f'{last_vers}.pth'][0]
final_dp_scaler_path = [m for m in os.listdir(saved_model_dir) if m.startswith('final_scaler_1_') and m.split('_')[-1] == f'{last_vers}.pkl'][0]
final_dp_label_encoder_path = [m for m in os.listdir(saved_model_dir) if m.startswith('final_label_encoder_1_') and m.split('_')[-1] == f'{last_vers}.pkl'][0]


BATCH_SIZE = 32

# NOTE: pickle.load executes code embedded in the file — only load artefacts produced by this project.
# The context managers close the file handles (the bare open() calls used before never did).
with open(os.path.join(saved_model_dir, final_dp_scaler_path), 'rb') as scaler_file:
    final_scaler_dp = pickle.load(scaler_file)
with open(os.path.join(saved_model_dir, final_dp_label_encoder_path), 'rb') as encoder_file:
    final_le_dp = pickle.load(encoder_file)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Start from a copy of the pretrained architecture and load the DP-SGD fine-tuned weights into it.
final_dp_model = copy.deepcopy(original_model)
dp_state_dict = torch.load(os.path.join(saved_model_dir, final_dp_model_path), weights_only=True, map_location='cpu')
final_dp_model.load_state_dict(remove_module_prefix(dp_state_dict))
final_dp_model.to(device)

# Apply the scaler / label encoder that were saved together with the DP model.
scaled_test_X_dp = final_scaler_dp.transform(Test_Subjects_X)
encoded_test_y_dp = final_le_dp.transform(Test_Subjects_y)

test_data_loader_dp = get_data_loader(scaled_test_X_dp, encoded_test_y_dp, BATCH_SIZE=BATCH_SIZE, shuffle=False)

preds_dp = predict(final_dp_model, test_data_loader_dp, device)

# predict() yields one length-1 array per sample; flatten to 1-D before decoding the labels.
preds_dp = np.array(preds_dp).ravel()
preds_dp = final_le_dp.inverse_transform(preds_dp)

test_loss_dp, test_acc_dp, test_f1_dp = evaluate(final_dp_model, test_data_loader_dp, nn.CrossEntropyLoss(), device)

# Human-readable activity names for the confusion-matrix plot.
pred_acts_dp = pd.Series(preds_dp).apply(activity_mapping)
true_acts_dp = pd.Series(Test_Subjects_y).apply(activity_mapping)

comparison_dp_df = pd.DataFrame({
    "Preds": pred_acts_dp,
    "True": true_acts_dp
})

Evaluation set: Average loss: 2.2889, Accuracy: 427/721 (59.22%), F1-score: 0.5614



## VISUALIZATION

### Non-Private Baseline (Noise Scale = 0)

In [12]:
# Row-normalised confusion matrix (normalize='true', i.e. per-class recall) of the
# non-private baseline on the held-out test subjects.
fig_base = plot_confusion_heatmap(comparison_base_df, normalize='true', title='Per-class Recall for Non-Private Baseline (σ=0)')
fig_base.show()

### DP-SGD (Noise Scale = 1)

In [13]:
# Row-normalised confusion matrix (normalize='true', i.e. per-class recall) of the
# DP-SGD model on the held-out test subjects.
fig_dp = plot_confusion_heatmap(comparison_dp_df, normalize='true', title='Per-class Recall for DP-SGD (σ=1)')
fig_dp.show()

## TRAINING PERFORMANCES

#### DP (Noise Scale=1)

In [14]:
# Per-fold training report of the DP run, stored as CSV relative to the working directory.
# NOTE(review): the plotting helpers below read the 'Val Subjects' and 'Epoch_Results' columns — confirm the file has them.
fold_report_path_dp = 'attack_results_DP/fold_details_DP_CH_NS_1_10_30_50.0_20250625_Split_4-2-2.csv'
fold_report_df_dp = pd.read_csv(fold_report_path_dp)

In [15]:
import re
import ast
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def visualize_training_process_averaged(df, title_prefix="Training Process", save_path=None):
    """
    Visualize the training process showing average accuracy across folds with min-max ranges,
    using pandas to align epochs automatically.

    Each row of `df` is one fold. Its 'Epoch_Results' cell is the string form of a dict with
    the keys 'epoch', 'train_acc' and 'val_acc'; NumPy scalar wrappers such as
    `np.float64(85.3)`, `np.float64(-1.5)` or `np.float32(1e-05)` are unwrapped before the
    string is parsed. Folds whose string cannot be parsed are reported and skipped.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing columns: 'Val Subjects', 'Epoch_Results'
    title_prefix : str
        Prefix for the plot title
    save_path : str, optional
        Path to save the plot as HTML file

    Returns:
    --------
    plotly.graph_objects.Figure
        The created figure object

    Raises:
    -------
    ValueError
        If no fold could be parsed.
    """
    # 1) Parse each fold into a dict of lists
    # Matches np.float32(...) / np.float64(...) around any literal without nested
    # parentheses, so signed values and exponent notation are unwrapped as well.
    numpy_scalar_repr = re.compile(r'np\.float(?:32|64)\(([^()]+)\)')
    all_folds_data = {}
    for idx, row in df.iterrows():
        key = f'fold_{idx}'
        epoch_str = row['Epoch_Results']
        clean_str = numpy_scalar_repr.sub(r'\1', epoch_str)
        try:
            ed = ast.literal_eval(clean_str)
            all_folds_data[key] = {
                'epochs': ed['epoch'],
                'train_acc': ed['train_acc'],
                'val_acc':   ed['val_acc'],
            }
        except Exception as e:
            # Best effort: one broken row must not prevent plotting the other folds.
            print(f"Skipping fold {idx}: parse error {e}")

    if not all_folds_data:
        raise ValueError("No valid fold data to plot.")

    # 2) Build DataFrames: index=epoch, columns=folds
    #    (folds with fewer epochs simply get NaN for the missing ones)
    train_df = pd.DataFrame({
        fold: pd.Series(data=d['train_acc'], index=d['epochs'])
        for fold, d in all_folds_data.items()
    })
    val_df = pd.DataFrame({
        fold: pd.Series(data=d['val_acc'], index=d['epochs'])
        for fold, d in all_folds_data.items()
    })

    # 3) Sort by epoch number (index) to ensure monotonic x-axis
    train_df = train_df.sort_index()
    val_df   = val_df.sort_index()

    # 4) Compute statistics across folds (NaN entries are ignored by pandas)
    train_mean = train_df.mean(axis=1)
    train_min  = train_df.min(axis=1)
    train_max  = train_df.max(axis=1)

    val_mean   = val_df.mean(axis=1)
    val_min    = val_df.min(axis=1)
    val_max    = val_df.max(axis=1)

    # 5) Final epoch list
    epochs = train_mean.index.tolist()

    # 6) Create subplots: left = training, right = validation
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=('Training Accuracy (Avg ± Min-Max)', 'Validation Accuracy (Avg ± Min-Max)'),
        horizontal_spacing=0.1
    )

    # Training range: a closed polygon running along the max curve and back along the min curve
    fig.add_trace(
        go.Scatter(
            x=epochs + epochs[::-1],
            y=train_max.tolist() + train_min.tolist()[::-1],
            fill='toself', fillcolor='rgba(31,119,180,0.2)',
            line=dict(color='rgba(255,255,255,0)'),
            name='Train Range',
            hoverinfo='skip'
        ), row=1, col=1
    )
    # Training mean
    fig.add_trace(
        go.Scatter(
            x=epochs, y=train_mean.tolist(),
            mode='lines+markers',
            name='Train Mean',
            line=dict(color='#1f77b4', width=3),
            marker=dict(size=6),
            hovertemplate='Epoch %{x}<br>Acc %{y:.2f}%<extra></extra>'
        ), row=1, col=1
    )

    # Validation range (same polygon construction as for training)
    fig.add_trace(
        go.Scatter(
            x=epochs + epochs[::-1],
            y=val_max.tolist() + val_min.tolist()[::-1],
            fill='toself', fillcolor='rgba(255,127,14,0.2)',
            line=dict(color='rgba(255,255,255,0)'),
            name='Val Range',
            hoverinfo='skip'
        ), row=1, col=2
    )
    # Validation mean
    fig.add_trace(
        go.Scatter(
            x=epochs, y=val_mean.tolist(),
            mode='lines+markers',
            name='Val Mean',
            line=dict(color='#ff7f0e', width=3),
            marker=dict(size=6),
            hovertemplate='Epoch %{x}<br>Acc %{y:.2f}%<extra></extra>'
        ), row=1, col=2
    )

    # 7) Layout tweaks
    fig.update_layout(
        title={
            'text': f'{title_prefix} Cross-Validation Results',
            'x': 0.5, 'xanchor': 'center',
            'font': {'size': 18, 'family': 'Arial, sans-serif'}
        },
        font=dict(family="Arial, sans-serif", size=12),
        plot_bgcolor='white', paper_bgcolor='white',
        height=500, width=1200,
        legend=dict(orientation="h", y=-0.2, x=0.5, xanchor="center")
    )
    fig.update_xaxes(title_text="Epoch", showgrid=True, gridcolor='lightgray',
                     linecolor='black', mirror=True)
    fig.update_yaxes(title_text="Accuracy (%)", range=[0,100],
                     showgrid=True, gridcolor='lightgray',
                     linecolor='black', mirror=True)

    # 8) Optionally save
    if save_path:
        fig.write_html(save_path)
        print(f"Saved plot to {save_path}")

    return fig


def create_averaged_summary_table(df):
    """
    Build per-fold and aggregate tables from the final-epoch accuracies.

    Parameters:
    -----------
    df : pandas.DataFrame
        One row per fold with the columns 'Val Subjects' and 'Epoch_Results'.
        'Epoch_Results' is a dict, or the string repr of a dict, holding at
        least the lists 'train_acc' and 'val_acc'. Values serialised as
        ``np.float64(...)`` are unwrapped before parsing.

    Returns:
    --------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        fold_details  : one row per successfully parsed fold with the columns
                        'Fold', 'Validation_Subjects', 'Final_Train_Acc' and
                        'Final_Val_Acc' (accuracies formatted as 'xx.xx%').
        summary_stats : two rows (training / validation) with 'Mean ± Std',
                        'Min' and 'Max' over the folds. The std is NumPy's
                        population std (ddof=0). Every cell is 'N/A' when no
                        fold could be parsed.

    Notes:
    ------
    Folds whose 'Epoch_Results' cannot be parsed are reported with a printed
    message and left out of both tables.
    """
    # Unwrap NumPy scalar reprs, including negative and scientific-notation
    # values such as np.float64(-1e-05), so ast.literal_eval can read the dict.
    np_scalar = re.compile(r'np\.float\d*\(([^()]*)\)')

    final_train = []
    final_val = []
    fold_details = []

    # Number folds by position so the labels do not depend on the index labels.
    for fold_no, (_, row) in enumerate(df.iterrows(), start=1):
        val_subjects = row['Val Subjects']
        raw_results = row['Epoch_Results']

        try:
            if isinstance(raw_results, dict):
                epoch_data = raw_results
            else:
                epoch_data = ast.literal_eval(np_scalar.sub(r'\1', raw_results))

            # Only the last epoch of each fold enters the summary.
            train_acc = epoch_data['train_acc'][-1]
            val_acc = epoch_data['val_acc'][-1]

            fold_row = {
                'Fold': f'Fold {fold_no}',
                'Validation_Subjects': val_subjects,
                'Final_Train_Acc': f'{train_acc:.2f}%',
                'Final_Val_Acc': f'{val_acc:.2f}%'
            }
        except Exception as e:
            # Best effort: report the broken fold and keep the others.
            print(f"Error processing fold {fold_no}: {e}")
            continue

        final_train.append(train_acc)
        final_val.append(val_acc)
        fold_details.append(fold_row)

    def _describe(values):
        """Return the ('Mean ± Std', 'Min', 'Max') strings for one metric."""
        if not values:
            # min()/max() raise on an empty list; emit placeholders instead.
            return 'N/A', 'N/A', 'N/A'
        return (
            f'{np.mean(values):.2f}% ± {np.std(values):.2f}%',
            f'{min(values):.2f}%',
            f'{max(values):.2f}%',
        )

    if not fold_details:
        print("No valid fold data found!")

    train_summary = _describe(final_train)
    val_summary = _describe(final_val)

    summary_stats = {
        'Metric': ['Training Accuracy', 'Validation Accuracy'],
        'Mean ± Std': [train_summary[0], val_summary[0]],
        'Min': [train_summary[1], val_summary[1]],
        'Max': [train_summary[2], val_summary[2]]
    }

    fold_columns = ['Fold', 'Validation_Subjects', 'Final_Train_Acc', 'Final_Val_Acc']
    return pd.DataFrame(fold_details, columns=fold_columns), pd.DataFrame(summary_stats)



In [16]:
# Differentially private run: plot the fold-averaged accuracy curves and
# tabulate each fold's final-epoch accuracy.

# Build the averaged figure from the DP fold report. test_acc_dp is not
# defined in this cell and must already exist in the kernel.
fig = visualize_training_process_averaged(
    fold_report_df_dp,
    title_prefix=f"DP Classifier Head Fine-Tuning (σ = 1) with Test Accuracy = {round(test_acc_dp, 2)}%",
    save_path=None
)

# Render the interactive plot
fig.show()

# Build both summary tables; only fold_details is displayed (last expression),
# summary_stats is assigned but not shown in this cell.
fold_details, summary_stats = create_averaged_summary_table(fold_report_df_dp)
fold_details


Unnamed: 0,Fold,Validation_Subjects,Final_Train_Acc,Final_Val_Acc
0,Fold 1,['subject104' 'subject105'],71.65%,58.95%
1,Fold 2,['subject101' 'subject108'],73.93%,62.34%
2,Fold 3,['subject106' 'subject107'],73.62%,71.99%


#### Non-DP Baseline

In [17]:
# Load the per-fold training log of the non-private (no DP) baseline run.
# The path is relative, so it is resolved against the current working directory.
fold_report_path_base = 'attack_results_noDP/fold_details_noDP_CH_NS_10_30_50.0_20250625_Split_4-2-2.csv'
fold_report_df_base = pd.read_csv(fold_report_path_base)
# Preview the first rows (one row per fold)
fold_report_df_base.head()

Unnamed: 0.1,Unnamed: 0,Val Subjects,Batch_Size,Learning_Rate,Epoch_Results
0,0,['subject104' 'subject105'],32,0.001,"{'epoch': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ..."
1,1,['subject101' 'subject108'],32,0.001,"{'epoch': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ..."
2,2,['subject106' 'subject107'],32,0.001,"{'epoch': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ..."


In [18]:

# Non-private baseline: same figure and tables as for the DP run above.
# test_acc_base is not defined in this cell and must already exist in the kernel.
fig = visualize_training_process_averaged(
    fold_report_df_base,
    title_prefix=f"Non-Private (Baseline) Classifier Head Fine-Tuning with Test Accuracy = {round(test_acc_base, 2)}%",
    save_path=None
)

# Render the interactive plot
fig.show()

# Rebinds fold_details / summary_stats to the baseline tables; only
# fold_details is displayed (last expression).
fold_details, summary_stats = create_averaged_summary_table(fold_report_df_base)
fold_details

Unnamed: 0,Fold,Validation_Subjects,Final_Train_Acc,Final_Val_Acc
0,Fold 1,['subject104' 'subject105'],96.63%,73.35%
1,Fold 2,['subject101' 'subject108'],95.61%,70.66%
2,Fold 3,['subject106' 'subject107'],96.14%,75.07%


In [19]:
import ast
import re
import plotly.graph_objects as go

def visualize_epsilon_over_epochs(
    df,
    title,
    save_path: str = None
) -> go.Figure:
    """
    Plot the achieved epsilon per epoch, aggregated across folds.

    The mean over folds is drawn as a line with markers and the per-epoch
    min-max spread as a shaded band.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per fold. The column 'Epoch_Results' holds a dict, or the
        string repr of a dict, with the lists 'epoch' and 'achieved_epsilon'.
        Values serialised as ``np.float64(...)`` are unwrapped before parsing.
    title : str
        Text appended to the fixed 'ε Changes Over Epochs' plot title.
    save_path : str, optional
        If given, the figure is written to this path as HTML.

    Returns
    -------
    fig : plotly.graph_objects.Figure or None
        The Plotly figure, or None when no fold yielded usable data.

    Notes
    -----
    Folds that cannot be parsed (or lack 'achieved_epsilon') are reported
    with a printed message and skipped. If the remaining folds differ in
    length, only the epochs present in every fold are plotted.
    """
    # Unwrap NumPy scalar reprs such as np.float64(1.23) so the dict can be
    # read with ast.literal_eval instead of executing it with eval.
    np_scalar = re.compile(r'np\.float\d*\(([^()]*)\)')

    fold_epochs = []
    fold_epsilons = []

    for fold_no, (_, row) in enumerate(df.iterrows(), start=1):
        try:
            raw_results = row['Epoch_Results']
            if isinstance(raw_results, dict):
                results = raw_results
            else:
                results = ast.literal_eval(np_scalar.sub(r'\1', raw_results))
            epochs_this_fold = list(results['epoch'])
            epsilons_this_fold = list(results['achieved_epsilon'])
        except Exception as e:
            print(f"Error parsing data for fold {fold_no}: {e}")
            continue

        fold_epochs.append(epochs_this_fold)
        fold_epsilons.append(epsilons_this_fold)

    if not fold_epsilons:
        print("No valid data found!")
        return None

    # Use the number of epochs shared by every fold so that indexing never
    # runs past the end of a shorter fold and x/y lengths always agree.
    num_epochs = min(
        min(len(ep), len(eps)) for ep, eps in zip(fold_epochs, fold_epsilons)
    )
    if num_epochs == 0:
        print("No valid data found!")
        return None
    if any(len(eps) != num_epochs for eps in fold_epsilons):
        print(f"Folds differ in length; plotting the first {num_epochs} epochs only.")

    epochs = fold_epochs[0][:num_epochs]

    # Rows = folds, columns = epochs.
    eps_matrix = np.array([eps[:num_epochs] for eps in fold_epsilons], dtype=float)
    eps_mean = eps_matrix.mean(axis=0).tolist()
    eps_min = eps_matrix.min(axis=0).tolist()
    eps_max = eps_matrix.max(axis=0).tolist()

    # Single-panel figure. make_subplots is not imported next to this
    # function and is expected to be in scope from an earlier cell.
    fig = make_subplots(
        rows=1, cols=1,
        horizontal_spacing=0.1
    )

    # Shaded min-max band: trace the upper bound forwards and the lower
    # bound backwards so 'toself' fills the enclosed area.
    fig.add_trace(
        go.Scatter(
            x=epochs + epochs[::-1],
            y=eps_max + eps_min[::-1],
            fill='toself',
            fillcolor='rgba(31, 119, 180, 0.2)',
            line=dict(color='rgba(255,255,255,0)'),
            name='ε Range (Min-Max)',
            showlegend=True,
            hoverinfo='skip'
        ),
        row=1, col=1
    )

    # Mean epsilon across folds. Epsilon is not a percentage, so the hover
    # text shows the plain value without a '%' sign.
    fig.add_trace(
        go.Scatter(
            x=epochs,
            y=eps_mean,
            mode='lines+markers',
            name='ε (Mean)',
            line=dict(color='#1f77b4', width=3),
            marker=dict(size=10, color='#1f77b4'),
            hovertemplate='<b>ε</b><br>' +
                         'Epoch: %{x}<br>' +
                         'Mean: %{y:.2f}<br>' +
                         '<extra></extra>'
        ),
        row=1, col=1
    )

    # Layout: white background, centred title, horizontal legend below the plot
    fig.update_layout(
        title={
            'text': f'ε Changes Over Epochs {title}',
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 18, 'family': 'Arial, sans-serif'}
        },
        font=dict(family="Arial, sans-serif", size=12),
        plot_bgcolor='white',
        paper_bgcolor='white',
        height=500,
        width=800,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=-0.2,
            xanchor="center",
            x=0.5,
            font=dict(size=12)
        )
    )

    # Boxed axes with light grid lines
    fig.update_xaxes(
        title_text="Epoch",
        showgrid=True,
        gridwidth=1,
        gridcolor='lightgray',
        linecolor='black',
        linewidth=1,
        mirror=True
    )
    fig.update_yaxes(
        title_text="ε",
        showgrid=True,
        gridwidth=1,
        gridcolor='lightgray',
        linecolor='black',
        linewidth=1,
        mirror=True
    )

    # Save if path provided
    if save_path:
        fig.write_html(save_path)
        print(f"Plot saved to: {save_path}")

    return fig


In [20]:
# Plot the achieved ε per epoch for the DP run.
# NOTE: visualize_epsilon_over_epochs returns None when it collects no fold
# data, in which case fig.show() raises AttributeError.
fig = visualize_epsilon_over_epochs(fold_report_df_dp, title='for DP Classifier Head Fine-Tuning (σ = 1)')
fig.show()

In [21]:
def avg_stats_fold(df):
  """
  Tabulate per-epoch 'mean ± std' across folds for accuracy (and epsilon).

  Parameters
  ----------
  df : pandas.DataFrame
      One row per fold. The column 'Epoch_Results' holds a dict, or the
      string repr of a dict, with the lists 'epoch', 'train_acc', 'val_acc'
      and optionally 'achieved_epsilon'. Values serialised as
      ``np.float64(...)`` are unwrapped before parsing. Any number of folds
      is supported.

  Returns
  -------
  pandas.DataFrame
      Columns 'Epoch', 'Training Accuracy', 'Validation Accuracy' and, when
      the folds report 'achieved_epsilon', 'ε'. Each metric cell is the
      string '<mean> ± <std>' rounded to two decimals. The std is the sample
      std (ddof=1), so it is 'nan' when only one fold is present.

  Raises
  ------
  ValueError
      If df has no rows.
  """
  # Unwrap NumPy scalar reprs such as np.float64(71.65) so the dict can be
  # read once with ast.literal_eval instead of repeatedly executing eval.
  np_scalar = re.compile(r'np\.float\d*\(([^()]*)\)')

  fold_frames = []
  for fold_no, (_, row) in enumerate(df.iterrows(), start=1):
    raw_results = row['Epoch_Results']
    if isinstance(raw_results, dict):
      results = raw_results
    else:
      results = ast.literal_eval(np_scalar.sub(r'\1', raw_results))

    fold_columns = {
        f'train_acc_fold_{fold_no}': results['train_acc'],
        f'val_acc_fold_{fold_no}': results['val_acc'],
    }
    if 'achieved_epsilon' in results:
      fold_columns[f'achieved_epsilon_fold_{fold_no}'] = results['achieved_epsilon']

    # Index by epoch so the folds are aligned on the epoch number.
    fold_frames.append(
        pd.DataFrame(fold_columns, index=pd.Index(results['epoch'], name='epoch'))
    )

  if not fold_frames:
    raise ValueError("avg_stats_fold: df contains no folds")

  # Keep only the epochs that every fold reports.
  merged = pd.concat(fold_frames, axis=1, join='inner')

  def _mean_pm_std(prefix):
    """Format 'mean ± std' over all '<prefix>_fold_<n>' columns, per epoch."""
    fold_cols = [c for c in merged.columns if c.startswith(f'{prefix}_fold_')]
    means = merged[fold_cols].mean(axis=1).to_numpy()
    stds = merged[fold_cols].std(axis=1).to_numpy()
    return [
        str(round(m, 2)) + ' ± ' + str(round(s, 2))
        for m, s in zip(means, stds)
    ]

  new_df = pd.DataFrame({
      'Epoch': merged.index.to_numpy(),
      'Training Accuracy': _mean_pm_std('train_acc'),
      'Validation Accuracy': _mean_pm_std('val_acc'),
  })
  if any(c.startswith('achieved_epsilon_fold_') for c in merged.columns):
    new_df['ε'] = _mean_pm_std('achieved_epsilon')

  return new_df



In [22]:
#avg_stats_fold(fold_report_df_base).to_excel('target_model_baseline_stats.xlsx')
#avg_stats_fold(fold_report_df_dp).to_excel('target_model_dp_stats.xlsx')

In [23]:
  # Scratch cell: build one frame per fold of the baseline report at top level
  # and join the frames on 'epoch'.
  # NOTE(review): eval executes the stored string as Python code; this presumably
  # relies on `np` being in scope and on the CSV being trusted — confirm.
  stats_list = []
  for idx, row in fold_report_df_base.iterrows():
    epochs = eval(row['Epoch_Results'])['epoch']
    train_acc = eval(row['Epoch_Results'])['train_acc']
    val_acc = eval(row['Epoch_Results'])['val_acc']

    # Add the epsilon column only when this fold's results contain it
    if 'achieved_epsilon' in eval(row['Epoch_Results']).keys():
      a_epsilons= eval(row['Epoch_Results'])['achieved_epsilon']
      temp_df = pd.DataFrame({
          'epoch': epochs,
          f'achieved_epsilon_fold_{idx+1}': a_epsilons,
          f'train_acc_fold_{idx+1}': train_acc,
          f'val_acc_fold_{idx+1}': val_acc
      })
    else:
      temp_df = pd.DataFrame({
          'epoch': epochs,
          f'train_acc_fold_{idx+1}': train_acc,
          f'val_acc_fold_{idx+1}': val_acc
      })

    stats_list.append(temp_df)


  # Inner-join (pd.merge default) the first three folds on 'epoch';
  # stats_list[2] raises IndexError when fewer than three folds were read.
  temp_merge = pd.merge(stats_list[0], stats_list[1], on='epoch')
  merge_df = pd.merge(temp_merge, stats_list[2], on='epoch')

