In [1]:
import numpy as np
import torch
import pandas as pd
import xlsxwriter
from denoising_diffusion_pytorch import Unet1D, GaussianDiffusion1D, Trainer1D, Dataset1D

import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import openpyxl

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Directory containing the Excel files
excel_dir = 'Data/'

# Get a list of all Excel files in the directory
excel_files = [file for file in os.listdir(excel_dir) if file.endswith('.xlsx')]

# Split the data into trials based on empty spaces
trials = []
current_trial = []

for excel_file in excel_files:
    # Construct the full path to the Excel file
    excel_file_path = os.path.join(excel_dir, excel_file)

    try:
        # Open the Excel file and read sheet names
        excel_workbook = openpyxl.load_workbook(excel_file_path, read_only=True)
        sheet_names = excel_workbook.sheetnames

        # Find the sheet with a name that ends with "trimmed"
        for sheet_name in sheet_names:
            if sheet_name.endswith("trimmed"):
                break  # Found the desired sheet
        else:
            # Handle the case when no matching sheet is found
            print(f"No sheet ending with 'trimmed' found in {excel_file}")
            continue

        # Load the data from the selected sheet
        df = pd.read_excel(excel_file_path, sheet_name=sheet_name)

        # Extract accelerometer data columns (x, y, z)
        accelerometer_data = df[['x-axis (g)', 'y-axis (g)', 'z-axis (g)']].values

        for row in accelerometer_data:
            if np.all(np.isnan(row)):
                if current_trial:
                    trials.append(current_trial)
                    current_trial = []
            else:
                current_trial.append(row)

        if current_trial:
            trials.append(current_trial)

        # Process 'trials' as needed for each file
        # You can perform additional operations or save the trials data for each file here

    except Exception as e:
        print(f"An error occurred while processing {excel_file}: {e}")

# Calculate the desired trial length as a multiple of 1, 2, 4, or 8
highest_trial_length = max(len(trial) for trial in trials)
desired_trial_length = highest_trial_length

print(desired_trial_length)

while desired_trial_length % 8 != 0:
    desired_trial_length += 1

print(desired_trial_length)

# Pad each trial with zeros to the desired length
padded_trials = []

for trial in trials:
    while len(trial) < desired_trial_length:
        trial.append(trial[-1])
    padded_trials.append(trial)

print(padded_trials)

# Convert the data into a 3D tensor
tensor_data = np.array(padded_trials)

# Reshape the tensor to match your desired dimensions
tensor_data = tensor_data.transpose(0, 2, 1)

training_seq = torch.tensor(tensor_data)

# # Apply MinMax scaling to the input data and store the scalers
# scaler_list = []
# for i in range(training_seq.shape[0]):
#     scaler = MinMaxScaler()
#     # Reshape the data for scaling
#     reshaped_data = training_seq[i].reshape(-1, 1)
#     training_seq[i] = torch.tensor(scaler.fit_transform(reshaped_data).reshape(training_seq[i].shape), dtype=torch.float)
#     scaler_list.append(scaler)

print(training_seq.shape)

print(training_seq)

training_seq = training_seq.float()

print(training_seq.dtype)

model = Unet1D(
    dim=64,
    dim_mults=(1, 2, 4, 8),
    channels=3  # 3 columns
)

diffusion = GaussianDiffusion1D(
    model,
    seq_length=desired_trial_length,  # number of data in 1 trial. this should be divided by 1, 2, 4, 8
    timesteps=1000,
    objective='pred_v' 
)

dataset = Dataset1D(
    training_seq)  # this is just an example, but you can formulate your own Dataset and pass it into the `Trainer1D` below

loss = diffusion(training_seq)
loss.backward()

# # Or using trainer

trainer = Trainer1D(
    diffusion,
    dataset = dataset,
    train_batch_size = 32,
    train_lr = 8e-5,
    train_num_steps = 700,         # total training steps
    gradient_accumulate_every = 2,    # gradient accumulation steps
    ema_decay = 0.995,                # exponential moving average decay
    amp = True,                       # turn on mixed precision
)
trainer.train()

sampled_seq = diffusion.sample(batch_size=20)
sampled_seq.shape  # (4, 32, 128) // 4 trials, 32 features, 128 rows in one trial

print(sampled_seq)

tensor_data = np.array(sampled_seq)

for i in range(sampled_seq.shape[0]):
    tensor_data[i] = scaler_list[i].inverse_transform(tensor_data[i])


# Reverse the scaling and save to Excel
excel_file_path = "trial_data.xlsx"
with pd.ExcelWriter(excel_file_path, engine='xlsxwriter') as writer:
    for trial_number, trial_data in enumerate(tensor_data, start=1):
        # # Inverse transform using the corresponding scaler
        # scaler = scaler_list[trial_number - 1]
        # reshaped_data = tensor_data[trial_number - 1].reshape(-1, 1)
        # tensor_data[trial_number - 1] = scaler.inverse_transform(reshaped_data).reshape(tensor_data[trial_number - 1].shape)

        reshaped_data = tensor_data[trial_number - 1].T  # Transpose to have features as columns
        df = pd.DataFrame(reshaped_data, columns=[f"Feature{i}" for i in range(1, reshaped_data.shape[1] + 1)])
        df.to_excel(writer, sheet_name=f"Trial_{trial_number}", index=False)





  from .autonotebook import tqdm as notebook_tqdm


No sheet ending with 'trimmed' found in Mfallback_Hip_2022-07-20T13.19.37.166_E59CF70AD2E7_Accelerometer_32.000Hz_1.7.3.xlsx
573
576
[[array([ 0.955, -0.065,  0.219]), array([ 0.966, -0.08 ,  0.223]), array([ 0.973, -0.083,  0.23 ]), array([ 0.968, -0.063,  0.232]), array([ 0.964, -0.05 ,  0.239]), array([ 0.965, -0.053,  0.238]), array([ 0.972, -0.058,  0.233]), array([ 0.97 , -0.055,  0.224]), array([ 0.971, -0.06 ,  0.229]), array([ 0.966, -0.058,  0.229]), array([ 0.97 , -0.055,  0.234]), array([ 0.968, -0.051,  0.235]), array([ 0.965, -0.04 ,  0.234]), array([ 0.97 , -0.042,  0.236]), array([ 0.971, -0.046,  0.237]), array([ 0.969, -0.047,  0.238]), array([ 0.966, -0.048,  0.238]), array([ 0.968, -0.044,  0.238]), array([ 0.973, -0.041,  0.239]), array([ 0.965, -0.037,  0.239]), array([ 0.968, -0.044,  0.24 ]), array([ 0.966, -0.039,  0.238]), array([ 0.967, -0.039,  0.245]), array([ 0.967, -0.027,  0.24 ]), array([ 0.974, -0.028,  0.241]), array([ 0.972, -0.022,  0.24 ]), array([

loss: 0.0776: 100%|██████████| 700/700 [22:22<00:00,  1.92s/it]


training complete


sampling loop time step: 100%|██████████| 1000/1000 [04:16<00:00,  3.90it/s]


tensor([[[8.9902e-02, 1.2857e-01, 1.7438e-01,  ..., 0.0000e+00,
          0.0000e+00, 1.3604e-01],
         [0.0000e+00, 0.0000e+00, 1.1193e-03,  ..., 7.1382e-02,
          2.1600e-03, 0.0000e+00],
         [0.0000e+00, 1.6469e-03, 1.4374e-02,  ..., 1.5782e-01,
          1.1246e-01, 3.1793e-01]],

        [[9.5578e-01, 9.0359e-01, 8.9529e-01,  ..., 9.7403e-01,
          9.3673e-01, 9.9908e-01],
         [0.0000e+00, 6.5544e-02, 0.0000e+00,  ..., 0.0000e+00,
          1.2519e-01, 2.6803e-01],
         [2.4987e-01, 1.9737e-01, 2.2737e-01,  ..., 1.6008e-01,
          1.6911e-01, 3.7346e-01]],

        [[3.0905e-01, 1.2916e-01, 1.8330e-01,  ..., 0.0000e+00,
          0.0000e+00, 4.1695e-02],
         [1.0168e-03, 6.1611e-03, 1.1923e-02,  ..., 6.9070e-02,
          3.5638e-02, 6.3226e-02],
         [0.0000e+00, 1.6049e-03, 3.9932e-02,  ..., 5.8582e-01,
          6.6539e-01, 5.5395e-01]],

        ...,

        [[8.5392e-01, 8.4521e-01, 8.6351e-01,  ..., 8.6963e-01,
          9.0662e-01, 8.2

NameError: name 'scaler_list' is not defined