<a href="https://colab.research.google.com/github/lorenafc/MscThesis_EyeTrackingIVR/blob/main/autoencoder_with_rf_sequence_samples_thesis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import time
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from torch.utils.data import TensorDataset


In [2]:
# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Device:', device)

# Read the CSV file
file_name = '/content/LLA2020_labeled.csv'
# file_name = '/content/eye_tracking_data_small_2019.csv'
eye_tracking_data = pd.read_csv(file_name)

Device: cpu


In [3]:
eye_tracking_data.head()

Unnamed: 0,time,L_x,L_y,L_z,C_x,C_y,C_z,observer,GT1,GT2,GT3,GT4,GT5,GT6,GT7
0,9.314,-2.969,1.6232,-1.2434,-0.4009,1.6289,-1.2939,1,0,0,0,0,0,0,0
1,9.337,-2.969,1.6255,-1.2432,-0.4007,1.629,-1.294,1,0,0,0,0,0,0,0
2,9.36,-2.969,1.626,-1.2447,-0.4006,1.629,-1.294,1,0,0,0,0,0,0,0
3,9.381,-2.969,1.6232,-1.243,-0.4004,1.6291,-1.2941,1,0,0,0,0,0,0,0
4,9.403,-2.969,1.6242,-1.241,-0.4002,1.6291,-1.2941,1,0,0,0,0,0,0,0


In [4]:
eye_tracking_data_rf = eye_tracking_data.copy()

In [5]:
# Data cleaning
eye_tracking_data = eye_tracking_data.drop(columns=['GT2', 'GT3', 'GT4', 'GT5', 'GT6', 'GT7']) # removing
eye_tracking_data = eye_tracking_data[['time', 'L_x', 'L_y', 'L_z', 'C_x', 'C_y', 'C_z', 'GT1','observer']]

In [6]:
print(eye_tracking_data.head(3))

    time    L_x     L_y     L_z     C_x     C_y     C_z  GT1  observer
0  9.314 -2.969  1.6232 -1.2434 -0.4009  1.6289 -1.2939    0         1
1  9.337 -2.969  1.6255 -1.2432 -0.4007  1.6290 -1.2940    0         1
2  9.360 -2.969  1.6260 -1.2447 -0.4006  1.6290 -1.2940    0         1


In [7]:
eye_tracking_data_without_GT1 = eye_tracking_data.drop(columns=['GT1'])

In [8]:
print(eye_tracking_data_without_GT1.head(3))

    time    L_x     L_y     L_z     C_x     C_y     C_z  observer
0  9.314 -2.969  1.6232 -1.2434 -0.4009  1.6289 -1.2939         1
1  9.337 -2.969  1.6255 -1.2432 -0.4007  1.6290 -1.2940         1
2  9.360 -2.969  1.6260 -1.2447 -0.4006  1.6290 -1.2940         1


In [9]:
# Convert all columns to float32 for compatibility with PyTorch
eye_tracking_data_without_GT1 = eye_tracking_data_without_GT1.astype('float32')

train_split = 0.75
# Creating data indices for training and test splits: LSTM autoencoder time series https://github.com/fabiozappo/LSTM-Autoencoder-Time-Series/blob/main/code/main.py
dataset_size = len(eye_tracking_data_without_GT1)
indices = list(range(dataset_size))
split = int(np.floor(train_split * dataset_size))

et_train_without_GT1 = eye_tracking_data_without_GT1.iloc[:split, :]
et_test_without_GT1 = eye_tracking_data_without_GT1.iloc[split:, :]

# Scaling the data
# scaler = MinMaxScaler()
# et_train_without_GT1 = pd.DataFrame(scaler.fit_transform(et_train_without_GT1), columns=et_train_without_GT1.columns)
# et_test_without_GT1 = pd.DataFrame(scaler.transform(et_test_without_GT1), columns=et_test_without_GT1.columns)

# Define sequence length (the sequences have overlapping data)
sequence_length = 460  # 10 seconds of data - sampled at ~45 Hz

# Function to create sequences
def create_sequences(data, sequence_length):
    sequences = []
    sequences_again = []
    for i in range(len(data) - sequence_length):
        seq = data.iloc[i:i+sequence_length, :].values  # Convert to numpy array with .values to use it as a tensor.
        seq_again = data.iloc[i:i+sequence_length, :].values  # Repeated X in Y position
        sequences.append(seq)
        sequences_again.append(seq_again)
    return np.array(sequences), np.array(sequences_again)

# Generate sequences for training and testing
X_train_seq, y_train_seq = create_sequences(et_train_without_GT1, sequence_length)
X_test_seq, y_test_seq = create_sequences(et_test_without_GT1, sequence_length)




In [10]:
print(f"X_train_seq shape: {X_train_seq.shape}")
print(f"y_train_seq shape: {y_train_seq.shape}")
print(f"X_test_seq shape: {X_test_seq.shape}")
print(f"y_test_seq shape: {y_test_seq.shape}")


X_train_seq shape: (79229, 460, 8)
y_train_seq shape: (79229, 460, 8)
X_test_seq shape: (26103, 460, 8)
y_test_seq shape: (26103, 460, 8)


In [11]:
X_train_tensor = torch.tensor(X_train_seq, dtype=torch.float32)
y_train_tensor = X_train_tensor.clone()  # Target for the autoencoder is the input itself
X_test_tensor = torch.tensor(X_test_seq, dtype=torch.float32)
y_test_tensor = X_test_tensor.clone()  # Target for the autoencoder is the input itself

# Create TensorDataset for train and test sets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 256
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=True) #timeseries data so shuffle = False
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True) #timeseries data so shuffle = False


In [12]:
print(X_train_tensor.shape)
print(y_train_tensor.shape)
print(X_test_tensor.shape)
print(y_test_tensor.shape)

torch.Size([79229, 460, 8])
torch.Size([79229, 460, 8])
torch.Size([26103, 460, 8])
torch.Size([26103, 460, 8])


In [13]:
# Hyperparameters for the autoencoder model
random_seed = 123
learning_rate = 0.005
num_epochs = 5

# Model architecture settings
input_size = sequence_length * 8  # 460 samples * 8 features per sample (I am including the column "observer") = 3220
num_hidden_1 = 500  # First layer in encoder
num_hidden_2 = 50   # Compressed representation layer

# Define the Autoencoder model
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()

        ### ENCODER
        self.encoder_layer1 = nn.Linear(input_size, num_hidden_1)

        self.encoder_layer2 = nn.Linear(num_hidden_1, num_hidden_2)


        ### DECODER
        self.decoder_layer1 = nn.Linear(num_hidden_2, num_hidden_1)
        self.decoder_layer2 = nn.Linear(num_hidden_1, input_size)
        # self.decoder_layer2.weight.detach().normal_(0.0, 0.1)
        # self.decoder_layer2.bias.detach().zero_()

    def encoder(self, x):
        x = F.sigmoid(self.encoder_layer1(x))
        encoded = F.sigmoid(self.encoder_layer2(x))
        return encoded

    def decoder(self, encoded_x):
        x = F.leaky_relu(self.decoder_layer1(encoded_x))
        decoded = torch.sigmoid(self.decoder_layer2(x))  # Sigmoid to get values between 0 and 1
        return decoded

    def forward(self, x):
        # Flatten input from (batch, sequence_length, features) to (batch, input_size)
        x = x.view(x.size(0), -1)
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Instantiate the model
torch.manual_seed(random_seed)
model = Autoencoder().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training Loop
start_time = time.time()
for epoch in range(num_epochs):
    for batch_idx, (sequences, _) in enumerate(train_loader):
        sequences = sequences.to(device)

        # Forward pass
        decoded = model(sequences)

        # loss = F.binary_cross_entropy(decoded, sequences.view(sequences.size(0), -1), reduction='mean')
         # reconstruction error
        loss = F.mse_loss(decoded, sequences.view(sequences.size(0), -1)) # changes binary_cross_entropy loss to mse_loss
        optimizer.zero_grad()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Logging
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Loss: %.4f'
                  % (epoch + 1, num_epochs, batch_idx, len(train_loader), loss))

    print('Time elapsed: %.2f min' % ((time.time() - start_time) / 60))

print('Total Training Time: %.2f min' % ((time.time() - start_time) / 60))



Epoch: 001/005 | Batch 000/309 | Loss: 39.3705
Epoch: 001/005 | Batch 050/309 | Loss: 117.5504
Epoch: 001/005 | Batch 100/309 | Loss: 257.5620
Epoch: 001/005 | Batch 150/309 | Loss: 76.9752
Epoch: 001/005 | Batch 200/309 | Loss: 221.5446
Epoch: 001/005 | Batch 250/309 | Loss: 327.3101
Epoch: 001/005 | Batch 300/309 | Loss: 352.1613
Time elapsed: 0.57 min
Epoch: 002/005 | Batch 000/309 | Loss: 36.2229
Epoch: 002/005 | Batch 050/309 | Loss: 117.5504
Epoch: 002/005 | Batch 100/309 | Loss: 257.5620
Epoch: 002/005 | Batch 150/309 | Loss: 76.9752
Epoch: 002/005 | Batch 200/309 | Loss: 221.5446
Epoch: 002/005 | Batch 250/309 | Loss: 327.3101
Epoch: 002/005 | Batch 300/309 | Loss: 352.1613
Time elapsed: 1.16 min
Epoch: 003/005 | Batch 000/309 | Loss: 36.2229
Epoch: 003/005 | Batch 050/309 | Loss: 117.5504
Epoch: 003/005 | Batch 100/309 | Loss: 257.5620
Epoch: 003/005 | Batch 150/309 | Loss: 76.9752
Epoch: 003/005 | Batch 200/309 | Loss: 221.5446
Epoch: 003/005 | Batch 250/309 | Loss: 327.3101


In [14]:
print("input size:", input_size)
print("num_hidden_1:", num_hidden_1)
print("num_hidden_2:", num_hidden_2)

input size: 3680
num_hidden_1: 500
num_hidden_2: 50


# Using encoder


In [15]:
#### USING ENCODER (dont run the cell below of decoder if you run this one)

# Extract features from the autoencoder for Random Forest
X_train_ae = np.ones((len(train_dataset), num_hidden_2))
# y_train_ae = y_train_seq
y_train_ae = np.ones((len(train_dataset),num_hidden_2))

X_test_ae = np.ones((len(test_dataset), num_hidden_2))
# y_test_ae = y_test_seq
y_test_ae = np.ones((len(test_dataset),num_hidden_2))

start_idx = 0
for idx, (sequences, labels) in enumerate(train_loader):
    sequences = sequences.to(device)
    encoded = model.encoder(sequences.view(sequences.size(0), -1))
    # decoded = model.encoder(sequences.view(sequences.size(0), -1))

    batch_size = encoded.shape[0]
    # batch_size = decoded.shape[0]
    # X_train_ae[start_idx:start_idx+batch_size] = decoded.cpu().detach().numpy() #using decoder
    # y_train_ae[start_idx:start_idx+batch_size] = decoded.cpu().detach().numpy() #using decoder #labels.cpu().detach().numpy()
    X_train_ae[start_idx:start_idx+batch_size] = encoded.cpu().detach().numpy() #using encoder
    y_train_ae[start_idx:start_idx+batch_size] = encoded.cpu().detach().numpy() #using encoder #labels.cpu().detach().numpy()
    start_idx += batch_size

start_idx = 0
for idx, (sequences, labels) in enumerate(test_loader):
    sequences = sequences.to(device)
    # encoded = model.encoder(sequences.view(sequences.size(0), -1))
    decoded = model.encoder(sequences.view(sequences.size(0), -1))
    # batch_size = encoded.shape[0]
    batch_size = decoded.shape[0]
    X_test_ae[start_idx:start_idx + batch_size] = decoded.cpu().detach().numpy() #using encoder
    y_test_ae[start_idx:start_idx+batch_size] = decoded.cpu().detach().numpy() #using encoder #labels.cpu().detach().numpy()
    # X_test_ae[start_idx:start_idx+batch_size] = decoded.cpu().detach().numpy() #using decoder
    # y_test_ae[start_idx:start_idx+batch_size] = decoded.cpu().detach().numpy() #using decoder #labels.cpu().detach().numpy()

    start_idx += batch_size


# RANDOM FOREST

In [18]:
eye_tracking_data.columns

Index(['time', 'L_x', 'L_y', 'L_z', 'C_x', 'C_y', 'C_z', 'GT1', 'observer'], dtype='object')

In [16]:
train_split = 0.75
# Creating data indices for training and test splits: LSTM autoencoder time series https://github.com/fabiozappo/LSTM-Autoencoder-Time-Series/blob/main/code/main.py
dataset_size = len(eye_tracking_data)
indices = list(range(dataset_size))
split = int(np.floor(train_split * dataset_size))

et_train_original_rf = eye_tracking_data.iloc[:split, :]
et_test_original_rf = eye_tracking_data.iloc[split:, :]

print("et_train_rf shape is %s\n" % (et_train_original_rf.shape,))

feature_cols = ['time', 'L_x', 'L_y', 'L_z', 'C_x', 'C_y', 'C_z', 'observer']

X_train_original_rf = et_train_original_rf[feature_cols] # Features
y_train_original_rf = et_train_original_rf.GT1 # Target variable

X_test_original_rf = et_test_original_rf[feature_cols] # Features
y_test_original_rf = et_test_original_rf.GT1 # Target variable

print(f"X_train_original_rf shape is {X_train_original_rf.shape} \nY train original rf shape is {y_train_original_rf.shape} ")
print(f"X_test_original_rf shape is {X_test_original_rf.shape}\n")
print(f"y_test_original_rf shape is {y_test_original_rf.shape}")


et_train_rf shape is (79689, 9)

X_train_original_rf shape is (79689, 8) 
Y train original rf shape is (79689,) 
X_test_original_rf shape is (26563, 8)

y_test_original_rf shape is (26563,)


In [17]:
print(f"The X_train_ae shape is:  {X_train_ae.shape}")
print(f"The X_test_ae shape is:  {X_test_ae.shape}\n")

print(f"The y_train_ae shape is:  {X_train_ae.shape}")
print(f"The Y_test_ae shape is:  {X_test_ae.shape}\n")

print(f"The y_train_seq shape is:  {y_train_seq.shape}\n")

print(f"The y_train_original_rf shape is: {y_train_original_rf.shape}")
print(f"The y_test_original_rf shape is: {y_test_original_rf.shape}")
# the number of rows are different from the autoencoders and the original ones to be used in the RF

The X_train_ae shape is:  (79229, 50)
The X_test_ae shape is:  (26103, 50)

The y_train_ae shape is:  (79229, 50)
The Y_test_ae shape is:  (26103, 50)

The y_train_seq shape is:  (79229, 460, 8)

The y_train_original_rf shape is: (79689,)
The y_test_original_rf shape is: (26563,)


## Using original GT1 column for Y train and test dropping last rows to match autoencoder size:

In [18]:
# drop last 460 rows of y_train_original_rf is (79689,) and y_train_original_rf is (79689,)
# to match the number of rows of X_train_ae

y_train_original_rf_drop_last_rows = y_train_original_rf[:-460]
y_test_original_rf_drop_last_rows = y_test_original_rf[:-460]

In [36]:
print(f"The X_train_ae shape is:  {X_train_ae.shape}")
print(f"The X_test_ae shape is:  {X_test_ae.shape}\n")

print(f"The y_train_ae shape is:  {y_train_ae.shape}")
print(f"The y_test_ae shape is:  {y_test_ae.shape}\n")

print(f"The train_original_rf_drop_last_rows shape is:  {y_train_original_rf_drop_last_rows.shape}")
print(f"The test_original_rf_drop_last_rows shape is:  {y_test_original_rf_drop_last_rows.shape}")

The X_train_ae shape is:  (79229, 50)
The X_test_ae shape is:  (26103, 50)

The y_train_ae shape is:  (79229, 50)
The y_test_ae shape is:  (26103, 50)

The train_original_rf_drop_last_rows shape is:  (79229,)
The test_original_rf_drop_last_rows shape is:  (26103,)


In [34]:
# Random Forest Classifier - USING ENCODER AND ORIGINAL X TEST AND Y TEST
rf = RandomForestClassifier(n_estimators=50, n_jobs=-1)
rf.fit(X_train_ae, y_train_original_rf_drop_last_rows)

print(f'Train Accuracy: {rf.score(X_train_ae, y_train_original_rf_drop_last_rows) * 100:.2f}%')
print(f'Test Accuracy: {rf.score(X_test_ae, y_train_original_rf_drop_last_rows) * 100:.2f}%')


Train Accuracy: 54.33%




ValueError: X has 8 features, but RandomForestClassifier is expecting 50 features as input.

## Creating sequences
## Use y_train_seq_rf, X_test_seq_rf and y_test_seq_rf in the RF model with X_train_ae

In [None]:
# Function to create sequences
def create_sequences_rf(data, sequence_length):
    sequences = []
    labels_GT1 = []
    for i in range(len(data) - sequence_length):
        seq = data.iloc[i:i+sequence_length, :].values  #
        label_GT1 = data.iloc[i:i+sequence_length]['GT1'].values  # Select only GT1 column
        sequences.append(seq)
        labels_GT1.append(label_GT1)
    return np.array(sequences), np.array(labels_GT1)


# Generate sequences for training and testing
X_train_seq_rf, y_train_seq_rf = create_sequences_rf(et_train_original_rf, sequence_length)
X_test_seq_rf, y_test_seq_rf = create_sequences_rf(et_test_original_rf, sequence_length)

In [None]:
print(f"The X_train_seq_rf shape is:  {X_train_seq_rf.shape}")
print(f"The y_train_seq_rf shape is:  {y_train_seq_rf.shape}\n")

print(f"The X_test_seq_rf shape is:  {X_test_seq_rf.shape}")
print(f"The y_test_seq_rf shape is:  {y_test_seq_rf.shape}\n")

In [20]:
print(f"The X_train_ae shape is:  {X_train_ae.shape}")
print(f"The X_test_ae shape is:  {X_test_ae.shape}\n")

print(f"The y_train_ae shape is:  {y_train_ae.shape}")
print(f"The y_test_ae shape is:  {y_test_ae.shape}\n")



The X_train_ae shape is:  (79229, 50)
The X_test_ae shape is:  (26103, 50)

The y_train_ae shape is:  (79229, 50)
The y_test_ae shape is:  (26103, 50)

The X_test_seq_rf shape is:  (26103, 460, 9)
The y_test_seq_rf shape is:  (26103, 460)



In [24]:
# Random Forest Classifier - USING ENCODER AND X_TRAIN_AE and y_train_seq_rf, X_test_seq_rf and y_train_seq_rf. Labels GT1 in train and test with sequences
rf = RandomForestClassifier(n_estimators=50, n_jobs=-1)
rf.fit(X_train_ae, y_train_ae)

print(f'Train Accuracy: {rf.score(X_train_ae, y_train_ae) * 100:.2f}%')
print(f'Test Accuracy: {rf.score(X_test_ae, y_test_ae) * 100:.2f}%')

ValueError: Unknown label type: continuous-multioutput. Maybe you are trying to fit a classifier, which expects discrete classes on a regression target with continuous values.

In [36]:
# Random Forest Classifier - USING ENCODER (X_TRAIN AND X_TEST DF)
rf = RandomForestClassifier(n_estimators=50, n_jobs=-1)
rf.fit(X_train_ae_df, y_train_seq_rf)

print(f'Train Accuracy: {rf.score(X_train_ae_df, y_train_ae_df) * 100:.2f}%')
print(f'Test Accuracy: {rf.score(X_test_ae, y_test_original_rf_drop_last_rows) * 100:.2f}%')

NameError: name 'y_train_seq_rf' is not defined

In [33]:
# Random Forest Classifier
rf = RandomForestClassifier(n_estimators=50, n_jobs=-1)
rf.fit(X_train_ae, y_train_original_rf_drop_last_rows)

print(f'Train Accuracy: {rf.score(X_train_ae, y_train_original_rf_drop_last_rows) * 100:.2f}%')
print(f'Test Accuracy: {rf.score(X_test_ae, y_test_original_rf_drop_last_rows) * 100:.2f}%')


Train Accuracy: 54.33%
Test Accuracy: 55.16%


In [None]:
print("Training label distribution:", np.unique(y_train_seq, return_counts=True))
print("Testing label distribution:", np.unique(y_test_ae, return_counts=True))


Training label distribution: (array([0., 1.], dtype=float32), array([16527326, 19348994]))
Testing label distribution: (array([0., 1.], dtype=float32), array([5569882, 7006518]))


In [None]:
print("X_train_ae shape:", X_train_ae.shape)
print("X_test_ae shape:", X_test_ae.shape)
print("y_train_ae shape:", y_train_ae.shape)
print("y_test_ae shape:", y_test_ae.shape)


X_train_ae shape: (77992, 50)
X_test_ae shape: (27340, 50)
y_train_ae shape: (77992, 460)
y_test_ae shape: (27340, 460)


## Inserting X_test in the autoencoder without sequence (simulate real data)

In [None]:
# Convert all columns to float32 for compatibility with PyTorch
eye_tracking_data_without_GT1 = eye_tracking_data_without_GT1.astype('float32')

train_split = 0.75
# Creating data indices for training and test splits: LSTM autoencoder time series https://github.com/fabiozappo/LSTM-Autoencoder-Time-Series/blob/main/code/main.py
dataset_size = len(eye_tracking_data_without_GT1)
indices = list(range(dataset_size))
split = int(np.floor(train_split * dataset_size))

et_train_without_GT1 = eye_tracking_data_without_GT1.iloc[:split, :]
et_test_without_GT1 = eye_tracking_data_without_GT1.iloc[split:, :]


In [None]:
X_train_tensor = torch.tensor(X_train_seq, dtype=torch.float32)
y_train_tensor = X_train_tensor.clone()  # Target for the autoencoder is the input itself
X_test_tensor = torch.tensor(X_test_seq, dtype=torch.float32)
y_test_tensor = X_test_tensor.clone()  # Target for the autoencoder is the input itself

# Create TensorDataset for train and test sets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 256
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=True) #timeseries data so shuffle = False
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True) #timeseries data so shuffle = False

In [None]:
#### USING ENCODER (dont run the cell below of decoder if you run this one)

# Extract features from the autoencoder for Random Forest
X_train_ae = np.ones((len(train_dataset), num_hidden_2))
# y_train_ae = y_train_seq
y_train_ae = np.ones((len(train_dataset),num_hidden_2))

X_test_ae = np.ones((len(test_dataset), num_hidden_2))
# y_test_ae = y_test_seq
y_test_ae = np.ones((len(test_dataset),num_hidden_2))

start_idx = 0
for idx, (sequences, labels) in enumerate(train_loader):
    sequences = sequences.to(device)
    encoded = model.encoder(sequences.view(sequences.size(0), -1))
    # decoded = model.encoder(sequences.view(sequences.size(0), -1))

    batch_size = encoded.shape[0]
    # batch_size = decoded.shape[0]
    # X_train_ae[start_idx:start_idx+batch_size] = decoded.cpu().detach().numpy() #using decoder
    # y_train_ae[start_idx:start_idx+batch_size] = decoded.cpu().detach().numpy() #using decoder #labels.cpu().detach().numpy()
    X_train_ae[start_idx:start_idx+batch_size] = encoded.cpu().detach().numpy() #using encoder
    y_train_ae[start_idx:start_idx+batch_size] = encoded.cpu().detach().numpy() #using encoder #labels.cpu().detach().numpy()
    start_idx += batch_size

start_idx = 0
for idx, (sequences, labels) in enumerate(test_loader):
    sequences = sequences.to(device)
    # encoded = model.encoder(sequences.view(sequences.size(0), -1))
    decoded = model.encoder(sequences.view(sequences.size(0), -1))
    # batch_size = encoded.shape[0]
    batch_size = decoded.shape[0]
    X_test_ae[start_idx:start_idx + batch_size] = decoded.cpu().detach().numpy() #using encoder
    y_test_ae[start_idx:start_idx+batch_size] = decoded.cpu().detach().numpy() #using encoder #labels.cpu().detach().numpy()
    # X_test_ae[start_idx:start_idx+batch_size] = decoded.cpu().detach().numpy() #using decoder
    # y_test_ae[start_idx:start_idx+batch_size] = decoded.cpu().detach().numpy() #using decoder #labels.cpu().detach().numpy()

    start_idx += batch_size