<a href="https://colab.research.google.com/github/lorenafc/MscThesis_EyeTrackingIVR/blob/main/autoencoder_with_rf_sequence_samples_thesis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import time
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier

In [17]:
# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Device:', device)

# Read the CSV file
file_name = '/content/LLA2020_labeled.csv'
# file_name = '/content/eye_tracking_data_small_2019.csv'
eye_tracking_data = pd.read_csv(file_name)

Device: cpu


In [18]:
eye_tracking_data.head()

Unnamed: 0,time,L_x,L_y,L_z,C_x,C_y,C_z,observer,GT1,GT2,GT3,GT4,GT5,GT6,GT7
0,9.314,-2.969,1.6232,-1.2434,-0.4009,1.6289,-1.2939,1,0,0,0,0,0,0,0
1,9.337,-2.969,1.6255,-1.2432,-0.4007,1.629,-1.294,1,0,0,0,0,0,0,0
2,9.36,-2.969,1.626,-1.2447,-0.4006,1.629,-1.294,1,0,0,0,0,0,0,0
3,9.381,-2.969,1.6232,-1.243,-0.4004,1.6291,-1.2941,1,0,0,0,0,0,0,0
4,9.403,-2.969,1.6242,-1.241,-0.4002,1.6291,-1.2941,1,0,0,0,0,0,0,0


In [19]:
# Data cleaning: remove unnecessary columns
eye_tracking_data = eye_tracking_data.drop(columns=['GT2', 'GT3', 'GT4', 'GT5', 'GT6', 'GT7'])
eye_tracking_data = eye_tracking_data[['time', 'L_x', 'L_y', 'L_z', 'C_x', 'C_y', 'C_z', 'GT1','observer']]

# Convert all columns to float32 for compatibility with PyTorch
eye_tracking_data = eye_tracking_data.astype('float32')

# Split into training and testing datasets
# et_train_with_GT1 = eye_tracking_data.iloc[:1536, :]
# et_test_with_GT1 = eye_tracking_data.iloc[1536:, :]

et_train_with_GT1 = eye_tracking_data.iloc[:78452, :]
et_test_with_GT1 = eye_tracking_data.iloc[78452:, :]

# Scaling the data
scaler = MinMaxScaler()
et_train_with_GT1 = pd.DataFrame(scaler.fit_transform(et_train_with_GT1), columns=et_train_with_GT1.columns)
et_test_with_GT1 = pd.DataFrame(scaler.transform(et_test_with_GT1), columns=et_test_with_GT1.columns)

# Define sequence length
sequence_length = 460  # 10 seconds of data - sampled at ~45 Hz

# Function to create sequences
def create_sequences(data, sequence_length):
    sequences = []
    labels = []
    for i in range(len(data) - sequence_length):
        seq = data.iloc[i:i+sequence_length, :-1].values  # Exclude the GT1 column. convert to numpy asrray with .values to use it as a tensor.
        label = data.iloc[i:i+sequence_length]['GT1'].values  # Label is the GT1 at the last step of the sequence
        sequences.append(seq)
        labels.append(label)
    return np.array(sequences), np.array(labels)

# Generate sequences for training and testing
X_train_seq, y_train_seq = create_sequences(et_train_with_GT1, sequence_length)
X_test_seq, y_test_seq = create_sequences(et_test_with_GT1, sequence_length)




In [20]:
# Custom Dataset class
class EyeTrackingDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = torch.tensor(sequences, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return self.sequences[idx], self.labels[idx]

# Initialize Dataset and DataLoader with sequences
train_dataset = EyeTrackingDataset(X_train_seq, y_train_seq)
test_dataset = EyeTrackingDataset(X_test_seq, y_test_seq)

batch_size = 256
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=True) #timeseries data so shuffle = False
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True) #timeseries data so shuffle = False


In [23]:
print(train_loader.dataset.sequences.shape)
print(train_loader.dataset.labels.shape)
print(test_loader.dataset.sequences.shape)
print(test_loader.dataset.labels.shape)

torch.Size([77992, 460, 8])
torch.Size([77992, 460])
torch.Size([27340, 460, 8])
torch.Size([27340, 460])


In [35]:
# Hyperparameters for the autoencoder model
random_seed = 123
learning_rate = 0.005
num_epochs = 5

# Model architecture settings
input_size = sequence_length * 8  # 460 samples * 8 features per sample (I am including the column "observer") = 3220
num_hidden_1 = 500  # First layer in encoder
num_hidden_2 = 50   # Compressed representation layer

# Define the Autoencoder model
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()

        ### ENCODER
        self.encoder_layer1 = nn.Linear(input_size, num_hidden_1)

        self.encoder_layer2 = nn.Linear(num_hidden_1, num_hidden_2)


        ### DECODER
        self.decoder_layer1 = nn.Linear(num_hidden_2, num_hidden_1)
        self.decoder_layer2 = nn.Linear(num_hidden_1, input_size)
        # self.decoder_layer2.weight.detach().normal_(0.0, 0.1)
        # self.decoder_layer2.bias.detach().zero_()

    def encoder(self, x):
        x = F.sigmoid(self.encoder_layer1(x))
        encoded = F.sigmoid(self.encoder_layer2(x))
        return encoded

    def decoder(self, encoded_x):
        x = F.leaky_relu(self.decoder_layer1(encoded_x))
        decoded = torch.sigmoid(self.decoder_layer2(x))  # Sigmoid to get values between 0 and 1
        return decoded

    def forward(self, x):
        # Flatten input from (batch, sequence_length, features) to (batch, input_size)
        x = x.view(x.size(0), -1)
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Instantiate the model
torch.manual_seed(random_seed)
model = Autoencoder().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training Loop
start_time = time.time()
for epoch in range(num_epochs):
    for batch_idx, (sequences, _) in enumerate(train_loader):
        sequences = sequences.to(device)

        # Forward pass
        decoded = model(sequences)

        # loss = F.binary_cross_entropy(decoded, sequences.view(sequences.size(0), -1), reduction='mean')
         # reconstruction error
        loss = F.mse_loss(decoded, sequences.view(sequences.size(0), -1)) # changes binary_cross_entropy loss to mse_loss
        optimizer.zero_grad()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Logging
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Loss: %.4f'
                  % (epoch + 1, num_epochs, batch_idx, len(train_loader), loss))

    print('Time elapsed: %.2f min' % ((time.time() - start_time) / 60))

print('Total Training Time: %.2f min' % ((time.time() - start_time) / 60))



Epoch: 001/005 | Batch 000/304 | Loss: 0.0796
Epoch: 001/005 | Batch 050/304 | Loss: 0.0565
Epoch: 001/005 | Batch 100/304 | Loss: 0.0630
Epoch: 001/005 | Batch 150/304 | Loss: 0.0441
Epoch: 001/005 | Batch 200/304 | Loss: 0.0532
Epoch: 001/005 | Batch 250/304 | Loss: 0.0618
Epoch: 001/005 | Batch 300/304 | Loss: 0.0427
Time elapsed: 1.15 min
Epoch: 002/005 | Batch 000/304 | Loss: 0.0676
Epoch: 002/005 | Batch 050/304 | Loss: 0.0544
Epoch: 002/005 | Batch 100/304 | Loss: 0.0591
Epoch: 002/005 | Batch 150/304 | Loss: 0.0466
Epoch: 002/005 | Batch 200/304 | Loss: 0.0532
Epoch: 002/005 | Batch 250/304 | Loss: 0.0642
Epoch: 002/005 | Batch 300/304 | Loss: 0.0401
Time elapsed: 2.52 min
Epoch: 003/005 | Batch 000/304 | Loss: 0.0619
Epoch: 003/005 | Batch 050/304 | Loss: 0.0490
Epoch: 003/005 | Batch 100/304 | Loss: 0.0606
Epoch: 003/005 | Batch 150/304 | Loss: 0.0447
Epoch: 003/005 | Batch 200/304 | Loss: 0.0538
Epoch: 003/005 | Batch 250/304 | Loss: 0.0668
Epoch: 003/005 | Batch 300/304 | L

In [36]:
print("input size:", input_size)
print("num_hidden_1:", num_hidden_1)
print("num_hidden_2:", num_hidden_2)

input size: 3680
num_hidden_1: 500
num_hidden_2: 50


In [50]:

# Extract features from the autoencoder for Random Forest
X_train_ae = np.ones((len(train_dataset), num_hidden_2))
# y_train_ae = y_train_seq
y_train_ae = np.ones((len(train_dataset),sequence_length))

X_test_ae = np.ones((len(test_dataset), num_hidden_2))
# y_test_ae = y_test_seq
y_test_ae = np.ones((len(test_dataset),sequence_length))

start_idx = 0
for idx, (sequences, labels) in enumerate(train_loader):
    sequences = sequences.to(device)
    encoded = model.encoder(sequences.view(sequences.size(0), -1))
    batch_size = encoded.shape[0]
    X_train_ae[start_idx:start_idx+batch_size] = encoded.cpu().detach().numpy() #using encoder
    y_train_ae[start_idx:start_idx+batch_size] = labels.cpu().detach().numpy()
    start_idx += batch_size

start_idx = 0
for idx, (sequences, labels) in enumerate(test_loader):
    sequences = sequences.to(device)
    encoded = model.encoder(sequences.view(sequences.size(0), -1))
    batch_size = encoded.shape[0]
    X_test_ae[start_idx:start_idx + batch_size] = encoded.cpu().detach().numpy() #using encoder
    y_test_ae[start_idx:start_idx+batch_size] = labels.cpu().detach().numpy()
    start_idx += batch_size


In [51]:
# Random Forest Classifier
rf = RandomForestClassifier(n_estimators=50, n_jobs=-1)
rf.fit(X_train_ae, y_train_ae)

print(f'Train Accuracy: {rf.score(X_train_ae, y_train_ae) * 100:.2f}%')
print(f'Test Accuracy: {rf.score(X_test_ae, y_test_ae) * 100:.2f}%')


Train Accuracy: 0.22%
Test Accuracy: 0.75%


In [39]:
print("Training label distribution:", np.unique(y_train_ae, return_counts=True))
print("Testing label distribution:", np.unique(y_test_ae, return_counts=True))


Training label distribution: (array([0., 1.], dtype=float32), array([16527326, 19348994]))
Testing label distribution: (array([0., 1.], dtype=float32), array([5569882, 7006518]))


In [40]:
print("X_train_ae shape:", X_train_ae.shape)
print("X_test_ae shape:", X_test_ae.shape)
print("y_train_ae shape:", y_train_ae.shape)
print("y_test_ae shape:", y_test_ae.shape)


X_train_ae shape: (77992, 50)
X_test_ae shape: (27340, 50)
y_train_ae shape: (77992, 460)
y_test_ae shape: (27340, 460)
