In [73]:

class GazeDataset(Dataset):
    def __init__(self, csv_file, data_path, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file.
            data_path (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on an image.
        """
        self.data_frame = pd.read_csv(csv_file)
        self.data_path = data_path
        self.transform = transform
        # Create a dictionary mapping each unique gaze value to a unique integer
        self.gaze_to_int = {gaze: idx for idx, gaze in enumerate(self.data_frame['gaze'].unique())}
        self.num_classes = len(self.gaze_to_int)
    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        img_name = os.path.join(self.data_path, self.data_frame['imgID'][idx])  # Assuming imgID is in the first column
        image = Image.open(img_name)
        

        if self.transform:
            image = self.transform(image)
            
        gaze = self.data_frame['gaze'][idx]   # Assuming gaze is in the second column
        gaze_idx = self.gaze_to_int[gaze]

        # Convert gaze_idx to one-hot encoded vector
        one_hot_gaze = torch.zeros(self.num_classes)
        one_hot_gaze[gaze_idx] = 1
        return image, one_hot_gaze

# Create datasets
import pandas as pd
from sklearn.model_selection import train_test_split

# Assuming that your CSV file has two columns: 'imgID' for image file names and 'gaze' for gaze labels
data_frame = pd.read_csv('D:\Datasets\Talis_frames15_v2\labels_and_features_TRAIN.csv')

# Count the frequency of each class
class_sample_counts = data_frame['gaze'].value_counts().sort_index().to_numpy()
# Compute weights for each class
weights = 1.0 / class_sample_counts
# Create a weight for each sample in the dataset
sample_weights = weights[data_frame['gaze'].replace({gaze: idx for idx, gaze in enumerate(data_frame['gaze'].unique())}).to_numpy()]
from torch.utils.data import WeightedRandomSampler, DataLoader
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)



train_dataset = GazeDataset(csv_file='D:\Datasets\Talis_frames15_v2\labels_and_features_TRAIN.csv', data_path=data_path, transform=transform)
valid_dataset = GazeDataset(csv_file='D:\Datasets\Talis_frames15_v2\labels_and_features_VAL.csv', data_path=data_path, transform=transform)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=batch, sampler=sampler, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=batch, shuffle=False, num_workers=0)



In [74]:
train_dataset.gaze_to_int

{3: 0, 2: 1, 1: 2, 4: 3}

In [75]:
# Compute class weights
class_counts = train_dataset.data_frame['gaze'].value_counts().to_dict()
total_samples = sum(class_counts.values())
class_weights = [total_samples/class_counts[class_] for class_ in sorted(train_dataset.gaze_to_int)]
class_weights_tensor = torch.FloatTensor(class_weights).to(device)
criterion2 = nn.CrossEntropyLoss(weight=class_weights_tensor)

# Train Depth+Img Detection

In [85]:
import os

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import torch.optim as optim
import torch.nn as nn
from tqdm import tqdm

batch=32
data_path = r"D:\Datasets\Talis_frames15_v2"  # Provide your data path here

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

criterion = nn.CrossEntropyLoss()
validate_epochs = 1  # Example: validate every epoch
save_epochs = 2     # Example: save the model every 2 epochs
ckpt_dir = r"D:\Datasets\Talis_frames15_v2\checkpoints"

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),  # Randomly flip the image horizontally
    transforms.RandomVerticalFlip(p=0.5),  # Randomly flip the image vertically
    transforms.RandomRotation(degrees=15),  # Randomly rotate the image by up to 15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Randomly change the brightness, contrast, saturation, and hue
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # Random translation
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0), ratio=(0.75, 1.33)),  # Randomly crop and resize the image
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
transform2 = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
class GazeDataset(Dataset):
    def __init__(self, csv_file, data_path, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file.
            data_path (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on an image.
        """
        self.data_frame = pd.read_csv(csv_file)
        self.data_path = data_path
        self.transform = transform
        # Create a dictionary mapping each unique gaze value to a unique integer
        self.gaze_to_int = {gaze: idx for idx, gaze in enumerate(self.data_frame['gaze'].unique())}
        self.num_classes = len(self.gaze_to_int)
    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        img_name = os.path.join(self.data_path, self.data_frame['imgID'][idx])  # Assuming imgID is in the first column
        image = Image.open(img_name)
        dep_name = os.path.join(self.data_path+"_depth", self.data_frame['imgID'][idx])
        depth = Image.open(img_name)

        if self.transform:
            image = self.transform(image)
            depth = transform2(depth)
        gaze = self.data_frame['gaze'][idx]   # Assuming gaze is in the second column
        gaze_idx = self.gaze_to_int[gaze]

        # Convert gaze_idx to one-hot encoded vector
        one_hot_gaze = torch.zeros(self.num_classes)
        one_hot_gaze[gaze_idx] = 1
        return image, depth, one_hot_gaze

# Create datasets
import pandas as pd
from sklearn.model_selection import train_test_split

# Assuming that your CSV file has two columns: 'imgID' for image file names and 'gaze' for gaze labels
data_frame = pd.read_csv('D:\Datasets\Talis_frames15_v2\labels_and_features_TRAIN.csv')

# Count the frequency of each class
class_sample_counts = data_frame['gaze'].value_counts().sort_index().to_numpy()
# Compute weights for each class
weights = 1.0 / class_sample_counts
# Create a weight for each sample in the dataset
sample_weights = weights[data_frame['gaze'].replace({gaze: idx for idx, gaze in enumerate(data_frame['gaze'].unique())}).to_numpy()]
from torch.utils.data import WeightedRandomSampler, DataLoader
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)



train_dataset = GazeDataset(csv_file='D:\Datasets\Talis_frames15_v2\labels_and_features_TRAIN.csv', data_path=data_path, transform=transform)
valid_dataset = GazeDataset(csv_file='D:\Datasets\Talis_frames15_v2\labels_and_features_VAL.csv', data_path=data_path, transform=transform)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=batch, sampler=sampler, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=batch, shuffle=False, num_workers=0)

# Load a pre-trained ResNet-18 model and modify the final layer
model = models.resnet18(weights="ResNet18_Weights.DEFAULT")
model.fc = nn.Linear(model.fc.in_features, 4)
model.conv1 = nn.Conv2d(6, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

#model=model2
model.to(device)
optimizer = optim.SGD(model.parameters(), lr=0.00001)
if not os.path.exists(ckpt_dir):
    os.makedirs(ckpt_dir)
import os
import torch
from tqdm import tqdm

# Training loop
num_epochs = 10  # Define your desired number of epochs
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    print("start epoch :"+str(epoch))
    # Add a progress bar for the training loop

    for images,depth, gazes in tqdm(train_loader): # train_progress_bar:
        inputs = torch.cat([images, depth], dim=1)
        # print(inputs.shape)
        inputs, gazes = inputs.to(device), torch.argmax(gazes, dim=1).to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, gazes)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Update the progress bar


    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}")

    # Save checkpoint
    if (epoch + 1) % save_epochs == 0:
        torch.save(model.state_dict(), os.path.join(ckpt_dir, f"epoch_{epoch+1}.ckpt"))

    # Validation loop
    if (epoch + 1) % validate_epochs == 0:
        model.eval()
        correct = 0
        total = 0
        # Add a progress bar for the validation loop
        valid_progress_bar = tqdm(valid_loader, desc='Validating', leave=False)
        with torch.no_grad():
            for images, depth, gazes in valid_progress_bar:
                inputs = torch.cat([images, depth], dim=1)
                inputs, gazes = inputs.to(device), torch.argmax(gazes, dim=1).to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += gazes.size(0)
                correct += (predicted == gazes).sum().item()

        print(f"Validation Accuracy: {100 * correct / total}%")

print("Training completed.")




start epoch :0


100%|██████████| 77/77 [01:23<00:00,  1.08s/it]


Epoch [1/10], Loss: 1.256945919680905


Validating:   5%|▌         | 1/19 [00:01<00:18,  1.03s/it]

torch.Size([32, 6, 224, 224])


Validating:  11%|█         | 2/19 [00:02<00:17,  1.03s/it]

torch.Size([32, 6, 224, 224])


Validating:  16%|█▌        | 3/19 [00:03<00:17,  1.07s/it]

torch.Size([32, 6, 224, 224])


Validating:  21%|██        | 4/19 [00:04<00:16,  1.07s/it]

torch.Size([32, 6, 224, 224])


Validating:  26%|██▋       | 5/19 [00:05<00:15,  1.08s/it]

torch.Size([32, 6, 224, 224])


Validating:  32%|███▏      | 6/19 [00:06<00:13,  1.07s/it]

torch.Size([32, 6, 224, 224])


Validating:  37%|███▋      | 7/19 [00:07<00:12,  1.08s/it]

torch.Size([32, 6, 224, 224])


Validating:  42%|████▏     | 8/19 [00:08<00:11,  1.08s/it]

torch.Size([32, 6, 224, 224])


Validating:  47%|████▋     | 9/19 [00:10<00:12,  1.22s/it]

torch.Size([32, 6, 224, 224])


Validating:  53%|█████▎    | 10/19 [00:11<00:11,  1.23s/it]

torch.Size([32, 6, 224, 224])


Validating:  58%|█████▊    | 11/19 [00:12<00:09,  1.16s/it]

torch.Size([32, 6, 224, 224])


Validating:  63%|██████▎   | 12/19 [00:13<00:07,  1.13s/it]

torch.Size([32, 6, 224, 224])


Validating:  68%|██████▊   | 13/19 [00:14<00:06,  1.10s/it]

torch.Size([32, 6, 224, 224])


Validating:  74%|███████▎  | 14/19 [00:15<00:05,  1.09s/it]

torch.Size([32, 6, 224, 224])


Validating:  79%|███████▉  | 15/19 [00:16<00:04,  1.08s/it]

torch.Size([32, 6, 224, 224])


Validating:  84%|████████▍ | 16/19 [00:17<00:03,  1.07s/it]

torch.Size([32, 6, 224, 224])


Validating:  89%|████████▉ | 17/19 [00:18<00:02,  1.07s/it]

torch.Size([32, 6, 224, 224])


Validating:  95%|█████████▍| 18/19 [00:19<00:01,  1.08s/it]

torch.Size([32, 6, 224, 224])


                                                           

torch.Size([29, 6, 224, 224])
Validation Accuracy: 26.94214876033058%
start epoch :1


100%|██████████| 77/77 [01:20<00:00,  1.04s/it]


Epoch [2/10], Loss: 1.1061678157224284


Validating:   5%|▌         | 1/19 [00:00<00:17,  1.00it/s]

torch.Size([32, 6, 224, 224])


Validating:  11%|█         | 2/19 [00:01<00:16,  1.01it/s]

torch.Size([32, 6, 224, 224])


Validating:  16%|█▌        | 3/19 [00:03<00:16,  1.01s/it]

torch.Size([32, 6, 224, 224])


Validating:  21%|██        | 4/19 [00:04<00:15,  1.01s/it]

torch.Size([32, 6, 224, 224])


Validating:  26%|██▋       | 5/19 [00:05<00:14,  1.02s/it]

torch.Size([32, 6, 224, 224])


Validating:  32%|███▏      | 6/19 [00:06<00:13,  1.02s/it]

torch.Size([32, 6, 224, 224])


Validating:  37%|███▋      | 7/19 [00:07<00:12,  1.02s/it]

torch.Size([32, 6, 224, 224])


Validating:  42%|████▏     | 8/19 [00:08<00:11,  1.02s/it]

torch.Size([32, 6, 224, 224])


Validating:  47%|████▋     | 9/19 [00:09<00:10,  1.02s/it]

torch.Size([32, 6, 224, 224])


Validating:  53%|█████▎    | 10/19 [00:10<00:09,  1.02s/it]

torch.Size([32, 6, 224, 224])


Validating:  58%|█████▊    | 11/19 [00:11<00:08,  1.02s/it]

torch.Size([32, 6, 224, 224])


Validating:  63%|██████▎   | 12/19 [00:12<00:07,  1.02s/it]

torch.Size([32, 6, 224, 224])


Validating:  68%|██████▊   | 13/19 [00:13<00:06,  1.02s/it]

torch.Size([32, 6, 224, 224])


Validating:  74%|███████▎  | 14/19 [00:14<00:05,  1.02s/it]

torch.Size([32, 6, 224, 224])


Validating:  79%|███████▉  | 15/19 [00:15<00:04,  1.02s/it]

torch.Size([32, 6, 224, 224])


Validating:  84%|████████▍ | 16/19 [00:16<00:03,  1.03s/it]

torch.Size([32, 6, 224, 224])


Validating:  89%|████████▉ | 17/19 [00:17<00:02,  1.03s/it]

torch.Size([32, 6, 224, 224])


Validating:  95%|█████████▍| 18/19 [00:18<00:01,  1.03s/it]

torch.Size([32, 6, 224, 224])


                                                           

torch.Size([29, 6, 224, 224])
Validation Accuracy: 25.950413223140497%
start epoch :2


 61%|██████    | 47/77 [00:50<00:32,  1.07s/it]


KeyboardInterrupt: 

# Train Depth+Img+Head+Eye Detection

In [None]:
import os

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import torch.optim as optim
import torch.nn as nn
from tqdm import tqdm

batch=32
data_path = r"D:\Datasets\Talis_frames15_v2"  # Provide your data path here

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

criterion = nn.CrossEntropyLoss()
validate_epochs = 1  # Example: validate every epoch
save_epochs = 2     # Example: save the model every 2 epochs
ckpt_dir = r"D:\Datasets\Talis_frames15_v2\checkpoints"

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),  # Randomly flip the image horizontally
    transforms.RandomVerticalFlip(p=0.5),  # Randomly flip the image vertically
    transforms.RandomRotation(degrees=15),  # Randomly rotate the image by up to 15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Randomly change the brightness, contrast, saturation, and hue
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # Random translation
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0), ratio=(0.75, 1.33)),  # Randomly crop and resize the image
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
transform2 = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
class GazeDataset(Dataset):
    def __init__(self, csv_file, data_path, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file.
            data_path (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on an image.
        """
        self.data_frame = pd.read_csv(csv_file)
        self.data_path = data_path
        self.transform = transform
        # Create a dictionary mapping each unique gaze value to a unique integer
        self.gaze_to_int = {gaze: idx for idx, gaze in enumerate(self.data_frame['gaze'].unique())}
        self.num_classes = len(self.gaze_to_int)
        self.head_
    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        img_name = os.path.join(self.data_path, self.data_frame['imgID'][idx])  # Assuming imgID is in the first column
        image = Image.open(img_name)
        head=
        

        if self.transform:
            image = self.transform(image)
            depth = transform2(depth)
        gaze = self.data_frame['gaze'][idx]   # Assuming gaze is in the second column
        gaze_idx = self.gaze_to_int[gaze]

        # Convert gaze_idx to one-hot encoded vector
        one_hot_gaze = torch.zeros(self.num_classes)
        one_hot_gaze[gaze_idx] = 1
        return image, depth, one_hot_gaze

# Create datasets
import pandas as pd
from sklearn.model_selection import train_test_split

# Assuming that your CSV file has two columns: 'imgID' for image file names and 'gaze' for gaze labels
data_frame = pd.read_csv('D:\Datasets\Talis_frames15_v2\labels_and_features_TRAIN.csv')

# Count the frequency of each class
class_sample_counts = data_frame['gaze'].value_counts().sort_index().to_numpy()
# Compute weights for each class
weights = 1.0 / class_sample_counts
# Create a weight for each sample in the dataset
sample_weights = weights[data_frame['gaze'].replace({gaze: idx for idx, gaze in enumerate(data_frame['gaze'].unique())}).to_numpy()]
from torch.utils.data import WeightedRandomSampler, DataLoader
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)



train_dataset = GazeDataset(csv_file='D:\Datasets\Talis_frames15_v2\labels_and_features_TRAIN.csv', data_path=data_path, transform=transform)
valid_dataset = GazeDataset(csv_file='D:\Datasets\Talis_frames15_v2\labels_and_features_VAL.csv', data_path=data_path, transform=transform)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=batch, sampler=sampler, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=batch, shuffle=False, num_workers=0)

# Load a pre-trained ResNet-18 model and modify the final layer
model = models.resnet18(weights="ResNet18_Weights.DEFAULT")
model.fc = nn.Linear(model.fc.in_features, 4)
model.conv1 = nn.Conv2d(6, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

#model=model2
model.to(device)
optimizer = optim.SGD(model.parameters(), lr=0.00001)
if not os.path.exists(ckpt_dir):
    os.makedirs(ckpt_dir)
import os
import torch
from tqdm import tqdm

# Training loop
num_epochs = 10  # Define your desired number of epochs
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    print("start epoch :"+str(epoch))
    # Add a progress bar for the training loop

    for images,depth, gazes in tqdm(train_loader): # train_progress_bar:
        inputs = torch.cat([images, depth], dim=1)
        # print(inputs.shape)
        inputs, gazes = inputs.to(device), torch.argmax(gazes, dim=1).to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, gazes)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Update the progress bar


    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}")

    # Save checkpoint
    if (epoch + 1) % save_epochs == 0:
        torch.save(model.state_dict(), os.path.join(ckpt_dir, f"epoch_{epoch+1}.ckpt"))

    # Validation loop
    if (epoch + 1) % validate_epochs == 0:
        model.eval()
        correct = 0
        total = 0
        # Add a progress bar for the validation loop
        valid_progress_bar = tqdm(valid_loader, desc='Validating', leave=False)
        with torch.no_grad():
            for images, depth, gazes in valid_progress_bar:
                inputs = torch.cat([images, depth], dim=1)
                inputs, gazes = inputs.to(device), torch.argmax(gazes, dim=1).to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += gazes.size(0)
                correct += (predicted == gazes).sum().item()

        print(f"Validation Accuracy: {100 * correct / total}%")

print("Training completed.")




# Tabular Training