In [1]:
import pandas as pd
import numpy as np
import torch

df = pd.read_csv('./csv_dataset/cropped.csv')
df.dropna()
print(df.head())

   age  gender ethnicity                                img_name
0  100       1         0  100_1_0_20170110183726390.jpg.chip.jpg
1  100       1         2  100_1_2_20170105174847679.jpg.chip.jpg
2  101       1         2  101_1_2_20170105174739309.jpg.chip.jpg
3   10       0         0   10_0_0_20161220222308131.jpg.chip.jpg
4   10       0         0   10_0_0_20170103200329407.jpg.chip.jpg


In [2]:
from sklearn.model_selection import train_test_split

df_train, df_test = train_test_split(df, train_size=0.8, random_state=42)
df_train, df_valid = train_test_split(df_train, train_size=0.85, random_state=42)

# Save the training, validation, and test sets in separate CSV files.
df_train.to_csv('./csv_dataset/train_set.csv', index=False)
df_valid.to_csv('./csv_dataset/valid_set.csv', index=False)
df_test.to_csv('./csv_dataset/test_set.csv', index=False)

In [3]:
from torchvision import transforms
custom_transform = transforms.Compose([transforms.Resize((128, 128)),
                                       transforms.RandomCrop((120, 120)),
                                       transforms.ToTensor()])


In [4]:
# Hyper-parameters
import torch.cuda
learning_rate = 0.01
device = 'cuda' if torch.cuda.is_available() else 'cpu'
num_epochs = 25
train_batch_size = 40
test_batch_size = 40
random_seed = 1
NUM_CLASSES = 117
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
from PIL import Image

class UTKface(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        row = self.data.iloc[index].values
        if pd.isna(row[-1]):
            # Handle the case where the value is NaN
            # You can skip this entry, return a placeholder image, or take other actions
            # For example, you can return a default image and age
            default_image = torch.zeros((1, 120, 120), dtype=torch.float32)  # Modify the shape as needed
            age = torch.tensor(0)  # Modify the default age as needed
            return default_image, age
        img_dir = str(Path(f'./crop_part1/{row[-1]}'))
        image = Image.open(img_dir).convert("L")
        image = self.transform(image)
        age = torch.tensor(int(row[0]))
        
        return image, age
    
    
csv_file_train = './csv_dataset/train_set.csv'
csv_file_test = './csv_dataset/test_set.csv'
csv_file_valid = './csv_dataset/valid_set.csv'


train_set = UTKface(csv_file_train,custom_transform)
test_set = UTKface(csv_file_test,custom_transform)
valid_set = UTKface(csv_file_valid,custom_transform)

train_loader = DataLoader(train_set, batch_size=train_batch_size, shuffle=True)
valid_loader = DataLoader(valid_set, batch_size=test_batch_size)
test_loader = DataLoader(test_set, batch_size=test_batch_size)

# print(train_loader.dataset)
# train_loader = train_loader.fillna(0)
i=0
for images, age in train_loader:
    print(i)
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', age.shape)
    i+=1
        

0
Image batch dimensions: torch.Size([40, 1, 120, 120])
Image label dimensions: torch.Size([40])
1
Image batch dimensions: torch.Size([40, 1, 120, 120])
Image label dimensions: torch.Size([40])
2
Image batch dimensions: torch.Size([40, 1, 120, 120])
Image label dimensions: torch.Size([40])
3
Image batch dimensions: torch.Size([40, 1, 120, 120])
Image label dimensions: torch.Size([40])
4
Image batch dimensions: torch.Size([40, 1, 120, 120])
Image label dimensions: torch.Size([40])
5
Image batch dimensions: torch.Size([40, 1, 120, 120])
Image label dimensions: torch.Size([40])
6
Image batch dimensions: torch.Size([40, 1, 120, 120])
Image label dimensions: torch.Size([40])
7
Image batch dimensions: torch.Size([40, 1, 120, 120])
Image label dimensions: torch.Size([40])
8
Image batch dimensions: torch.Size([40, 1, 120, 120])
Image label dimensions: torch.Size([40])
9
Image batch dimensions: torch.Size([40, 1, 120, 120])
Image label dimensions: torch.Size([40])
10
Image batch dimensions: tor

In [8]:
from coral_pytorch.layers import CoralLayer

class ConvNet(torch.nn.Module):

    def __init__(self, num_classes):
        super(ConvNet, self).__init__()

        self.features = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, (3, 3), (1, 1), 2),
            torch.nn.MaxPool2d((2, 2), (1, 1)),
            torch.nn.Conv2d(32, 64 , (3, 3), (1, 1), 3),
            torch.nn.MaxPool2d((2, 2), (1, 1)),
            torch.nn.Conv2d(64, 128 , (3, 3), (1, 1), 2),
            torch.nn.MaxPool2d((2, 2), (2, 2)),
            torch.nn.Conv2d(128, 256, (3, 3), (1, 1), 1),
            torch.nn.MaxPool2d((2, 2), (1, 1)))

        ### Specify CORAL layer
        self.fc = CoralLayer(size_in=984064, num_classes=num_classes)
        ###--------------------------------------------------------------------###

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1) # flatten

        ##### Use CORAL layer #####
        logits =  self.fc(x)
        probas = torch.sigmoid(logits)
        ###--------------------------------------------------------------------###

        return logits, probas



torch.manual_seed(random_seed)
model = ConvNet(num_classes=NUM_CLASSES)
print(model.features)
model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters())

Sequential(
  (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (1): MaxPool2d(kernel_size=(2, 2), stride=(1, 1), padding=0, dilation=1, ceil_mode=False)
  (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(3, 3))
  (3): MaxPool2d(kernel_size=(2, 2), stride=(1, 1), padding=0, dilation=1, ceil_mode=False)
  (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
  (5): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): MaxPool2d(kernel_size=(2, 2), stride=(1, 1), padding=0, dilation=1, ceil_mode=False)
)


In [9]:
from coral_pytorch.dataset import levels_from_labelbatch
from coral_pytorch.losses import coral_loss


for epoch in range(num_epochs):

    model = model.train()
    for batch_idx, (features, class_labels) in enumerate(train_loader):

        ##### Convert class labels for CORAL
        levels = levels_from_labelbatch(class_labels, 
                                        num_classes=NUM_CLASSES)
        ###--------------------------------------------------------------------###

        features = features.to(DEVICE)
        # print(features.shape)
        levels = levels.to(DEVICE)
        logits, probas = model(features)

        #### CORAL loss 
        loss = coral_loss(logits, levels)
        ###--------------------------------------------------------------------###   


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        ### LOGGING
        if not batch_idx % 167:
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Loss: %.4f' 
                %(epoch+1, num_epochs, batch_idx, 
                    len(train_loader), loss))

Epoch: 001/025 | Batch 000/167 | Loss: 86.6056
Epoch: 002/025 | Batch 000/167 | Loss: 52.7287


KeyboardInterrupt: 

In [282]:
from coral_pytorch.dataset import proba_to_label


def compute_mae_and_mse(model, data_loader, device):

    with torch.no_grad():

        mae, mse, acc, num_examples = 0., 0., 0., 0

        for i, (features, targets) in enumerate(data_loader):

            features = features.to(device)
            targets = targets.float().to(device)

            logits, probas = model(features)
            predicted_labels = proba_to_label(probas).float()

            num_examples += targets.size(0)
            mae += torch.sum(torch.abs(predicted_labels - targets))
            mse += torch.sum((predicted_labels - targets)**2)

        mae = mae / num_examples
        mse = mse / num_examples
        return mae, mse

In [283]:
train_mae, train_mse = compute_mae_and_mse(model, train_loader, DEVICE)
test_mae, test_mse = compute_mae_and_mse(model, test_loader, DEVICE)

In [284]:
print(f'Mean absolute error (train/test): {train_mae:.2f} | {test_mae:.2f}')
print(f'Mean squared error (train/test): {train_mse:.2f} | {test_mse:.2f}')


Mean absolute error (train/test): 7.60 | 8.61
Mean squared error (train/test): 114.91 | 144.86


In [285]:
# # Assuming you have a custom CSV file named 'custom_input.csv'
# custom_csv_file = './csv_dataset/new.csv'

# # Create a custom dataset
# custom_dataset = UTKface(custom_csv_file, custom_transform)  # Define custom_transform if needed

# # Create a DataLoader for the custom dataset
# custom_loader = DataLoader(custom_dataset, batch_size=1, shuffle=False)  # Batch size set to 1 for individual samples

# # Set the model to evaluation mode
# model = model.eval()

# # Iterate through the custom DataLoader
# for batch_idx, (features, class_labels) in enumerate(custom_loader):
#     # Move features to the specified device
#     features = features.to(DEVICE)

#     # Forward pass through the model to get predictions
#     logits, probas = model(features)
#     predicted_labels = proba_to_label(probas).float()
#     # Convert logits or probas to a format suitable for your task
#     # For example, you might want to convert logits to class predictions

#     # Print or use the predictions as needed
#     print("Predictions:", predicted_labels)
