In [1]:
# import libraries

import numpy as np
import torch
import random
import torch.nn as nn
import sys
import pandas as pd
from PIL import Image

from tqdm.notebook import tqdm
import os
import matplotlib.pyplot as plt
from torchvision import transforms
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from torchinfo import summary

In [2]:
print('Python version:', sys.version)
print('CUDA Available:', torch.cuda.is_available())

if torch.cuda.is_available():
    print('GPU Name:', torch.cuda.get_device_name())
    print('GPU Properties:\n', torch.cuda.get_device_properties('cuda'))
    device = "cuda"
    torch.cuda.set_per_process_memory_fraction(0.95, 0)
    torch.cuda.empty_cache()
else:
    print("Cuda is not available, please use cpu instead")
    device = "cpu"
!nvidia-smi

Python version: 3.9.0 (tags/v3.9.0:9cf6752, Oct  5 2020, 15:34:40) [MSC v.1927 64 bit (AMD64)]
CUDA Available: True
GPU Name: NVIDIA GeForce RTX 2070
GPU Properties:
 _CudaDeviceProperties(name='NVIDIA GeForce RTX 2070', major=7, minor=5, total_memory=8191MB, multi_processor_count=36)
Sat Jan 28 17:33:23 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 526.98       Driver Version: 526.98       CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0  On |                  N/A |
|  0%   36C    P2    52W / 175W |    461MiB /  8192MiB |      1%      Default |
|                               |         

In [90]:
# Load the training index and collapse it to one row per identity, gathering
# that identity's image paths into a list. Identities are compared as strings.
train_df = (
    pd.read_csv('./digiface_csv_files/digi_train.csv')
    .astype({'identity': str})
    .groupby('identity')['path']
    .apply(list)
    .reset_index()
)
# train_df.head(3)

In [91]:
# Work on an independent (deep) copy so experiments below leave train_df untouched.
train_df_copy = train_df.copy(deep=True)

In [100]:
label_to_indices = {}

# Exploration only: the trailing `break` stops after the first identity group.
for i, row in train_df_copy.iterrows():
    # identity label shared by every image in this row
    row_identity = row['identity']
    # map the identity to the index label of its row
    label_to_indices[row_identity] = i
    print(row)
    # list every image path recorded for this identity
    for img_name in row['path']:
        print(img_name)
        # Disabled for now: load each image and collect it with its label.
        # path_to_image = train_dir+img_name
        # img = Image.open(path_to_image).convert('RGB')
        # images.append(img)
        # labels.append(i)
    break

identity                                                    0
path        [digiFace1M\subjects_0-1999_72_imgs\0\22.png, ...
Name: 0, dtype: object
digiFace1M\subjects_0-1999_72_imgs\0\22.png
digiFace1M\subjects_0-1999_72_imgs\0\49.png
digiFace1M\subjects_0-1999_72_imgs\0\42.png
digiFace1M\subjects_0-1999_72_imgs\0\66.png
digiFace1M\subjects_0-1999_72_imgs\0\45.png
digiFace1M\subjects_0-1999_72_imgs\0\3.png
digiFace1M\subjects_0-1999_72_imgs\0\2.png
digiFace1M\subjects_0-1999_72_imgs\0\54.png
digiFace1M\subjects_0-1999_72_imgs\0\11.png
digiFace1M\subjects_0-1999_72_imgs\0\65.png
digiFace1M\subjects_0-1999_72_imgs\0\69.png
digiFace1M\subjects_0-1999_72_imgs\0\29.png
digiFace1M\subjects_0-1999_72_imgs\0\53.png
digiFace1M\subjects_0-1999_72_imgs\0\51.png
digiFace1M\subjects_0-1999_72_imgs\0\27.png
digiFace1M\subjects_0-1999_72_imgs\0\35.png
digiFace1M\subjects_0-1999_72_imgs\0\64.png
digiFace1M\subjects_0-1999_72_imgs\0\38.png
digiFace1M\subjects_0-1999_72_imgs\0\71.png
digiFace1M\subjects

In [45]:
seed = 41


# Define custom dataset
class FaceDataset(Dataset):
    """Dataset of image pairs for same/different identity training.

    Every image listed in the CSV is loaded (and transformed) eagerly when the
    dataset is constructed and kept in memory, grouped by identity.
    ``__getitem__`` then draws a random pair: even indices yield two images of
    the same identity (label 1), odd indices yield images of two different
    identities (label 0).

    Args:
        training_csv: path to a CSV with ``identity`` and ``filename`` columns.
        training_dir: directory that the ``filename`` entries are relative to.
        transform: optional callable applied to each PIL image; when omitted
            the image is converted with ``transforms.ToTensor()``.
    """

    def __init__(self, training_csv = None, training_dir = None, transform = None):
        # Seed the global RNGs that pair sampling draws from.
        np.random.seed(seed)
        random.seed(seed)
        # identity labels as an array, filled in by load_imgs
        self.unique_img_name = None
        # identity -> list of loaded images
        self.data = dict()
        # flat list of every loaded image (defines the dataset length)
        self.image = list()
        # file name of every loaded image, parallel to self.image
        self.identities = list()
        # read csv file
        self.train_df = pd.read_csv(training_csv)
        # number of rows (images) listed in the csv
        self.len_train = len(self.train_df)
        self.transform = transform
        self.train_dir = training_dir
        # one row per identity, with its file names gathered into a list
        self.train_df = self.train_df.groupby('identity')['filename'].apply(list).reset_index()
        self.load_imgs(self.train_df)

    def __len__(self):
        """Return the number of images loaded by ``load_imgs``."""
        return len(self.image)

    def __getitem__(self, idx):
        """Return a random ``(img1, img2, label)`` pair.

        Only the parity of ``idx`` matters: an even index gives a same-identity
        pair (label 1), an odd index a different-identity pair (label 0). The
        label is a float64 tensor of shape ``(1,)``.

        Raises:
            ValueError: on an even index when the drawn identity has fewer than
                two images, or on an odd index when fewer than two identities
                are available.
        """
        if idx % 2 == 0:
            # positive pair: two distinct images of one random identity
            random_identity = np.random.choice(self.unique_img_name, size = 1)[0]
            img1, img2 = random.sample(self.data[random_identity], 2)
            label = 1
        else:
            # negative pair: replace=False guarantees two *different*
            # identities; sampling with replacement could pick the same one
            # twice and mislabel a same-identity pair as 0.
            first_identity, second_identity = np.random.choice(
                self.unique_img_name, size = 2, replace = False
            )
            img1 = random.choice(self.data[first_identity])
            img2 = random.choice(self.data[second_identity])
            label = 0
        return img1, img2, torch.from_numpy(np.array([label], dtype = np.float64))

    def load_imgs(self, df):
        """Load every image referenced by ``df`` into memory.

        Args:
            df: frame with one row per identity; ``identity`` holds the label
                and ``filename`` the list of image file names for it.
        """
        # Build the fallback converter once instead of once per image.
        to_tensor = self.transform if self.transform is not None else transforms.ToTensor()
        # iterate through each row
        for _, row in df.iterrows():
            row_identities = row['identity']
            # setdefault creates the bucket the first time an identity is seen
            bucket = self.data.setdefault(row_identities, list())
            # The grouped column is named 'filename' (see the groupby in
            # __init__); reading 'filenames' raised KeyError.
            for img_name in row['filename']:
                path_to_image = os.path.join(self.train_dir, img_name)
                # Close the file handle as soon as the pixels are decoded.
                with Image.open(path_to_image) as raw_img:
                    img = raw_img.convert('RGB')
                img = to_tensor(img)

                bucket.append(img)
                self.image.append(img)
                # NOTE(review): this stores the file name, not the identity
                # label, despite the attribute name - confirm intent.
                self.identities.append(img_name)

        self.unique_img_name = np.array(list(self.data.keys()))

    def rotate_imgs(self, path_to_img):
        """Open an image as RGB and, with probability 1/2, rotate it.

        The angle is drawn from 90, 180, 270 or 360 degrees.

        Args:
            path_to_img: path of the image file to open.

        Returns:
            The (possibly rotated) PIL image.
        """
        with Image.open(path_to_img) as raw_img:
            img = raw_img.convert('RGB')
        is_rotate = random.choice([True,False])
        angle_rotation = [90,180,270,360]
        if is_rotate:
            degree = random.choice(angle_rotation)
            img = img.rotate(degree)
        return img


[2, 3, 4, 5, 6]

In [None]:
seed = 41


class digiFace_ds(Dataset):
    """Unfinished DigiFace dataset skeleton.

    At this stage the constructor only seeds the random number generators and
    ``load_imgs`` only walks the rows of a frame; nothing is stored yet.
    """

    def __init__(self, path_csv, path_dir, transform = None):
        """Seed NumPy's and Python's global RNGs.

        ``path_csv``, ``path_dir`` and ``transform`` are accepted but not used
        yet.
        """
        # same seed for both generators so sampling is repeatable
        for reseed in (np.random.seed, random.seed):
            reseed(seed)

    def load_imgs(self, df):
        """Walk ``df`` row by row, reading each row's ``identity`` value.

        The value is not kept anywhere yet; a row without an ``identity``
        entry raises ``KeyError``.
        """
        for _, record in df.iterrows():
            # identity label of the current row
            identity = record['identity']

In [48]:
# Toy frame to check how groupby(...).apply(list) gathers values per group.
df = pd.DataFrame(
    {
        'Animal': ['Falcon'] * 2 + ['Parrot'] * 2,
        'Max Speed': [380.0, 370.0, 24.0, 26.0],
    }
)
# one list of speeds per animal
df.groupby(['Animal'])['Max Speed'].apply(list)

Animal
Falcon    [380.0, 370.0]
Parrot      [24.0, 26.0]
Name: Max Speed, dtype: object

In [13]:
data = {}
count = 0

# Peek at the identity of the first three rows; train_df must expose an
# 'identity' column for the lookup below to succeed.
for _, row in train_df.iterrows():
    row_id = row['identity']
    print(row_id)

    # keep counting until the third row has been printed, then stop
    if count != 2:
        count += 1
        continue
    break

KeyError: 'identity'