In [12]:
import pandas as pd
import numpy as np
import torch
import warnings
import os
warnings.filterwarnings("ignore")

torch.__version__

'2.0.1+cu117'

In [13]:
from helper import get_dataframe

df = get_dataframe('UTKFace')

valid_values = ['0', '1', '2', '3', '4']

df= df[df['race'].isin(valid_values)]

df


Unnamed: 0,age,gender,race,path
0,50,0,0,50_0_0_20170117135034485.jpg.chip.jpg
1,55,0,3,55_0_3_20170119171117830.jpg.chip.jpg
2,12,0,4,12_0_4_20170103201607807.jpg.chip.jpg
3,40,0,0,40_0_0_20170117172519480.jpg.chip.jpg
4,39,1,3,39_1_3_20170104233629347.jpg.chip.jpg
...,...,...,...,...
23703,65,0,0,65_0_0_20170111200641250.jpg.chip.jpg
23704,26,1,0,26_1_0_20170116234741431.jpg.chip.jpg
23705,55,0,0,55_0_0_20170120140655585.jpg.chip.jpg
23706,60,1,0,60_1_0_20170110122614299.jpg.chip.jpg


In [36]:
# creating custom dataset

from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.io import read_image

class CustomDataset(Dataset):

    def __init__(self,path,labels,transfrom=None):
        self.img_dir = path
        self.img_labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self,idx):
        img_path = os.path.join(self.img_dir,self.img_labels.iloc[idx,0])
        image = read_image(img_path)
        label = list(map(int, self.img_labels.iloc[idx, 1]))
        # label = list(map(int, map(float, self.img_labels.iloc[idx, 1])))

        if self.transform:
            image = self.transform(image)

        
        return image,label

# Here normalization component was a standard found from the internet

transform = transforms.Compose([  
    transforms.ToPILImage(),
    # transforms.Grayscale(num_output_channels=1),
    transforms.Resize((32, 32)),  # Resize the image to 28x28 pixels
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.485], std=[0.229])  # Normalize image (for grayscale)
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [37]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df['path'], df['gender'], test_size=0.2, random_state=42)


training_data = pd.concat([X_train,y_train],axis=1)

testing_data = pd.concat([X_test,y_test],axis=1)

training_data['gender'].unique()

array(['1', '0'], dtype=object)

In [38]:
training_dataset = CustomDataset('UTKFace', training_data, transform)
testing_dataset = CustomDataset('UTKFace', testing_data, transform)


In [39]:
from torch.utils.data import DataLoader

batch_size = 32
train_dataloader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(testing_dataset, batch_size=batch_size)


In [40]:
next(iter(train_dataloader))[0].shape# Test column

torch.Size([32, 3, 32, 32])

### Creating TinyVGG model

In [41]:
from torch import nn

class Model(nn.Module):

  def __init__(self,input_shape:int,hidden_units:int,output_shape:int):
    super().__init__()
    self.conv_block_1 = nn.Sequential(
        nn.Conv2d(input_shape,hidden_units,kernel_size = 3,stride = 1,padding = 1),
        nn.ReLU(),
        nn.Conv2d(hidden_units,hidden_units,kernel_size=3,stride=1,padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3)
    )

    self.conv_block_2 = nn.Sequential(
        nn.Conv2d(hidden_units,hidden_units,kernel_size = 3,stride = 1,padding = 1),
        nn.ReLU(),
        nn.Conv2d(hidden_units,hidden_units,kernel_size=3,stride=1,padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2)
    )

    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=25*10,
                  out_features=output_shape)
    )


  def forward(self,x):

    x = self.conv_block_1(x)
    x = self.conv_block_2(x)
    x = self.classifier(x)
    return x



In [42]:
def accuracy_fn(y_true, y_pred):
    """Calculates accuracy between truth labels and predictions.

    Args:
        y_true (torch.Tensor): Truth labels for predictions.
        y_pred (torch.Tensor): Predictions to be compared to predictions.

    Returns:
        [torch.float]: Accuracy value between y_true and y_pred, e.g. 78.45
    """
    correct = torch.eq(y_true, y_pred).sum().item()

    acc = (correct / len(y_pred)) * 100
    return acc

### Model to predict Gender

In [43]:
# Creating model and defining function
torch.manual_seed(42)

device = 'cpu'
model = Model(3,10,2)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params = model.parameters(),
                            lr=0.1)

In [44]:
torch.manual_seed(42)

from tqdm.auto import tqdm

EPOCHS = 6

for epoch in tqdm(range(EPOCHS)):

    train_loss , train_acc = 0,0

    for batch,(X,y) in enumerate(train_dataloader):

        model.train()
        
        y_pred = model(X)

        loss = loss_fn(y_pred,y[0])

        train_loss+= loss

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        if batch %400 ==0:
          print(f"Looked at {batch*len(X)}/{len(train_dataloader)} samples.")


    train_loss /= len(train_dataloader)

    print(f"Train loss:{train_loss}")

    ### Starting testing loop
    test_loss, test_acc = 0,0

    model.eval()

    test_loss = 0
    test_acc = 0
    with torch.inference_mode():

        for batch,(X,y) in enumerate(test_dataloader):

            y_pred = model(X)
            loss = loss_fn(y_pred,y[0])
            test_loss += loss

            y_pred = y_pred.argmax(dim = 1)
            
            test_acc += accuracy_fn(y_pred,y[0])

        test_acc= test_acc/len(test_dataloader)
        test_loss/=len(test_dataloader)

    print(f"Test acc : {test_acc} | test loss : {test_loss} | Train loss : {train_loss}")
            
            
        
        
        


        

  0%|          | 0/6 [00:00<?, ?it/s]

Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:0.5241355895996094
Test acc : 81.70302013422818 | test loss : 0.40331026911735535 | Train loss : 0.5241355895996094
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:0.3777991533279419
Test acc : 83.65352348993288 | test loss : 0.3622373640537262 | Train loss : 0.3777991533279419
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:0.3365187346935272
Test acc : 84.76510067114094 | test loss : 0.33321496844291687 | Train loss : 0.3365187346935272
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:0.3195090889930725
Test acc : 86.4010067114094 | test loss : 0.31767112016677856 | Train loss : 0.3195090889930725
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:0.30314815044403076
Test acc : 84.72315436241611 | test loss : 0.3506428003311157 | Train loss : 0.30314815044403076
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:0.297203928232193
Test acc : 87

In [114]:
((y[0] >=0.5).int() - y[0]).sum()

tensor(0)

### Model to predict Race

In [98]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df['path'], df['race'], test_size=0.2, random_state=42)


training_data = pd.concat([X_train,y_train],axis=1)

testing_data = pd.concat([X_test,y_test],axis=1)

training_data['race'].unique()

array(['0', '1', '3', '4', '2'], dtype=object)

In [99]:
training_dataset = CustomDataset('UTKFace', training_data, transform)
testing_dataset = CustomDataset('UTKFace', testing_data, transform)


In [100]:
from torch.utils.data import DataLoader

batch_size = 32
train_dataloader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(testing_dataset, batch_size=batch_size)


In [101]:
next(iter(train_dataloader))[0].shape# Test column

torch.Size([32, 3, 32, 32])

In [102]:
# Creating model and defining function
torch.manual_seed(42)

device = 'cpu'
model = Model(3,10,5)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params = model.parameters(),
                            lr=0.1)


In [103]:
torch.manual_seed(42)

from tqdm.auto import tqdm

EPOCHS = 6

for epoch in tqdm(range(EPOCHS)):

    train_loss , train_acc = 0,0

    for batch,(X,y) in enumerate(train_dataloader):

        model.train()
        
        y_pred = model(X)

        loss = loss_fn(y_pred,y[0])

        train_loss+= loss

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        if batch %400 ==0:
          print(f"Looked at {batch*len(X)}/{len(train_dataloader)} samples.")


    train_loss /= len(train_dataloader)

    print(f"Train loss:{train_loss}")

    ### Starting testing loop
    test_loss, test_acc = 0,0

    model.eval()

    test_loss = 0
    test_acc = 0
    with torch.inference_mode():

        for batch,(X,y) in enumerate(test_dataloader):

            y_pred = model(X)
            loss = loss_fn(y_pred,y[0])
            test_loss += loss

            y_pred = y_pred.argmax(dim = 1)
            
            test_acc += accuracy_fn(y_pred,y[0])

        test_acc= test_acc/len(test_dataloader)
        test_loss/=len(test_dataloader)

    print(f"Test acc : {test_acc} | test loss : {test_loss} | Train loss : {train_loss}")
            
            
        
        
        


        

  0%|          | 0/6 [00:00<?, ?it/s]

Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:1.2973315715789795
Test acc : 55.310402684563755 | test loss : 1.1629858016967773 | Train loss : 1.2973315715789795
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:1.0214494466781616
Test acc : 66.04865771812081 | test loss : 0.9412874579429626 | Train loss : 1.0214494466781616
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:0.895909309387207
Test acc : 70.62080536912751 | test loss : 0.8422528505325317 | Train loss : 0.895909309387207
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:0.8454598188400269
Test acc : 63.76258389261745 | test loss : 1.000044345855713 | Train loss : 0.8454598188400269
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:0.8187004327774048
Test acc : 72.26510067114094 | test loss : 0.8040850758552551 | Train loss : 0.8187004327774048
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:0.8006505966186523
Test acc : 69.4546