In [36]:
import pandas as pd
import numpy as np
import torch
import warnings
import os
warnings.filterwarnings("ignore")

torch.__version__

'2.0.1+cu117'

In [63]:
from helper import get_dataframe

df = get_dataframe('UTKFace')

valid_values = ['0', '1', '2', '3', '4']

df= df[df['race'].isin(valid_values)]

def f(x):
    return int(int(x)/10)

df['age'] = df['age'].apply(lambda x: f(x))

df

Unnamed: 0,age,gender,race,path
0,5,0,0,50_0_0_20170117135034485.jpg.chip.jpg
1,5,0,3,55_0_3_20170119171117830.jpg.chip.jpg
2,1,0,4,12_0_4_20170103201607807.jpg.chip.jpg
3,4,0,0,40_0_0_20170117172519480.jpg.chip.jpg
4,3,1,3,39_1_3_20170104233629347.jpg.chip.jpg
...,...,...,...,...
23703,6,0,0,65_0_0_20170111200641250.jpg.chip.jpg
23704,2,1,0,26_1_0_20170116234741431.jpg.chip.jpg
23705,5,0,0,55_0_0_20170120140655585.jpg.chip.jpg
23706,6,1,0,60_1_0_20170110122614299.jpg.chip.jpg


In [69]:
# creating custom dataset

from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.io import read_image

class CustomDataset(Dataset):

    def __init__(self, path, labels, transform=None):
        self.img_dir = path
        self.img_labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        
        # Convert the 'age' value to an integer
        label = int(self.img_labels.iloc[idx, 1])

        if self.transform:
            image = self.transform(image)

        return image, label


# Here normalization component was a standard found from the internet

transform = transforms.Compose([  
    transforms.ToPILImage(),
    # transforms.Grayscale(num_output_channels=1),
    transforms.Resize((32, 32)),  # Resize the image to 28x28 pixels
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.485], std=[0.229])  # Normalize image (for grayscale)
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [70]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df['path'], df['age'], test_size=0.2, random_state=42)


training_data = pd.concat([X_train,y_train],axis=1)

testing_data = pd.concat([X_test,y_test],axis=1)

training_data['age'].unique()

array([ 2,  5,  1,  3,  4,  0, 11,  6,  7,  9,  8, 10])

In [71]:
training_dataset = CustomDataset('UTKFace', training_data, transform)
testing_dataset = CustomDataset('UTKFace', testing_data, transform)


In [72]:
from torch.utils.data import DataLoader

batch_size = 32
train_dataloader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(testing_dataset, batch_size=batch_size)


In [73]:
next(iter(train_dataloader))[0].shape# Test column

torch.Size([32, 3, 32, 32])

### Creating TinyVGG model

In [74]:
from torch import nn

class Model(nn.Module):

  def __init__(self,input_shape:int,hidden_units:int,output_shape:int):
    super().__init__()
    self.conv_block_1 = nn.Sequential(
        nn.Conv2d(input_shape,hidden_units,kernel_size = 3,stride = 1,padding = 1),
        nn.ReLU(),
        nn.Conv2d(hidden_units,hidden_units,kernel_size=3,stride=1,padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3)
    )

    self.conv_block_2 = nn.Sequential(
        nn.Conv2d(hidden_units,hidden_units,kernel_size = 3,stride = 1,padding = 1),
        nn.ReLU(),
        nn.Conv2d(hidden_units,hidden_units,kernel_size=3,stride=1,padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2)
    )

    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=25*10,
                  out_features=output_shape)
    )


  def forward(self,x):

    x = self.conv_block_1(x)
    x = self.conv_block_2(x)
    x = self.classifier(x)
    return x


In [75]:
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item()

    acc = (correct / len(y_pred)) * 100
    return acc

In [76]:
# Creating model and defining function
torch.manual_seed(42)

device = 'cpu'
model = Model(3,10,12)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params = model.parameters(),
                            lr=0.1)


In [83]:
torch.manual_seed(42)

from tqdm.auto import tqdm

EPOCHS = 5

for epoch in tqdm(range(EPOCHS)):

    train_loss , train_acc = 0,0

    for batch,(X,y) in enumerate(train_dataloader):

        model.train()
        
        y_pred = model(X)
        loss = loss_fn(y_pred,y)

        train_loss+= loss

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        if batch %400 ==0:
          print(f"Looked at {batch*len(X)}/{len(train_dataloader)} samples.")


    train_loss /= len(train_dataloader)

    print(f"Train loss:{train_loss}")

    ### Starting testing loop
    test_loss, test_acc = 0,0

    model.eval()

    test_loss = 0
    test_acc = 0
    with torch.inference_mode():

        for batch,(X,y) in enumerate(test_dataloader):

            y_pred = model(X)
            loss = loss_fn(y_pred,y)
            test_loss += loss

            y_pred = y_pred.argmax(dim = 1)
            
            test_acc += accuracy_fn(y_pred,y)

        test_acc= test_acc/len(test_dataloader)
        test_loss/=len(test_dataloader)

    print(f"Test acc : {test_acc} | test loss : {test_loss} | Train loss : {train_loss}")
            
            
        
        
        


        

  0%|          | 0/5 [00:00<?, ?it/s]

Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:1.3995872735977173
Test acc : 46.16610738255034 | test loss : 1.379460334777832 | Train loss : 1.3995872735977173
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:1.3560317754745483
Test acc : 45.56208053691275 | test loss : 1.3974759578704834 | Train loss : 1.3560317754745483
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:1.3177398443222046
Test acc : 47.94043624161074 | test loss : 1.3398469686508179 | Train loss : 1.3177398443222046
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:1.303940773010254
Test acc : 46.35906040268456 | test loss : 1.3747233152389526 | Train loss : 1.303940773010254
Looked at 0/593 samples.
Looked at 12800/593 samples.
Train loss:1.277944803237915
Test acc : 47.48741610738255 | test loss : 1.3152506351470947 | Train loss : 1.277944803237915


In [114]:
((y[0] >=0.5).int() - y[0]).sum()

tensor(0)