<a href="https://www.kaggle.com/code/rabbi2k3/gender-classification-pytorch-resnet-50?scriptVersionId=216470981" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
#/kaggle/input/biggest-genderface-recognition-dataset
import gc
import os
import time
import warnings

import torch
import torch.nn as nn
import torchvision
from torchvision.models import resnet50
from sklearn.model_selection import train_test_split
from torch import optim
from torch.utils import data
from torch.utils.data import random_split,Subset
from torchvision import transforms

warnings.filterwarnings("ignore")


class GenderNet(nn.Module):
    def __init__(self):
        super(GenderNet, self).__init__()
        # Load pre-trained ResNet-50
        self.base_model = resnet50(pretrained=True)

        in_features = self.base_model.fc.in_features
        self.base_model.fc = nn.Linear(in_features, 2)

    def forward(self, x):
        # Forward pass through ResNet-50
        x = self.base_model(x)
        return x

In [None]:
class GenderTrain:
    def __init__(self):
        self.root = os.path.abspath('/kaggle/input/biggest-genderface-recognition-dataset/faces/')
        self.train_iterator = None
        self.valid_iterator = None
        self.test_iterator = None

        # set hyper parameters
        self.img_size = 112
        self.means = (0, 0, 0)
        self.stds = (1, 1, 1)

        self.batch_size = 128

        # Number of training epochs
        self.num_epochs = 100

        # Learning rate
        self.lr = 0.0001

        # Initiate net
        self.net = GenderNet()

        # set device
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.net.to(self.device)

        # set optimizer
        self.optimizer = optim.Adam(self.net.parameters(), lr=self.lr)

        # set criterion to calculate loss
        self.criterion = nn.CrossEntropyLoss()
        self.criterion.to(self.device)

        # Learning Rate Scheduler
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=5, gamma=0.5)

    def load_dataset(self):
        transform_pipeline = transforms.Compose([
            transforms.RandomResizedCrop(self.img_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=self.means,
                                 std=self.stds)
        ])

        full_dataset = torchvision.datasets.ImageFolder(root=self.root,
                                                         transform=transform_pipeline)

        # Split the dataset into train, validation, and test sets
        total_size = len(full_dataset)
        train_size = int(total_size * 0.7)
        val_size = int(total_size * 0.2)
        test_size = total_size - train_size - val_size  # Ensure all data is included

        train_dataset, validation_dataset, test_dataset = random_split(
            full_dataset, [train_size, val_size, test_size]
        )


        self.train_iterator = data.DataLoader(dataset=train_dataset,
                                              shuffle=True,
                                              num_workers=8,
                                              batch_size=self.batch_size)

        self.valid_iterator = data.DataLoader(dataset=validation_dataset,
                                              shuffle=True,
                                              num_workers=8,
                                              batch_size=self.batch_size)

        self.test_iterator = data.DataLoader(dataset=test_dataset,
                                             shuffle=False,
                                             num_workers=8,
                                             batch_size=self.batch_size)

        print('Load data done!')

    def train(self, iterator):
        # Local Parameters
        epoch_loss = 0
        epoch_acc = 0
        start_time = time.time()

        # Iterating over data loader
        for images, labels in iterator:
            # Loading images and labels to device
            images = images.to(self.device)
            labels = labels.to(self.device)

            # Reseting Gradients
            self.optimizer.zero_grad()

            # Forward
            outputs = self.net(images)

            # identify max prediction
            _, prediction = torch.max(outputs, 1)

            # Calculating Loss
            loss = self.criterion(outputs, labels)

            # Backward
            loss.backward()
            self.optimizer.step()

            # append losses
            epoch_loss += loss.item() * images.size(0)

            # append accuracy
            epoch_acc += torch.sum(prediction == labels.data)

            del images
            del labels

        # Overall Epoch Results
        end_time = time.time()
        total_time = end_time - start_time

        # Acc and Loss
        # epoch_loss = np.mean(epoch_loss)
        # epoch_acc = np.mean(epoch_acc)
        # self.lr_scheduler.step()
        return epoch_loss / len(iterator.dataset), epoch_acc / len(iterator.dataset), total_time

    def evaluate(self, iterator, best_val_acc, mode='test'):
        # Local Parameters
        epoch_loss = 0
        epoch_acc = 0
        start_time = time.time()

        # Iterating over data loader
        for images, labels in iterator:
            # Loading images and labels to device
            images = images.to(self.device)
            labels = labels.to(self.device)

            # Forward
            outputs = self.net(images)

            # identify max prediction
            _, prediction = torch.max(outputs, 1)

            # Calculating Loss
            loss = self.criterion(outputs, labels)

            # Calculate loss
            epoch_loss += loss.item() * images.size(0)

            # Calculating Accuracy
            epoch_acc += torch.sum(prediction == labels.data)

            del images
            del labels

        # Overall Epoch Results
        end_time = time.time()
        total_time = end_time - start_time

        # Saving best model
        if epoch_acc > best_val_acc and mode == 'val':
            best_val_acc = epoch_acc
            torch.save(self.net.state_dict(), os.path.abspath('/kaggle/working/resnet50_best.pth'))

        return epoch_loss / len(iterator.dataset), epoch_acc / len(iterator.dataset), total_time, best_val_acc

    def train_data(self):
        best_val_acc = 0
        torch.cuda.empty_cache()

        for epoch in range(self.num_epochs):  # loop over the dataset multiple times
            # Training
            print("Training...")
            loss, acc, elapsed = self.train(self.train_iterator)
            # Print Epoch Details
            print(f'Epoch {epoch + 1} Loss : {loss} Acc : {(acc * 100): .3f} % Time : {elapsed}')

            # Validation
            print("Validating...")
            loss, acc, elapsed, best_val_acc = self.evaluate(self.valid_iterator, best_val_acc=best_val_acc, mode='val')
            # Print Epoch Details
            print(f'Epoch {epoch + 1} Loss : {loss} Acc : {(acc * 100):.3f} % Time : {elapsed}')

            # Test
            print("Testing...")
            loss, acc, elapsed, best_val_acc = self.evaluate(self.test_iterator, best_val_acc=best_val_acc)
            # Print Epoch Details
            print(f'Epoch {epoch + 1} Loss : {loss} Acc : Acc : {(acc * 100):.3f} % Time : {elapsed}')

        print('Finished Training')

        # save model
        model_path = os.path.abspath('/kaggle/working/model.pth')
        torch.save(self.net.state_dict(), model_path)


if __name__ == '__main__':
    trainer = GenderTrain()
    trainer.load_dataset()
    trainer.train_data()


Load data done!
Training...
Epoch 1 Loss : 0.24715572638851682 Acc :  89.483 % Time : 58.86373543739319
Validating...
Epoch 1 Loss : 0.18719383764284522 Acc : 92.417 % Time : 6.935500383377075
Testing...
Epoch 1 Loss : 0.1966731342719354 Acc : Acc : 92.347 % Time : 3.991048574447632
Training...
Epoch 2 Loss : 0.16566577794474616 Acc :  93.574 % Time : 65.4695417881012
Validating...
Epoch 2 Loss : 0.15552437428323995 Acc : 94.128 % Time : 6.805830717086792
Testing...
Epoch 2 Loss : 0.16098002506374787 Acc : Acc : 94.113 % Time : 4.13761830329895
Training...
Epoch 3 Loss : 0.14580540622940674 Acc :  94.473 % Time : 65.35103011131287
Validating...
Epoch 3 Loss : 0.1407135162957132 Acc : 94.846 % Time : 6.9858644008636475
Testing...
Epoch 3 Loss : 0.1494502555181473 Acc : Acc : 94.334 % Time : 3.903759002685547
Training...
Epoch 4 Loss : 0.13051697808025162 Acc :  95.020 % Time : 65.2849850654602
Validating...
Epoch 4 Loss : 0.1460079984558061 Acc : 94.515 % Time : 6.855999231338501
Testin