In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms

In [None]:
# Both splits live under one dataset root on the Kaggle input mount.
DATA_ROOT = "/kaggle/input/fer2013-for-efficientface/FER2013"
train_dir = DATA_ROOT + "/train"
test_dir = DATA_ROOT + "/test"

In [None]:
IMG_SIZE = 224

# Per-channel normalisation statistics applied after ToTensor().
# (These are the values commonly used for ImageNet-pretrained backbones.)
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _make_preprocessing():
    """Return a fresh resize -> tensor -> normalise pipeline."""
    steps = [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=list(_NORM_MEAN), std=list(_NORM_STD)),
    ]
    return transforms.Compose(steps)


# Train and test currently share the same deterministic preprocessing; they are
# built separately so that each split owns its own pipeline object.
train_transforms = _make_preprocessing()
test_transforms = _make_preprocessing()

In [None]:
# Pick the GPU when one is visible to PyTorch and fall back to the CPU
# otherwise, so that every later `.to(device)` works on any kernel.
use_cuda = torch.cuda.is_available()
device = 'cuda' if use_cuda else 'cpu'

In [None]:
batch_size = 64

# One sub-folder per class; ImageFolder derives the integer labels from the folder names.
train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
test_dataset = datasets.ImageFolder(test_dir, transform=test_transforms)

# Only the training split is shuffled; the test split keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Integer class index of every image in the train and test splits
train_labels = train_dataset.targets
test_labels = test_dataset.targets

# Class names, in the order of their integer indices
class_names = train_dataset.classes

# Number of samples per class in each split
train_counts = [train_labels.count(i) for i in range(len(class_names))]
test_counts = [test_labels.count(i) for i in range(len(class_names))]


def _plot_class_counts(ax, counts, title, color):
    """Draw an annotated bar chart of per-class sample counts on ``ax``.

    Args:
        ax: matplotlib Axes to draw on.
        counts: one count per entry of ``class_names``, in the same order.
        title: title shown above the chart.
        color: bar colour.
    """
    ax.bar(class_names, counts, color=color)
    ax.set_title(title)
    ax.set_xlabel('Classes')
    ax.set_ylabel('Number of Samples')
    # Write the exact count slightly above each bar.
    for i, v in enumerate(counts):
        ax.text(i, v + 10, str(v), ha='center')


# One panel per split, stacked vertically
fig, axs = plt.subplots(2, 1, figsize=(6, 12))
_plot_class_counts(axs[0], train_counts, 'Training Dataset', 'b')
_plot_class_counts(axs[1], test_counts, 'Test Dataset', 'r')

# Adjust layout and display the plots
plt.tight_layout()
plt.show()

**MODEL**

In [None]:
# Inverse-frequency class weights, rescaled so that the smallest weight
# (the one belonging to the most frequent class) equals 1.
unique, counts = np.unique(train_dataset.targets, return_counts=True)
cw = 1 / counts
cw = cw / cw.min()

# Map each class index to its weight.
class_weights = dict(zip(unique, cw))
print(counts, class_weights.values())

In [None]:
def cross_entropy_loss_with_soft_target(pred, target, weight=None):
    """Class-weighted cross-entropy between logits and a soft target distribution.

    Computes ``mean_over_rows(sum_over_dim1(-w * target * log_softmax(pred)))``.

    Args:
        pred: logits; ``log_softmax`` is taken over the last dimension.
        target: per-class target values, same shape as ``pred``.
        weight: optional per-class weights broadcastable against ``pred``.
            When omitted, the notebook-level ``weights`` tensor is used, which
            must already be defined at call time.

    Returns:
        Scalar tensor with the mean weighted loss.
    """
    # Fall back to the global only when no explicit weights are supplied, so
    # the function can also be used before/without the `weights` cell.
    class_weight = weights if weight is None else weight
    log_probs = torch.nn.functional.log_softmax(pred, -1)
    return torch.mean(torch.sum(-class_weight * target * log_probs, 1))

**Robust Optimizer**

In [None]:
class RobustOptimizer(torch.optim.Optimizer):
    """Two-pass optimizer wrapper (sharpness-aware-minimisation style update).

    Each update needs two forward/backward passes:

    1. ``first_step`` moves every parameter along its gradient by
       ``rho * grad / ||grad||`` and remembers that displacement.
    2. ``second_step`` removes the displacement and lets the wrapped base
       optimizer update the original weights using the gradients that were
       computed at the displaced point.

    Args:
        params: iterable of parameters (or parameter-group dicts) to optimise.
        base_optimizer: optimizer class (e.g. ``torch.optim.Adam``) that
            performs the actual update.
        eps: size of the displacement, stored per group under the key ``"rho"``.
        **kwargs: forwarded to ``base_optimizer`` (e.g. ``lr``).
    """

    def __init__(self, params, base_optimizer, eps=0.05, **kwargs):
        defaults = dict(rho=eps, **kwargs)
        super().__init__(params, defaults)

        # The base optimizer is built on *our* group dicts and we then alias
        # its list, so both objects always see the same groups.
        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        self.param_groups = self.base_optimizer.param_groups
        # Make groups added later via add_param_group() receive the base
        # optimizer's defaults (e.g. Adam's betas) as well.
        self.defaults.update(self.base_optimizer.defaults)

    @torch.no_grad()
    def first_step(self, zero_grad=False):
        """Displace the weights along the current gradient direction.

        Parameters without a gradient are left untouched.

        Args:
            zero_grad: when True, clear the gradients afterwards.
        """
        grad_norm = self._grad_norm()
        for group in self.param_groups:
            # Small constant guards against division by a zero norm.
            scale = group["rho"] / (grad_norm + 1e-12)

            for p in group["params"]:
                if p.grad is None:
                    continue
                e_w = p.grad * scale.to(p)
                p.add_(e_w)
                # Remembered so that second_step can undo the displacement.
                self.state[p]["e_w"] = e_w

        if zero_grad:
            self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        """Undo the displacement from ``first_step`` and apply the base update.

        Args:
            zero_grad: when True, clear the gradients afterwards.
        """
        for group in self.param_groups:
            for p in group["params"]:
                # Restore exactly the parameters that first_step displaced,
                # whether or not they received a gradient in the second pass.
                # Popping the entry prevents a stale displacement from being
                # subtracted again in a later iteration.
                param_state = self.state.get(p)
                if not param_state or "e_w" not in param_state:
                    continue
                p.sub_(param_state.pop("e_w"))

        self.base_optimizer.step()

        if zero_grad:
            self.zero_grad()

    @torch.no_grad()
    def step(self, closure=None):
        """Run both passes in one call.

        Gradients of the first pass must already be populated when this is
        called; ``closure`` is used for the second pass.

        Args:
            closure: callable that re-evaluates the model and calls
                ``backward()``. Required.

        Raises:
            TypeError: if ``closure`` is None. Raised before any weight is
                modified so a misuse cannot leave the model displaced.
        """
        if closure is None:
            raise TypeError(
                "RobustOptimizer.step() requires a closure that re-evaluates "
                "the loss and calls backward()"
            )
        # step() itself runs under no_grad, so re-enable autograd for the closure.
        closure = torch.enable_grad()(closure)

        self.first_step(zero_grad=True)
        closure()
        self.second_step()

    def _grad_norm(self):
        """Return the L2 norm of all available gradients as one tensor.

        Per-parameter norms are moved to the device of the first parameter so
        they can be stacked even when parameters live on different devices.
        """
        shared_device = self.param_groups[0]["params"][0].device
        per_param_norms = [
            p.grad.norm(p=2).to(shared_device)
            for group in self.param_groups
            for p in group["params"]
            if p.grad is not None
        ]
        return torch.norm(torch.stack(per_param_norms), p=2)

In [None]:
def train(model, criterion, n_epochs, learningrate):
    """Train ``model`` with the two-pass ``RobustOptimizer`` and print metrics per epoch.

    Uses the notebook-level ``train_loader``, ``test_loader``,
    ``train_dataset``, ``test_dataset`` and ``device``.

    Args:
        model: network to train in place. Only parameters whose
            ``requires_grad`` is True are handed to the optimizer.
        criterion: callable ``criterion(output, label)`` returning a scalar
            loss. Assumed to return a per-batch mean (the default reduction of
            ``nn.CrossEntropyLoss``); the reported losses rely on that.
        n_epochs: number of passes over ``train_loader``.
        learningrate: learning rate forwarded to ``optim.Adam``.

    Returns:
        None. Per-epoch loss/accuracy and the best test accuracy so far are printed.
    """
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = RobustOptimizer(trainable_params, optim.Adam, lr=learningrate)

    best_acc = 0

    for epoch in range(n_epochs):
        epoch_loss = 0
        epoch_accuracy = 0
        model.train()
        for data, label in train_loader:
            data = data.to(device)
            label = label.to(device)

            # First forward-backward pass: gradients at the current weights.
            loss = criterion(model(data), label)
            loss.backward()
            optimizer.first_step(zero_grad=True)

            # Second forward-backward pass: gradients at the displaced weights,
            # which are the ones the base optimizer applies.
            output = model(data)
            loss = criterion(output, label)
            loss.backward()
            optimizer.second_step(zero_grad=True)

            # Metrics come from the second pass. The loss is detached so the
            # running sum does not carry autograd history, and it is weighted
            # by the batch size so that dividing by the dataset size below
            # yields a per-sample average.
            epoch_accuracy += (output.argmax(dim=1) == label).float().sum()
            epoch_loss += loss.detach() * label.size(0)
        epoch_accuracy /= len(train_dataset)
        epoch_loss /= len(train_dataset)

        model.eval()
        with torch.no_grad():
            epoch_val_accuracy = 0
            epoch_val_loss = 0
            for data, label in test_loader:
                data = data.to(device)
                label = label.to(device)

                val_output = model(data)
                val_loss = criterion(val_output, label)

                epoch_val_accuracy += (val_output.argmax(dim=1) == label).float().sum()
                epoch_val_loss += val_loss * label.size(0)
        epoch_val_accuracy /= len(test_dataset)
        epoch_val_loss /= len(test_dataset)
        print(
            f"Epoch : {epoch+1} - loss : {epoch_loss:.4f} - acc: {epoch_accuracy:.4f} - val_loss : {epoch_val_loss:.4f} - val_acc: {epoch_val_accuracy:.4f}\n"
        )
        if best_acc < epoch_val_accuracy:
            best_acc = epoch_val_accuracy

        print(f"Best acc:{best_acc}")

In [None]:
import timm

# Build the backbone without downloading weights, then drop its classifier so
# that the model outputs features.
# NOTE(review): presumably the checkpoint was saved from a model without a
# classifier head, which is why the head is removed before loading — confirm.
model = timm.create_model('tf_efficientnet_b0_ns', pretrained=False)
model.classifier = torch.nn.Identity()

# map_location='cpu' lets a checkpoint saved from a GPU load on any machine;
# the model is moved to the target device in a later cell.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
model.load_state_dict(
    torch.load(
        '/kaggle/input/efficientnet-weights-on-vgg/state_vggface2_enet0_new.pt',
        map_location='cpu',
    )
)

In [None]:
# Attach a fresh 7-way linear head on top of the 1280-dimensional features.
# It stays wrapped in nn.Sequential, so its parameters are named `classifier.0.*`.
# (Other feature widths tried previously: 1792, 1536.)
_head_layer = nn.Linear(in_features=1280, out_features=7)
model.classifier = nn.Sequential(_head_layer)

model = model.to(device)
print(model)

In [None]:
def set_parameter_requires_grad(model, requires_grad):
    """Freeze or unfreeze every parameter yielded by ``model.parameters()``.

    Args:
        model: object exposing ``parameters()`` (e.g. an ``nn.Module``).
        requires_grad: True to make the parameters trainable, False to freeze them.
    """
    for tensor in model.parameters():
        tensor.requires_grad_(requires_grad)

In [None]:
# Per-class loss weights, in class-index order, placed on the same device as
# the model (instead of unconditionally on the GPU).
weights = torch.tensor(
    list(class_weights.values()), dtype=torch.float32, device=device
)
criterion = nn.CrossEntropyLoss(weight=weights)

In [None]:
# Stage 1: freeze everything, re-enable only the classifier head, and train it
# for 3 epochs at lr=1e-3.
set_parameter_requires_grad(model, False)
set_parameter_requires_grad(model.classifier, True)
train(model, criterion, n_epochs=3, learningrate=0.001)

In [None]:
# Stage 2: unfreeze all parameters and fine-tune the whole network for
# 6 epochs at the smaller lr=1e-4.
set_parameter_requires_grad(model, True)
train(model, criterion, n_epochs=6, learningrate=1e-4)

In [None]:
# Persist the whole module object (not just its state_dict) to the working directory.
torch.save(obj=model, f="EfficientNetb0.pth")