In [1]:
!pip install -U transformers
## Local Inference on GPU



In [None]:
# Loading necessaary libraries
import os
from transformers import AutoImageProcessor, AutoModelForImageClassification
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
import torch

‚ö†Ô∏è If the generated code snippets do not work, please open an issue on either the [model repo](https://huggingface.co/google/efficientnet-b3)
			and/or on [huggingface.js](https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries-snippets.ts) üôè

Model page: https://huggingface.co/google/efficientnet-b3

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-classification", model="google/efficientnet-b3")
pipe("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parrots.png")

processor = AutoImageProcessor.from_pretrained("google/efficientnet-b3")
model = AutoModelForImageClassification.from_pretrained("google/efficientnet-b3")


# Load model directly
from transformers import AutoImageProcessor, AutoModelForImageClassification
from transformers import EfficientNetImageProcessor, EfficientNetForImageClassification

processor = EfficientNetImageProcessor.from_pretrained("google/efficientnet-b3")
model = EfficientNetForImageClassification.from_pretrained("google/efficientnet-b3")

In [3]:
import kagglehub

# Download latest version of dataset
path = kagglehub.dataset_download("msambare/fer2013")

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'fer2013' dataset.
Path to dataset files: /kaggle/input/fer2013


In [4]:
import os
files_in_dir = os.listdir(path)

print("Files in the directory:")
for file in files_in_dir:
    print(file)

Files in the directory:
test
train


We can see that in the kaggle directory we have test and train values

We will utilize PyTorch ImageFolder library to load our data

In [5]:
normalize = transforms.Normalize(
    mean=processor.image_mean,
    std=processor.image_std
)
# EfficientNet-B3 expects a specific input size (300x300x3)
target_size = processor.size['height']

# Define the full transformation pipeline for the training set
train_transforms = transforms.Compose([
    # Resize to the model's required input size
    transforms.Resize((target_size, target_size)),
    # Convert PIL Image to PyTorch Tensor
    transforms.ToTensor(),
    # Apply the normalization (using ImageNet stats on which the model was originally trained)
    normalize,
])

# Define the full transformation pipeline for the test/validation set (similar but might skip augmentation)
val_test_transforms = transforms.Compose([
    transforms.Resize((target_size, target_size)),
    # Convert PIL Image to PyTorch Tensor
    transforms.ToTensor(),
    normalize,
])

In [20]:
target_size

300

In [6]:
from torchvision.datasets import ImageFolder

# train_dataset = ImageFolder(root='/kaggle/input/fer2013/train')

# test_dataset = ImageFolder(root='/kaggle/input/fer2013/test')


# Load the datasets with the defined transformations

full_train_dataset = ImageFolder(
    root=os.path.join(path, 'train'),
    transform=train_transforms # Apply the transformation pipeline on the training data
)

test_dataset = ImageFolder(
    root=os.path.join(path, 'test'),
    transform=val_test_transforms
)

# Get class names (sanity check)
class_names = full_train_dataset.classes
print(f"Detected class names (7 categories): {class_names}")
# Expected: ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']

Detected class names (7 categories): ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']


In [7]:
# Split the training data into Train and Validation sets
VAL_SPLIT_RATIO = 0.1 # Use 10% of the training data for validation

train_size = int((1 - VAL_SPLIT_RATIO) * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

train_dataset, val_dataset = random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42) # For reproducibility
)

print(f"Total training samples: {len(full_train_dataset)}")
print(f"Actual Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Test samples: {len(test_dataset)}")

Total training samples: 28709
Actual Training samples: 25838
Validation samples: 2871
Test samples: 7178


In [8]:
# Create DataLoaders

# Adjust based on GPU memory
BATCH_SIZE = 64  # @param {type: "slider", min:30, max:100}

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True, # Shuffle training data
    num_workers=2  # Use multiple processes for faster loading
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False, # No need to shuffle validation data
    num_workers=2
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=2
)

# Sanity check the first batch
data_batch, labels_batch = next(iter(train_loader))
print("\n--- First Batch Sanity Check ---")
print(f"Batch Tensor Shape: {data_batch.shape}")
print(f"Label Tensor Shape: {labels_batch.shape}")


--- First Batch Sanity Check ---
Batch Tensor Shape: torch.Size([64, 3, 300, 300])
Label Tensor Shape: torch.Size([64])


Dimension explanations:

64 - Batch size

3 - RGB color dimensions

300 - x pixel dimension

300 - y pixel dimension

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# Check for GPU availability and set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device set to use {device}")
model.to(device)

# Define loss function and optimizer
# For multi-class classification, CrossEntropyLoss is common.
# Note: EfficientNetForImageClassification typically outputs logits, so CrossEntropyLoss is appropriate.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4) # You might want to tune the learning rate

epochs = 40  # @param {type: "slider", min:10, max:100}

history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}

for epoch in range(epochs):
    model.train()  # Set model to training mode
    total_train_loss = 0
    correct_train_predictions = 0
    total_train_samples = 0

    for batch_idx, (inputs, labels) in enumerate(tqdm(train_loader, desc=f"Training Epoch {epoch+1}")):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad() # Zero the gradients
        outputs = model(inputs).logits # Get logits from the model output
        loss = loss_fn(outputs, labels)
        loss.backward() # Backpropagation
        optimizer.step() # Update weights

        total_train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total_train_samples += labels.size(0)
        correct_train_predictions += (predicted == labels).sum().item()

    avg_train_loss = total_train_loss / len(train_loader)
    train_accuracy = correct_train_predictions / total_train_samples
    history["train_loss"].append(avg_train_loss)
    history["train_acc"].append(train_accuracy)

    # Validation phase
    model.eval()  # Set model to evaluation mode
    total_val_loss = 0
    correct_val_predictions = 0
    total_val_samples = 0

    with torch.no_grad(): # No gradient calculations in validation
        for inputs, labels in tqdm(val_loader, desc=f"Validation Epoch {epoch+1}"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs).logits
            loss = loss_fn(outputs, labels)

            total_val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_val_samples += labels.size(0)
            correct_val_predictions += (predicted == labels).sum().item()

    avg_val_loss = total_val_loss / len(val_loader)
    val_accuracy = correct_val_predictions / total_val_samples
    history["val_loss"].append(avg_val_loss)
    history["val_acc"].append(val_accuracy)

    print(f"Epoch {epoch+1}/{epochs} | Train Loss: {avg_train_loss:.4f}, Train Acc: {train_accuracy:.4f} | Val Loss: {avg_val_loss:.4f}, Val Acc: {val_accuracy:.4f}")

Device set to use cpu


NameError: name 'model' is not defined

In [None]:
# Optional: Evaluation on test set after training
# model.eval()
# correct_test_predictions = 0
# total_test_samples = 0
# with torch.no_grad():
#     for inputs, labels in tqdm(test_loader, desc="Testing"):
#         inputs, labels = inputs.to(device), labels.to(device)
#         outputs = model(inputs).logits
#         _, predicted = torch.max(outputs.data, 1)
#         total_test_samples += labels.size(0)
#         correct_test_predictions += (predicted == labels).sum().item()
# test_accuracy = correct_test_predictions / total_test_samples
# print(f"Test Accuracy: {test_accuracy:.4f}")