In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'emo-map-challenge:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F84797%2F9530234%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240916%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240916T081747Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D511f54dcacc0803da7ba8fbcb9f37755157c416020ea2b1350130af4e34d493267520d7d8f78a24cf0854073af4733eca3155b33ddc7849c11cfb81941642a79c83e88440800f61268827298f8662632fe23591261335cd7f98e07cc75d4118eeb6162c7bd1d9d3d3047c8e4e0ab1365acbbee30052a8c25a792637fb99eedcf9a4f8c00bc7b87675811513627d57797a3f9c94454f0a10b55567e098526e0ae7c619af622b67375ac56d8757449fe60cee7a970464908b8c37906aa45323d02a06f9da1999cf9c3e1e4c167714c61733ebbc783145c086036cd44d0468a77150bda67d8415deb4b675cf4fe3d9b900d9d2b46225c3723b97492bd82d7496ca0'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading emo-map-challenge, 21203873 bytes compressed
Downloaded and uncompressed: emo-map-challenge
Data source import complete.


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/emo-map-challenge/train_dataset.csv
/kaggle/input/emo-map-challenge/sample_submission.csv
/kaggle/input/emo-map-challenge/test_dataset.csv


In [None]:
pip install torch torchvision timm


Collecting timm
  Downloading timm-1.0.9-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m576.6 kB/s[0m eta [36m0:00:00[0m
Downloading timm-1.0.9-py3-none-any.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: timm
Successfully installed timm-1.0.9


In [None]:
train_link='/kaggle/input/emo-map-challenge/train_dataset.csv'
train_df=pd.read_csv(train_link)
train_df.head()

Unnamed: 0,id,pixels,emotion
0,1,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,0
1,2,151 150 147 155 148 133 111 140 170 174 182 15...,0
2,3,231 212 156 164 174 138 161 173 182 200 106 38...,2
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,4
4,5,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,6
...,...,...,...
4995,4996,22 24 23 23 25 24 23 20 18 13 6 2 0 1 7 22 32 ...,3
4996,4997,73 85 87 87 74 118 120 132 134 127 133 118 105...,3
4997,4998,253 253 254 254 254 254 250 219 166 141 109 70...,6
4998,4999,78 84 77 95 90 85 72 75 79 84 86 82 88 102 110...,6


In [None]:
test_link = '/kaggle/input/emo-map-challenge/test_dataset.csv'
test_df = pd.read_csv(test_link)
test_df

Unnamed: 0,id,pixels
0,5001,80 81 77 69 66 59 70 89 112 132 140 142 144 14...
1,5002,226 226 226 217 203 189 97 149 193 193 199 200...
2,5003,98 112 43 41 46 47 67 37 27 37 65 32 39 29 41 ...
3,5004,35 38 29 25 21 29 35 32 41 49 34 68 123 136 11...
4,5005,4 1 5 19 14 15 21 50 73 73 61 62 72 76 66 55 6...
...,...,...
2495,7496,50 36 17 22 23 29 33 39 34 37 37 37 39 43 48 5...
2496,7497,178 174 172 173 181 188 191 194 196 199 200 20...
2497,7498,17 17 16 23 28 22 19 17 25 26 20 24 31 19 27 9...
2498,7499,30 28 28 29 31 30 42 68 79 81 77 67 67 71 63 6...


In [None]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import torch
import pandas as pd

#class for data-preprocessing
class CustomDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        image = np.array(self.dataframe.iloc[idx]['pixels'].split(), dtype=np.uint8).reshape(48, 48)
        image = Image.fromarray(image)
        label = int(self.dataframe.iloc[idx]['emotion'])

        if self.transform:
            image = self.transform(image)

        return image, label


from torchvision import transforms

from torchvision import transforms
import numpy as np
from PIL import Image

transform = transforms.Compose([
    transforms.Resize((224, 224)),                # Resize the image to 224x224 as ViT requires this input size
    transforms.RandomHorizontalFlip(p=0.5),       # Randomly flip the image horizontally
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # Randomly crop and resize
    transforms.Grayscale(num_output_channels=3),  # Convert grayscale to 3-channel image
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Slight color jitter
    transforms.ToTensor(),                        # Convert image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet stats
])

# The rotation transformation didn't improve the accuracy rather it decreased so
# Create dataset and dataloaders
train_dataset = CustomDataset(dataframe=train_df, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)


with train and val

In [None]:
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
import numpy as np
import torch
import pandas as pd

class CustomDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        image = np.array(self.dataframe.iloc[idx]['pixels'].split(), dtype=np.uint8).reshape(48, 48)
        image = Image.fromarray(image)
        label = int(self.dataframe.iloc[idx]['emotion'])

        if self.transform:
            image = self.transform(image)

        return image, label


from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),                # Resize the image to 224x224 as ViT requires this input size
    transforms.RandomHorizontalFlip(p=0.5),       # Randomly flip the image horizontally
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # Randomly crop and resize
    transforms.Grayscale(num_output_channels=3),  # Convert grayscale to 3-channel image
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Slight color jitter
    transforms.ToTensor(),                        # Convert image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet stats
])

# Creating dataset
full_dataset = CustomDataset(dataframe=train_df, transform=transform)


dataset_size = len(full_dataset)
train_size = int(0.9 * dataset_size)
val_size = dataset_size - train_size

# Split the dataset
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Now you have train_loader and val_loader ready for training and validation
print("done")

done


In [None]:
import timm
import torch.nn as nn

# Load a pretrained ViT model
model = timm.create_model('vit_base_patch16_224', pretrained=True)

# Modify the final layer for your number of classes
num_classes = 7  # Number of emotion classes
model.head = nn.Linear(model.head.in_features, num_classes)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

freezing the layers gave bad accuracy

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm  # For progress bar

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = nn.DataParallel(model)
model.to(device)

# Class counts from dataset
class_counts = torch.tensor([1142, 780, 741, 661, 643, 467, 66], dtype=torch.float)
total_samples = class_counts.sum()

# Calculating class weights inversely proportional to frequency
class_weights = total_samples / (len(class_counts) * class_counts)
class_weights = class_weights.to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = optim.AdamW(model.parameters(), lr=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)

def train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10):
    best_val_acc = 0.0  # Tracking the best validation accuracy
    best_model_path = 'best_model.pth'

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

        train_loss = running_loss / len(train_loader.dataset)
        train_accuracy = 100 * correct_train / total_train
        print(f"Training Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.2f}%")

        # Validation phase
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * images.size(0)
                _, predicted = torch.max(outputs, 1)
                correct_val += (predicted == labels).sum().item()
                total_val += labels.size(0)

        val_loss = val_loss / len(val_loader.dataset)
        val_accuracy = 100 * correct_val / total_val
        print(f"Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.2f}%\n")

        # Update learning rate
        scheduler.step(val_loss)

        # Save the model if the validation accuracy improves
        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            torch.save(model.state_dict(), best_model_path)
            print(f"Saved best model with validation accuracy: {best_val_acc:.2f}%")

    # Load the best model after training
    model.load_state_dict(torch.load(best_model_path))
    print(f"Loaded best model from epoch with validation accuracy: {best_val_acc:.2f}%")

# Call the function to start training and evaluation
train_model(model, train_loader, val_loader, criterion, optimizer, device)


Epoch 1/10 - Training: 100%|██████████| 141/141 [02:55<00:00,  1.25s/it]


Training Loss: 0.9974, Accuracy: 31.22%


Epoch 1/10 - Validation: 100%|██████████| 16/16 [00:08<00:00,  1.96it/s]


Validation Loss: 0.8175, Accuracy: 39.40%

Saved best model with validation accuracy: 39.40%


Epoch 2/10 - Training: 100%|██████████| 141/141 [02:53<00:00,  1.23s/it]


Training Loss: 0.7204, Accuracy: 47.38%


Epoch 2/10 - Validation: 100%|██████████| 16/16 [00:08<00:00,  1.98it/s]


Validation Loss: 0.8062, Accuracy: 38.40%



Epoch 3/10 - Training: 100%|██████████| 141/141 [02:53<00:00,  1.23s/it]


Training Loss: 0.6266, Accuracy: 53.11%


Epoch 3/10 - Validation: 100%|██████████| 16/16 [00:08<00:00,  1.97it/s]


Validation Loss: 0.7387, Accuracy: 49.20%

Saved best model with validation accuracy: 49.20%


Epoch 4/10 - Training: 100%|██████████| 141/141 [02:54<00:00,  1.23s/it]


Training Loss: 0.5117, Accuracy: 60.51%


Epoch 4/10 - Validation: 100%|██████████| 16/16 [00:08<00:00,  1.97it/s]


Validation Loss: 0.7419, Accuracy: 51.80%

Saved best model with validation accuracy: 51.80%


Epoch 5/10 - Training: 100%|██████████| 141/141 [02:52<00:00,  1.23s/it]


Training Loss: 0.4368, Accuracy: 66.53%


Epoch 5/10 - Validation: 100%|██████████| 16/16 [00:08<00:00,  1.98it/s]


Validation Loss: 0.8338, Accuracy: 56.20%

Saved best model with validation accuracy: 56.20%


Epoch 6/10 - Training: 100%|██████████| 141/141 [02:53<00:00,  1.23s/it]


Training Loss: 0.3733, Accuracy: 71.67%


Epoch 6/10 - Validation: 100%|██████████| 16/16 [00:08<00:00,  1.93it/s]


Validation Loss: 0.8029, Accuracy: 59.00%

Saved best model with validation accuracy: 59.00%


Epoch 7/10 - Training: 100%|██████████| 141/141 [02:53<00:00,  1.23s/it]


Training Loss: 0.2745, Accuracy: 78.02%


Epoch 7/10 - Validation: 100%|██████████| 16/16 [00:08<00:00,  1.96it/s]


Validation Loss: 0.8408, Accuracy: 58.00%



Epoch 8/10 - Training: 100%|██████████| 141/141 [02:53<00:00,  1.23s/it]


Training Loss: 0.1841, Accuracy: 84.16%


Epoch 8/10 - Validation: 100%|██████████| 16/16 [00:08<00:00,  1.94it/s]


Validation Loss: 0.9161, Accuracy: 63.00%

Saved best model with validation accuracy: 63.00%


Epoch 9/10 - Training: 100%|██████████| 141/141 [02:53<00:00,  1.23s/it]


Training Loss: 0.1541, Accuracy: 86.93%


Epoch 9/10 - Validation: 100%|██████████| 16/16 [00:08<00:00,  1.97it/s]


Validation Loss: 1.1297, Accuracy: 62.20%



Epoch 10/10 - Training: 100%|██████████| 141/141 [02:53<00:00,  1.23s/it]


Training Loss: 0.1380, Accuracy: 88.67%


Epoch 10/10 - Validation: 100%|██████████| 16/16 [00:08<00:00,  1.96it/s]


Validation Loss: 1.0861, Accuracy: 64.20%

Saved best model with validation accuracy: 64.20%


  model.load_state_dict(torch.load(best_model_path))


Loaded best model from epoch with validation accuracy: 64.20%


In [None]:
model.load_state_dict(torch.load('best_model.pth'))

  model.load_state_dict(torch.load('best_model.pth'))


<All keys matched successfully>

In [None]:
model.eval()


DataParallel(
  (module): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): Identity()
        (drop_path1): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none'

In [None]:
# Save the model's state dictionary
torch.save(model.state_dict(), '/kaggle/working/best_model.pth')


In [None]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import torch
import pandas as pd
from torchvision import transforms

class CustomDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        # Convert the 'pixels' column to a NumPy array and reshape it
        image_array = np.array(self.dataframe.iloc[idx]['pixels'].split(), dtype=np.uint8).reshape(48, 48)
        image = Image.fromarray(image_array)
        image_id = self.dataframe.iloc[idx]['id']  # Get the image ID

        # Apply transformations
        if self.transform:
            image = self.transform(image)

        return image, image_id


transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resizing image to 224x224
    transforms.Grayscale(num_output_channels=3),  # Converting grayscale to 3-channel
    transforms.ToTensor(),  # Converting image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalizing using ImageNet stats
])

# Create dataset and dataloaders
test_dataset = CustomDataset(dataframe=test_df, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print("done")

done


In [None]:
import pandas as pd
import torch
import torch.nn as nn

# Initialize the model and load the trained weights
model = timm.create_model('vit_base_patch16_224', pretrained=True)
num_classes = 7  # Number of emotion classes
model.head = nn.Linear(model.head.in_features, num_classes)
model = nn.DataParallel(model)  # Wrap the model for multi-GPU
model.load_state_dict(torch.load('/kaggle/working/best_model.pth'))
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

# Set the model to evaluation mode
model.eval()

predictions = []
ids = []

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

with torch.no_grad():
    for batch in test_loader:
        print("Processing batch")
        images, batch_ids = batch

        # Move images and batch IDs to the appropriate device
        images = images.to(device)
        batch_ids = batch_ids.to(device)

        # Forward pass
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # Get the class with the highest score

        # Collect predictions and IDs
        predictions.extend(predicted.cpu().numpy())
        ids.extend(batch_ids.cpu().numpy())

# Create a DataFrame with IDs and predictions
results_df = pd.DataFrame({
    'id': ids,
    'predictions': predictions
})

# Save the DataFrame to a CSV file
results_df.to_csv('predictions_vit_class_based_weights_1_without_rotation.csv', index=False)
print("Prediction done")


  model.load_state_dict(torch.load('/kaggle/working/best_model.pth'))


Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing batch
Processing bat