In [None]:
 # Google Colab setup: mount Google Drive at /content/drive.
import sys
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%cd /content/drive/MyDrive/projects/sign-language-image-detection
!pip install virtualenv
!pip install -r requirements.txt
!python -m ipykernel install --user --name=sl_detection --display-name "SL Detection"

In [None]:
# Install required packages (add `--upgrade` to ensure the latest versions)
!pip install --upgrade transformers datasets torch torchvision accelerate


In [None]:
from datasets import load_dataset, DatasetDict
from transformers import (
    DeiTImageProcessor,
    DeiTForImageClassification,
    TrainingArguments,
    Trainer,
)
from torchvision.transforms import Normalize, ToTensor, Resize
import torch
from torch.utils.data import DataLoader


In [None]:
# Hugging Face Hub identifier of the ASL sign-language alphabet dataset.
dataset_name = "Marxulia/asl_sign_languages_alphabets_v03"

# Fetch the dataset; later cells read its 'train' split.
dataset = load_dataset(path=dataset_name)

In [None]:
# Shuffle once with a fixed seed before slicing. The row order of the source
# dataset is not guaranteed to be random; taking contiguous slices of a
# class-ordered dataset would leave whole classes out of the training or test
# split. The fixed seed keeps the split reproducible across runs.
shuffled_ds = dataset['train'].shuffle(seed=42)
num_examples = len(shuffled_ds)

# Calculate sizes for train, validation, and test splits
train_size = int(0.7 * num_examples)  # 70% for training
val_size = int(0.1 * num_examples)    # 10% for validation
test_size = num_examples - train_size - val_size  # Remaining (~20%) for testing

# Split the shuffled dataset into disjoint, contiguous index ranges
train_ds = shuffled_ds.select(range(0, train_size))
val_ds = shuffled_ds.select(range(train_size, train_size + val_size))
test_ds = shuffled_ds.select(range(train_size + val_size, num_examples))

# Group the three splits under the names used by the rest of the notebook
dataset_splits = DatasetDict({
    "train": train_ds,
    "validation": val_ds,
    "test": test_ds
})



# Load the DeiT image processor; it carries the preprocessing settings
# (normalization statistics and image sizes) configured for this checkpoint.
model_name = "facebook/deit-tiny-distilled-patch16-224"
processor = DeiTImageProcessor.from_pretrained(model_name)

# Normalize with the per-channel statistics the processor is configured with.
image_mean, image_std = processor.image_mean, processor.image_std
normalize = Normalize(mean=image_mean, std=image_std)

# Pick the resolution the model actually receives. When the processor
# center-crops, `size` is only the intermediate resize target and `crop_size`
# is the final input size (DeiTImageProcessor defaults: resize to 256, crop to
# 224). Resizing to `size` without cropping would feed the model images of the
# wrong resolution, so prefer `crop_size` whenever center-cropping is enabled.
if getattr(processor, "do_center_crop", False) and getattr(processor, "crop_size", None):
    image_size = processor.crop_size["height"]
else:
    image_size = processor.size["height"]
resize = Resize((image_size, image_size))

# Define transformation function
def apply_transforms(examples):
    """Add a ``pixel_values`` entry holding one preprocessed tensor per image.

    Each item of ``examples["image"]`` is converted to RGB, resized with the
    module-level ``resize``, turned into a tensor, and normalized with the
    module-level ``normalize``. The batch dict is updated in place and
    returned; its existing keys are left untouched.
    """
    to_tensor = ToTensor()
    pixel_values = []
    for image in examples["image"]:
        rgb_image = image.convert("RGB")
        pixel_values.append(normalize(to_tensor(resize(rgb_image))))
    examples["pixel_values"] = pixel_values
    return examples

# Register the preprocessing function on every split; it is applied lazily
# whenever examples are read from the split.
for split_name in ("train", "validation", "test"):
    dataset_splits[split_name].set_transform(apply_transforms)

In [None]:
# Build both directions of the class-index <-> class-name mapping from the
# label feature of the training split.
id2label = dict(enumerate(train_ds.features["label"].names))
label2id = dict(zip(id2label.values(), id2label.keys()))

# Load the pretrained DeiT checkpoint with the label mappings attached.
# `ignore_mismatched_sizes=True` lets loading proceed when checkpoint weights
# do not match the shapes implied by the new label set.
model = DeiTForImageClassification.from_pretrained(
    model_name,
    ignore_mismatched_sizes=True,
    id2label=id2label,
    label2id=label2id,
)

In [None]:
# Define a collate function for batching
def collate_fn(examples):
    """Combine a list of example dicts into one batch dict.

    Args:
        examples: sequence of dicts, each with a ``"pixel_values"`` tensor and
            a ``"label"`` value.

    Returns:
        A dict with ``"pixel_values"`` (the per-example tensors stacked along a
        new leading dimension) and ``"labels"`` (a tensor of the labels, in
        input order).
    """
    images = []
    targets = []
    for item in examples:
        images.append(item["pixel_values"])
        targets.append(item["label"])
    return {
        "pixel_values": torch.stack(images),
        "labels": torch.tensor(targets),
    }

In [None]:

import inspect

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`
# and no longer accept the old name, while older releases only know the old
# one. Pick whichever keyword the installed version supports so this cell runs
# on both.
_eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

# Define training arguments
# NOTE(review): learning_rate=1e-3 is high for fine-tuning a pretrained
# transformer — confirm this value is intentional.
train_args = TrainingArguments(
    output_dir="output-models-deit",
    save_total_limit=2,              # Keep at most two checkpoints on disk
    save_strategy="epoch",           # Save the model at the end of each epoch
    learning_rate=1e-3,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    num_train_epochs=2,
    weight_decay=0.01,
    load_best_model_at_end=True,     # Needs matching eval and save strategies
    logging_dir="logs",
    remove_unused_columns=False,     # Keep the "image" column the transform reads
    report_to="none",                # Disable W&B logging
    local_rank=-1,                   # Disable distributed training
    **{_eval_strategy_kwarg: "epoch"},  # Evaluate at the end of each epoch
)

In [None]:
import inspect

# Newer transformers releases take the image processor via `processing_class`;
# older ones only accept it as `tokenizer`. Choose the keyword the installed
# Trainer supports so this cell works on both.
_processor_kwarg = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)

# Initialize the Trainer with the model, the train/validation splits and the
# custom collate function that builds each batch.
trainer = Trainer(
    model=model,
    args=train_args,
    train_dataset=dataset_splits["train"],
    eval_dataset=dataset_splits["validation"],
    data_collator=collate_fn,
    **{_processor_kwarg: processor},
)

In [None]:
# Fine-tune the model using the datasets and arguments given to `trainer`.
trainer.train()

In [None]:
# Run inference on the held-out test split; `outputs` is reused by later
# cells (its predictions and label ids feed the confusion matrix).
test_split = dataset_splits["test"]
outputs = trainer.predict(test_split)

# Show the metrics reported for the test split.
print(outputs.metrics)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# `plt` was used below without ever being imported, which raised a NameError.
import matplotlib.pyplot as plt

# Get predictions and true labels
y_true = outputs.label_ids
y_pred = outputs.predictions.argmax(1)  # Highest-scoring class per example

# Get label names (position in the list is the class id)
labels = train_ds.features["label"].names

# Compute confusion matrix. Passing every class id explicitly keeps the matrix
# at len(labels) x len(labels) even when some class never occurs in the test
# data, so rows and columns always line up with `display_labels`.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(labels))))

# Display confusion matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap='Blues')
plt.show()


In [None]:
# `recall_score` was called without being imported, which raised a NameError.
from sklearn.metrics import recall_score

# Calculate the recall scores for each class. Passing every class id via
# `labels=` yields exactly one score per entry of `labels`, in the same order,
# so the zip below cannot misalign when a class is missing from the test data.
# `zero_division=0` reports 0 for classes with no true samples.
recall = recall_score(
    y_true,
    y_pred,
    labels=list(range(len(labels))),
    average=None,
    zero_division=0,
)

# Print the recall for each class
print("\nRecall Scores:")
for label, score in zip(labels, recall):
    print(f"Recall for {label}: {score:.2f}")