# Named Entity Recognition and Image Classification

In [None]:
!pip install datasets

In [1]:
import torch
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets

from torch.utils.data import DataLoader
from transformers import pipeline
from PIL import Image
from datasets import load_dataset
from transformers import Trainer
from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    Trainer,
    TrainingArguments,
)

## Training NER transformer model

In [9]:
dataset = load_dataset("json", data_files="animal_ner_dataset.json", split="train")
dataset = dataset.train_test_split(test_size=0.2)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"],
        truncation=True,
        padding="max_length",
        max_length=128,
        is_split_into_words=True
    )

    labels = []
    for i, label in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        label_ids = []

        prev_word_id = None
        for word_id in word_ids:
            if word_id is None:
                label_ids.append(-100)
            elif word_id != prev_word_id:
                label_ids.append(label[word_id])
            else:
                label_ids.append(label[word_id])
            prev_word_id = word_id

        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs


train_dataset = train_dataset.map(tokenize_and_align_labels, batched=True, remove_columns=["tokens", "ner_tags"])
eval_dataset = eval_dataset.map(tokenize_and_align_labels, batched=True, remove_columns=["tokens", "ner_tags"])

In [15]:
model_name = "distilbert-base-cased"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=2)

print(model.config)

training_args = TrainingArguments(
    output_dir="./ner_model",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=2,
    save_total_limit=2,
    logging_dir="./logs"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset
)

trainer.train()
model.save_pretrained("./animal_ner_model")
tokenizer.save_pretrained("./animal_ner_model")


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DistilBertConfig {
  "_attn_implementation_autoset": true,
  "_name_or_path": "distilbert-base-cased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.48.3",
  "vocab_size": 28996
}



Epoch,Training Loss,Validation Loss
1,No log,0.085088
2,No log,0.038998


('./animal_ner_model/tokenizer_config.json',
 './animal_ner_model/special_tokens_map.json',
 './animal_ner_model/vocab.txt',
 './animal_ner_model/added_tokens.json',
 './animal_ner_model/tokenizer.json')

In [16]:
ner_pipeline = pipeline("ner", model="./animal_ner_model", tokenizer="./animal_ner_model")

text = "There is a cow in the picture."
result = ner_pipeline(text)

print(result)

Device set to use cuda:0


[{'entity': 'LABEL_0', 'score': 0.9912977, 'index': 1, 'word': 'There', 'start': 0, 'end': 5}, {'entity': 'LABEL_0', 'score': 0.9842253, 'index': 2, 'word': 'is', 'start': 6, 'end': 8}, {'entity': 'LABEL_0', 'score': 0.99444264, 'index': 3, 'word': 'a', 'start': 9, 'end': 10}, {'entity': 'LABEL_1', 'score': 0.94378847, 'index': 4, 'word': 'cow', 'start': 11, 'end': 14}, {'entity': 'LABEL_0', 'score': 0.99876547, 'index': 5, 'word': 'in', 'start': 15, 'end': 17}, {'entity': 'LABEL_0', 'score': 0.99869734, 'index': 6, 'word': 'the', 'start': 18, 'end': 21}, {'entity': 'LABEL_0', 'score': 0.99869365, 'index': 7, 'word': 'picture', 'start': 22, 'end': 29}, {'entity': 'LABEL_0', 'score': 0.98500687, 'index': 8, 'word': '.', 'start': 29, 'end': 30}]


## Training Image Classification model

In [1]:
!kaggle datasets download viratkothari/animal10

Dataset URL: https://www.kaggle.com/datasets/viratkothari/animal10
License(s): GPL-2.0
animal10.zip: Skipping, found more recently modified local copy (use --force to force download)


In [None]:
!unzip -q animal10.zip

In [17]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

data_dir = "Animals-10"
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

print("Class names:", dataset.classes)

Class names: ['butterfly', 'cat', 'chicken', 'cow', 'dog', 'elephant', 'horse', 'sheep', 'spider', 'squirrel']


In [18]:
model = models.resnet50(pretrained=True)

num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 10)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 122MB/s]


In [19]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [21]:
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / len(train_loader):.4f}")

    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Validation Accuracy: {100 * correct / total:.2f}%")


Epoch 1/5, Loss: 0.5252
Validation Accuracy: 85.29%
Epoch 2/5, Loss: 0.3992
Validation Accuracy: 84.47%
Epoch 3/5, Loss: 0.3092
Validation Accuracy: 85.45%
Epoch 4/5, Loss: 0.2630
Validation Accuracy: 85.35%
Epoch 5/5, Loss: 0.2142
Validation Accuracy: 83.14%


In [22]:
torch.save(model.state_dict(), "animal_classifier.pth")

In [None]:
ner_pipeline = pipeline("ner", model="./animal_ner_model", tokenizer="./animal_ner_model")

model.load_state_dict(torch.load("animal_classifier.pth"))
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

In [None]:
def preprocess_image(image_path):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    image = Image.open(image_path).convert("RGB")
    return transform(image).unsqueeze(0)

In [None]:
def check_animal_match(text, image_path):
    entities = ner_pipeline(text)
    detected_animals = set(ent['word'].replace("##", "") for ent in entities if 'LABEL' in ent['entity'])

    image_tensor = preprocess_image(image_path).to(device)
    with torch.no_grad():
        output = model(image_tensor)
        predicted_label = torch.argmax(output, dim=1).item()
    predicted_class = dataset.classes[predicted_label]

    return predicted_class in detected_animals

In [24]:
image_path = "/content/Animals-10/chicken/chicken (1).jpeg"
text_input = "There is a cow in the picture."

result = check_animal_match(text_input, image_path)
print("Match:", result)

Device set to use cuda:0
  model.load_state_dict(torch.load("animal_classifier.pth"))


Match: False
