In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import os

PROJECT_NAME = "human_animal_detection"   # change if needed
BASE_DIR = f"/content/drive/MyDrive/{PROJECT_NAME}"
DATASET_DIR = f"{BASE_DIR}/datasets"

os.makedirs(DATASET_DIR, exist_ok=True)

print("Project directory created at:")
print(BASE_DIR)


Project directory created at:
/content/drive/MyDrive/human_animal_detection


Download dataset

In [None]:
import fiftyone as fo
import fiftyone.zoo as foz

classes = [
    # Humans / Person-related
    "Person", "Man", "Woman", "Boy", "Girl",
    "Human body", "Human head", "Human face",
    "Human arm", "Human hand", "Human leg",
    "Human foot", "Human eye", "Human mouth",
    "Human nose", "Human hair", "Human beard",

    # Animals
    "Dog", "Cat", "Elephant", "Squirrel", "Crab",
    "Insect", "Carnivore", "Shellfish", "Bird", "Fish",
    "Horse", "Goat", "Pig", "Rabbit", "Sheep",
    "Duck", "Sea lion", "Whale", "Dolphin", "Tortoise",
    "Marine mammal", "Lion", "Giraffe", "Bat (Animal)", "Fox"
]


import fiftyone.zoo as foz

dataset = foz.load_zoo_dataset(
    "open-images-v7",
    split="train",
    label_types=["detections"],
    classes=classes,
    max_samples=5000,
)


print(f"✓ Downloaded {len(dataset)} images")


  return '(?ms)' + res + '\Z'


Downloading split 'train' to '/root/fiftyone/open-images-v7/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v7/train' if necessary


Downloading 'https://storage.googleapis.com/openimages/2018_04/train/train-images-boxable-with-rotation.csv' to '/root/fiftyone/open-images-v7/train/metadata/image_ids.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/2018_04/train/train-images-boxable-with-rotation.csv' to '/root/fiftyone/open-images-v7/train/metadata/image_ids.csv'


 100% |██████|    4.8Gb/4.8Gb [4.0s elapsed, 0s remaining, 1.5Gb/s]         


INFO:eta.core.utils: 100% |██████|    4.8Gb/4.8Gb [4.0s elapsed, 0s remaining, 1.5Gb/s]         


Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to '/root/fiftyone/open-images-v7/train/metadata/classes.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to '/root/fiftyone/open-images-v7/train/metadata/classes.csv'


Downloading 'https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json' to '/tmp/tmpdwy2vg8j/metadata/hierarchy.json'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json' to '/tmp/tmpdwy2vg8j/metadata/hierarchy.json'


Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-bbox.csv' to '/root/fiftyone/open-images-v7/train/labels/detections.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-bbox.csv' to '/root/fiftyone/open-images-v7/train/labels/detections.csv'


Downloading 5000 images


INFO:fiftyone.utils.openimages:Downloading 5000 images


 100% |█████████████████| 5000/5000 [9.2m elapsed, 0s remaining, 8.6 files/s]       


INFO:eta.core.utils: 100% |█████████████████| 5000/5000 [9.2m elapsed, 0s remaining, 8.6 files/s]       


Dataset info written to '/root/fiftyone/open-images-v7/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v7/info.json'


Loading 'open-images-v7' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v7' split 'train'


 100% |███████████████| 5000/5000 [55.3s elapsed, 0s remaining, 130.7 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 5000/5000 [55.3s elapsed, 0s remaining, 130.7 samples/s]      


Dataset 'open-images-v7-train-5000' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v7-train-5000' created


✓ Downloaded 5000 images


In [None]:
EXPORT_DIR = os.path.join(DATASET_DIR, "train")
dataset.export(
    export_dir=EXPORT_DIR,
    dataset_type=fo.types.COCODetectionDataset,
    label_field="ground_truth",
    overwrite=True
)

 100% |███████████████| 5000/5000 [1.5m elapsed, 0s remaining, 67.9 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 5000/5000 [1.5m elapsed, 0s remaining, 67.9 samples/s]      


In [None]:
import os

PROJECT_NAME = "human_animal_detection"   # change if needed
BASE_DIR = f"/content/drive/MyDrive/{PROJECT_NAME}"
SCRIPTS_DIR = f"{BASE_DIR}/scripts"

os.makedirs(SCRIPTS_DIR, exist_ok=True)

Detection codes

In [None]:
# Your scripts folder
SCRIPTS_DIR = "/content/drive/MyDrive/human_animal_detection/scripts"

# 1️⃣ Create dataset.py
dataset_code = """
import os
import json
from PIL import Image
import torch
from torch.utils.data import Dataset

class HumanAnimalDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform

        # Load COCO-style JSON annotations
        with open(os.path.join(data_dir, "labels.json")) as f:
            data = json.load(f)

        # Map category_id -> label index (start from 1, 0 is background)
        self.cat2label = {cat["id"]: i+1 for i, cat in enumerate(data["categories"])}

        # Map image_id -> image info
        self.images = {img["id"]: img for img in data["images"]}

        # Group annotations by image_id
        self.image_id_to_ann = {}
        for ann in data["annotations"]:
            img_id = ann["image_id"]
            if img_id not in self.image_id_to_ann:
                self.image_id_to_ann[img_id] = []
            self.image_id_to_ann[img_id].append(ann)

        # Keep a list of image ids for indexing
        self.ids = list(self.images.keys())

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        image_id = self.ids[idx]
        img_info = self.images[image_id]
        anns = self.image_id_to_ann.get(image_id, [])

        img_path = os.path.join(self.data_dir, "data", img_info["file_name"])
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"Image not found: {img_path}")

        img = Image.open(img_path).convert("RGB")

        boxes = []
        labels = []

        for ann in anns:
            x, y, w, h = ann["bbox"]
            boxes.append([x, y, x + w, y + h])
            labels.append(self.cat2label[ann["category_id"]])

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels}

        if self.transform:
            img = self.transform(img)

        return img, target


"""

with open(os.path.join(SCRIPTS_DIR, "dataset.py"), "w") as f:
    f.write(dataset_code)

In [16]:
# Your scripts folder
SCRIPTS_DIR = "/content/drive/MyDrive/human_animal_detection/scripts"

train_code = """
import os
import torch
from torch.utils.data import DataLoader
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision import transforms as T
from dataset import HumanAnimalDataset

# Device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Paths
DATA_DIR = "/content/drive/MyDrive/human_animal_detection/datasets/train"  # images + labels.json
MODEL_PATH = "/content/drive/MyDrive/human_animal_detection/models/detector_mobilenet.pth"
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# Dataset & Dataloader
dataset = HumanAnimalDataset(DATA_DIR, transform=T.ToTensor())
loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

# Model: FasterRCNN with MobileNetV3 + FPN
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights="DEFAULT")
num_classes = len(dataset.cat2label) + 1  # background + all categories
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(
    in_features, num_classes
)
model.to(device)

# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)

# Training
num_epochs = 2
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, targets in loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(loader):.4f}")

# Save model
torch.save(model.state_dict(), MODEL_PATH)
print("Detector model saved to:", MODEL_PATH)
"""

import os
with open(os.path.join(SCRIPTS_DIR, "train_detector.py"), "w") as f:
    f.write(train_code)


To train detector model

In [17]:
!python /content/drive/MyDrive/human_animal_detection/scripts/train_detector.py

Epoch 1/2, Loss: 1.6379
Epoch 2/2, Loss: 1.3786
Detector model saved to: /content/drive/MyDrive/human_animal_detection/models/detector_mobilenet.pth


Classification codes

In [3]:
# Your scripts folder
SCRIPTS_DIR = "/content/drive/MyDrive/human_animal_detection/scripts"

dataset_code = """
import os
import json
from PIL import Image
from torch.utils.data import Dataset

HUMAN_KEYWORDS = [
    "person", "man", "woman", "boy", "girl", "human"
]

ANIMAL_KEYWORDS = [
    "animal", "dog", "cat", "horse", "sheep", "pig",
    "goat", "rabbit", "lion", "fox", "bat",
    "bird", "fish", "whale", "dolphin", "tortoise",
    "squirrel", "crab", "shellfish", "marine mammal",
    "sea lion", "insect", "giraffe", "elephant"
]

class HumanAnimalClassificationDataset(Dataset):
    \"""
    0 -> Human
    1 -> Animal
    \"""

    def __init__(self, root_dir, transform=None):
        self.image_dir = os.path.join(root_dir, "data")
        self.transform = transform

        with open(os.path.join(root_dir, "labels.json")) as f:
            coco = json.load(f)

        cat_id_to_name = {
            cat["id"]: cat["name"].lower() for cat in coco["categories"]
        }

        image_id_to_name = {
            img["id"]: img["file_name"] for img in coco["images"]
        }

        image_labels = {}
        for ann in coco["annotations"]:
            img_id = ann["image_id"]
            cat_name = cat_id_to_name[ann["category_id"]]
            image_labels.setdefault(img_id, set()).add(cat_name)

        self.samples = []
        for img_id, cats in image_labels.items():
            label = None
            if any(any(h in c for h in HUMAN_KEYWORDS) for c in cats):
                label = 0
            elif any(any(a in c for a in ANIMAL_KEYWORDS) for c in cats):
                label = 1

            if label is not None:
                img_path = os.path.join(self.image_dir, image_id_to_name[img_id])
                if os.path.exists(img_path):
                    self.samples.append((img_path, label))

        print(f"Loaded {len(self.samples)} classification samples")

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, label = self.samples[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label
"""

import os
with open(os.path.join(SCRIPTS_DIR, "dataset_cls.py"), "w") as f:
    f.write(dataset_code)


In [10]:
# Your scripts folder
SCRIPTS_DIR = "/content/drive/MyDrive/human_animal_detection/scripts"

train_code = """
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms, models
from dataset_cls import HumanAnimalClassificationDataset

# Device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Paths
DATA_DIR = "/content/drive/MyDrive/human_animal_detection/datasets/train"
MODEL_PATH = "/content/drive/MyDrive/human_animal_detection/models/classifier_resnet18.pth"
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

# Transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Dataset & Dataloader
dataset = HumanAnimalClassificationDataset(DATA_DIR, transform=transform)
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2)

# Model: ResNet18
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, 2)  # human vs animal
model.to(device)

# Loss & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Training
epochs = 2
for epoch in range(epochs):
    model.train()
    running_loss = 0
    correct = 0
    total = 0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    acc = 100 * correct / total
    print(f"Epoch {epoch+1}/{epochs} | Loss: {running_loss/len(loader):.4f} | Acc: {acc:.2f}%")

# Save model
torch.save(model.state_dict(), MODEL_PATH)
print("Classifier saved to:", MODEL_PATH)
"""

with open(os.path.join(SCRIPTS_DIR, "train_classifier.py"), "w") as f:
    f.write(train_code)

To run classification model

In [6]:
!python /content/drive/MyDrive/human_animal_detection/scripts/train_classifier.py


Loaded 4924 classification samples
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100% 44.7M/44.7M [00:00<00:00, 179MB/s]
Epoch 1/2 | Loss: 0.2059 | Acc: 92.22%
Epoch 2/2 | Loss: 0.0357 | Acc: 99.07%
Classifier saved to: /content/drive/MyDrive/human_animal_detection/models/classifier_resnet18.pth


Inference code

In [22]:
# Your scripts folder
SCRIPTS_DIR = "/content/drive/MyDrive/human_animal_detection/scripts"

infer_code = """
# scripts/inference.py
# scripts/inference_video.py
import os
import torch
from torchvision import transforms as T
import torchvision.models as models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import cv2
from dataset import HumanAnimalDataset  # for num_classes info if needed

# ---------------- Paths ---------------- #
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DETECTOR_PATH = "/content/drive/MyDrive/human_animal_detection/models/detector_mobilenet.pth"
CLASSIFIER_PATH = "/content/drive/MyDrive/human_animal_detection/models/classifier_resnet18.pth"
VIDEO_PATH = "/content/drive/MyDrive/human_animal_detection/test_video/video.mp4"
OUTPUT_PATH = "/content/drive/MyDrive/human_animal_detection/test_video/output.mp4"

# ---------------- Load Detector ---------------- #
dataset_info = HumanAnimalDataset("/content/drive/MyDrive/human_animal_detection/datasets/train")
num_classes = len(dataset_info.cat2label) + 1  # background + all categories

detector = models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights=None)
in_features = detector.roi_heads.box_predictor.cls_score.in_features
detector.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

detector.load_state_dict(torch.load(DETECTOR_PATH, map_location=DEVICE))
detector.to(DEVICE)
detector.eval()

# ---------------- Load Classifier ---------------- #
classifier = models.resnet18(weights=None)
classifier.fc = torch.nn.Linear(classifier.fc.in_features, 2)  # 0: human, 1: animal
classifier.load_state_dict(torch.load(CLASSIFIER_PATH, map_location=DEVICE))
classifier.to(DEVICE)
classifier.eval()

# ---------------- Video Setup ---------------- #
cap = cv2.VideoCapture(VIDEO_PATH)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(OUTPUT_PATH, fourcc, fps, (width, height))

transform = T.ToTensor()

# ---------------- Inference Loop ---------------- #
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    img_tensor = transform(frame).to(DEVICE)

    with torch.no_grad():
        outputs = detector([img_tensor])

    boxes = outputs[0]['boxes']
    scores = outputs[0]['scores']

    for box, score in zip(boxes, scores):
        if score < 0.5:
            continue
        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        crop = frame[y1:y2, x1:x2]
        if crop.shape[0] == 0 or crop.shape[1] == 0:
            continue
        crop_tensor = transform(crop).unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            pred = classifier(crop_tensor).argmax(dim=1).item()
        class_name = "Human" if pred == 0 else "Animal"
        cv2.putText(frame, class_name, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (0, 0, 255), 2)

    out.write(frame)

cap.release()
out.release()
print("Output video saved to:", OUTPUT_PATH)

"""

with open(os.path.join(SCRIPTS_DIR, "inference.py"), "w") as f:
    f.write(infer_code)

To run inference code

In [23]:
!python /content/drive/MyDrive/human_animal_detection/scripts/inference.py


Output video saved to: /content/drive/MyDrive/human_animal_detection/test_video/output.mp4
