<a href="https://colab.research.google.com/github/oleeejka/logo_detection/blob/main/logo_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Сначала создаем и обучаем модель классификации.

In [29]:
import gdown
import zipfile
import os
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader, Subset
import cv2

Загружаем датасет LogoDet-3K из архива. Датасет в виде архива сохранен на моем Google Drive.

In [30]:
# Google Drive location of the LogoDet-3K archive and the local paths used
# while downloading and unpacking it.
url = 'https://drive.google.com/uc?id=1uiKeHsd5hJeMX7hHZvozxTDrGn8UE3Eu'
output = 'logodet-3k.zip'
extract_dir = 'data/logos'

# The archive is about 3 GB, so skip the download when the extracted
# 'LogoDet-3K' folder (the directory the dataset is loaded from) is already
# present. NOTE: a partially extracted folder also counts as "present";
# delete it manually to force a fresh download.
if not os.path.isdir(os.path.join(extract_dir, 'LogoDet-3K')):
    downloaded = gdown.download(url, output, quiet=False)
    if downloaded is None:
        # Fail here with a clear message instead of a confusing
        # "file not found" error from zipfile below.
        raise RuntimeError(f"Failed to download dataset archive from {url}")

    with zipfile.ZipFile(output, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)

    # The archive is no longer needed once its contents are unpacked.
    os.remove(output)

Downloading...
From (original): https://drive.google.com/uc?id=1uiKeHsd5hJeMX7hHZvozxTDrGn8UE3Eu
From (redirected): https://drive.google.com/uc?id=1uiKeHsd5hJeMX7hHZvozxTDrGn8UE3Eu&confirm=t&uuid=c6e08cbe-a913-4ed4-813d-dcdb160ca5ca
To: /content/logodet-3k.zip
100%|██████████| 3.08G/3.08G [00:49<00:00, 62.5MB/s]


In [31]:
# Preprocessing applied to every training/validation image: resize to
# 224x224, convert to a float tensor, then normalize each channel.
# `dataset` is built from this pipeline, so it has to apply the same
# mean/std normalization as the inference-time `preprocess` pipeline;
# otherwise the network is trained and evaluated on differently scaled inputs.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [32]:
# Root folder of the images; expected to exist after the archive has been
# extracted into 'data/logos'. ImageFolder assigns one class per
# sub-directory of this root.
data_dir = 'data/logos/LogoDet-3K'
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

Разделяем датасет на обучающую и валидационную выборки (80/20, со стратификацией по классам).

In [33]:
# Split sample indices 80/20 into training and validation parts.
# Stratifying on the labels keeps class proportions the same in both parts;
# the fixed random_state makes the split repeatable.
all_indices = range(len(dataset))
train_indices, val_indices = train_test_split(
    all_indices,
    test_size=0.2,
    stratify=dataset.targets,
    random_state=42,
)

# Subset views share the underlying dataset (and therefore its transform).
train_dataset, val_dataset = Subset(dataset, train_indices), Subset(dataset, val_indices)

Создаем DataLoader

In [34]:
# Both loaders use the same batch size; only the training loader reshuffles
# its samples every epoch.
batch_size = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

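Обучаем модель классификации. Я использовал предобученную модель ResNet-50, заменив последний полносвязный слой на слой с числом выходов, равным числу классов в датасете.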

In [35]:
# Image preprocessing for classification: resize to 224x224, convert to a
# tensor, then normalize each channel with the given mean/std.
# NOTE(review): `dataset` was already constructed from the earlier value of
# `transform`; rebinding the name here does not change that dataset.
resize_to_input = transforms.Resize((224, 224))
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
transform = transforms.Compose([resize_to_input, transforms.ToTensor(), normalize])

In [36]:
# Start from a pretrained ResNet-50 and swap its final fully connected layer
# for a fresh one with one output per class found in `dataset`.
num_classes = len(dataset.classes)
model_classifier = models.resnet50(pretrained=True)
in_features = model_classifier.fc.in_features
model_classifier.fc = nn.Linear(in_features, num_classes)



In [37]:
# Multi-class cross-entropy loss, with Adam updating every parameter of the
# network (backbone and new head alike).
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model_classifier.parameters(), lr=learning_rate)

In [None]:
# Plain supervised training loop: one optimizer step per batch, and the mean
# batch loss printed at the end of every epoch.
# NOTE(review): `val_loader` is not used here, and neither the model nor the
# batches are moved to a GPU in this cell.
num_epochs = 10
for epoch in range(num_epochs):
    model_classifier.train()  # training mode for layers such as batch norm
    running_loss = 0.0

    for batch_images, batch_targets in train_loader:
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        logits = model_classifier(batch_images)
        loss = criterion(logits, batch_targets)

        loss.backward()
        optimizer.step()

        # .item() extracts a Python float so no graph is kept alive.
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")

Сохраняем веса обученной модели (state_dict) в файл.

In [None]:
# torch.save does not create missing parent directories, so make sure the
# 'models' folder exists before writing the checkpoint.
os.makedirs('models', exist_ok=True)
torch.save(model_classifier.state_dict(), 'models/logo_classifier.pth')

Используем обученную модель для получения предсказания

In [None]:
# Rebuild the network for inference and load the trained weights.
# The classification head must have exactly as many outputs as the head that
# was saved, otherwise load_state_dict fails with a size mismatch. The count
# is therefore read from the checkpoint itself rather than hard-coded.
state_dict = torch.load('models/logo_classifier.pth', map_location='cpu')
num_classes = state_dict['fc.weight'].shape[0]

# No pretrained ImageNet weights are requested here: every parameter is
# overwritten by the checkpoint, so downloading them would be wasted work.
model_classifier = models.resnet50()
model_classifier.fc = nn.Linear(model_classifier.fc.in_features, num_classes)
model_classifier.load_state_dict(state_dict)

# Switch to inference mode (affects layers such as batch norm and dropout).
model_classifier.eval()

In [None]:
# Inference-time preprocessing: turn the incoming array/tensor into a PIL
# image, resize it to 224x224, convert it to a tensor and normalize each
# channel with the given mean/std.
preprocess_steps = [
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
preprocess = transforms.Compose(preprocess_steps)


In [None]:
def classify_logo(image):
    """Predict the class index for a single image.

    Args:
        image: Input accepted by the module-level `preprocess` pipeline,
            whose first step is ``ToPILImage``. No channel reordering is
            done here. NOTE(review): presumably an RGB H x W x C array;
            confirm against the caller.

    Returns:
        A 0-dim tensor holding the index of the largest model output. The
        model sees a batch of one, so this is the predicted class index.
    """
    # Add a leading batch dimension: the model expects batched input.
    batch = preprocess(image).unsqueeze(0)

    # No gradients are needed for inference.
    with torch.no_grad():
        logits = model_classifier(batch)

    return logits.argmax()

In [None]:
# Classify one image from disk (cv2 is imported in the notebook's import cell).
image_path = 'path/to/logo/image.jpg'

# cv2.imread signals an unreadable or missing file by returning None rather
# than raising, so check explicitly to get a clear error message.
image_bgr = cv2.imread(image_path)
if image_bgr is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# OpenCV loads images in BGR channel order, while the training data was read
# through ImageFolder as RGB; convert so the channels match.
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

result = classify_logo(image)
print(f"Classification result: {result}")