### classifier

In [13]:
!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: efficientnet_pytorch
  Building wheel for efficientnet_pytorch (setup.py): started
  Building wheel for efficientnet_pytorch (setup.py): finished with status 'done'
  Created wheel for efficientnet_pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16522 sha256=b71bc9c8ba1f23088c87891cf50ceaa98bee5179203e83172b56c54a787b1d42
  Stored in directory: c:\users\fortn\appdata\local\pip\cache\wheels\29\16\24\752e89d88d333af39a288421e64d613b5f652918e39ef1f8e3
Successfully built efficientnet_pytorch
Installing collected packages: efficientnet_pytorch
Successfully installed efficientnet_pytorch-0.7.1


In [15]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.6.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.12.0-py3-none-any.whl.metadata (5.6 kB)
Downloading torchmetrics-1.6.1-py3-none-any.whl (927 kB)
   ---------------------------------------- 0.0/927.3 kB ? eta -:--:--
   ---------------------- ----------------- 524.3/927.3 kB 5.7 MB/s eta 0:00:01
   ---------------------------------------- 927.3/927.3 kB 6.1 MB/s eta 0:00:00
Downloading lightning_utilities-0.12.0-py3-none-any.whl (28 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.12.0 torchmetrics-1.6.1


In [3]:
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp39-cp39-win_amd64.whl.metadata (15 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.1-cp39-cp39-win_amd64.whl (11.2 MB)
   ---------------------------------------- 0.0/11.2 MB ? eta -:--:--
   - -------------------------------------- 0.5/11.2 MB 8.2 MB/s eta 0:00:02
   ------ --------------------------------- 1.8/11.2 MB 5.9 MB/s eta 0:00:02
   ------------ --------------------------- 3.4/11.2 MB 6.7 MB/s eta 0:00:02
   -------------------- ------------------- 5.8/11.2 MB 8.0 MB/s eta 0:00:01
   ------------------------------ --------- 8.4/11.2 MB 8.8 MB/s eta 0:00:01
   ---------------------------------------  11.0/11.2 MB 9.4 MB/s eta 0:00:01
   ---------------------------------------- 11.2/11.2 MB 9.2 MB/s eta 0:00:00

In [4]:
from efficientnet_pytorch import EfficientNet
import torch.nn as nn
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim
from torchmetrics import Precision, Recall
from tqdm import tqdm
from loguru import logger
from sklearn.metrics import precision_score, recall_score, accuracy_score

In [5]:
# Image preprocessing shared by the training and test datasets
INPUT_SIZE = (224, 224)
# Per-channel normalization statistics (the inference code uses the same values)
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

_preprocessing_steps = [
    transforms.Resize(INPUT_SIZE),                       # resize to the network input size
    transforms.ToTensor(),                               # PIL image -> float tensor
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),  # channel-wise normalization
]
transform = transforms.Compose(_preprocessing_steps)

In [6]:
import os
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

class SubclassDataset(Dataset):
    """Image dataset for a ``root/<class>/<subclass>/<image>`` directory tree.

    The label of a sample is the index of its *subclass* folder name. Indices
    are handed out in the order subclass names are first seen while walking
    class folders and then subclass folders in sorted order. Subclass folders
    with the same name under different class folders share a single label.

    Args:
        root: Directory that contains the class folders.
        transform: Optional callable applied to each RGB PIL image in
            ``__getitem__``.
        extensions: File suffixes (case-insensitive) accepted as images. Files
            with any other suffix (for example ``Thumbs.db``) are skipped so
            they cannot make ``Image.open`` fail in the middle of an epoch.
            Pass ``None`` to accept every regular file.

    Attributes set by ``__init__``:
        samples: list of ``(image_path, label_index)`` pairs.
        subclass_to_idx: subclass name -> label index.
        idx_to_subclass: label index -> subclass name.
        classes: subclass names; a name's position is its label index.
    """

    # Suffixes treated as images by default.
    IMG_EXTENSIONS = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm", ".tif", ".tiff", ".webp")

    def __init__(self, root, transform=None, extensions=IMG_EXTENSIONS):
        self.root = root
        self.transform = transform
        self.samples = []
        self.subclass_to_idx = {}
        self.idx_to_subclass = {}
        self.classes = []

        if extensions is not None:
            # str.endswith needs a tuple; lower-case once for case-insensitive matching.
            extensions = tuple(ext.lower() for ext in extensions)

        # Walk exactly two directory levels: class folders, then subclass folders.
        for class_name in sorted(os.listdir(self.root)):
            class_path = os.path.join(self.root, class_name)
            if not os.path.isdir(class_path):
                continue

            for subclass_name in sorted(os.listdir(class_path)):
                subclass_path = os.path.join(class_path, subclass_name)
                if not os.path.isdir(subclass_path):
                    continue

                # Register the subclass the first time its name is seen.
                if subclass_name not in self.subclass_to_idx:
                    idx = len(self.subclass_to_idx)
                    self.subclass_to_idx[subclass_name] = idx
                    self.idx_to_subclass[idx] = subclass_name
                    self.classes.append(subclass_name)
                label = self.subclass_to_idx[subclass_name]

                # Index the image files stored directly inside the subclass folder.
                for image_name in sorted(os.listdir(subclass_path)):
                    image_path = os.path.join(subclass_path, image_name)
                    if not os.path.isfile(image_path):
                        continue
                    if extensions is not None and not image_name.lower().endswith(extensions):
                        continue
                    self.samples.append((image_path, label))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; the image is RGB and transformed if a transform was given."""
        image_path, label = self.samples[idx]
        # The context manager releases the file handle as soon as the pixels are converted.
        with Image.open(image_path) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [7]:
# Load the training and test splits
DATA_ROOT = "D:/Dina/LogoLensAI/ml_experements/set/train_and_test"
train_dataset = SubclassDataset(root=f"{DATA_ROOT}/train", transform=transform)
test_dataset = SubclassDataset(root=f"{DATA_ROOT}/test", transform=transform)

# Each SubclassDataset derives its own name -> index mapping from the folders it
# finds. If the two splits disagree, test labels would not match the model's
# output indices and every reported metric would be silently wrong.
assert train_dataset.subclass_to_idx == test_dataset.subclass_to_idx, (
    "train and test splits define different subclass label mappings"
)

In [8]:
# Mini-batch loaders: the training split is shuffled, the test split keeps its order
BATCH_SIZE = 128
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

In [9]:
# Pick the compute device: GPU when CUDA is available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Start from the pretrained EfficientNet-B0 weights
model = EfficientNet.from_pretrained('efficientnet-b0')

# Replace the final fully connected layer with one sized to this dataset
num_classes = len(train_dataset.classes)  # one output per subclass
head_in_features = model._fc.in_features
model._fc = nn.Linear(head_in_features, num_classes)

# Move all parameters (including the new head) to the chosen device
model = model.to(device)

Loaded pretrained weights for efficientnet-b0


In [10]:
# Display the number of subclasses the classification head was sized for
num_classes

2341

In [11]:
# Loss function and optimizer
LEARNING_RATE = 0.001
criterion = nn.CrossEntropyLoss()
trainable_params = model.parameters()
optimizer = optim.Adam(trainable_params, lr=LEARNING_RATE)

In [12]:
# Training / evaluation loop

def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs=10, device=None):
    """Train ``model`` and log test-set metrics after every epoch.

    Each epoch runs one optimization pass over ``train_loader`` and then one
    gradient-free pass over ``test_loader``. Accuracy (in percent) and
    macro-averaged precision and recall are computed over all test predictions
    and written with ``logger.info`` together with the mean training loss.

    Args:
        model: Network to train; it is updated in place and is left in eval
            mode when the function returns.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        test_loader: Iterable of ``(inputs, labels)`` evaluation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of train + evaluate rounds.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has none), so the
            function does not depend on a notebook-level ``device`` variable.

    Returns:
        None.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    for epoch in range(num_epochs):
        # --- training pass ---
        model.train()
        running_loss = 0.0
        num_batches = 0  # counted here so the mean never divides by zero
        for inputs, labels in tqdm(train_loader, desc=f'Training Epoch {epoch+1}/{num_epochs}'):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        mean_train_loss = running_loss / num_batches if num_batches else float("nan")

        # --- evaluation pass ---
        model.eval()
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for inputs, labels in tqdm(test_loader, desc=f'Testing Epoch {epoch+1}/{num_epochs}'):
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                predicted = outputs.argmax(dim=1)  # index of the highest logit per sample

                all_preds.extend(predicted.cpu().tolist())
                all_labels.extend(labels.cpu().tolist())

        # Metrics over the whole test pass
        accuracy = accuracy_score(all_labels, all_preds) * 100
        precision = precision_score(all_labels, all_preds, average='macro', zero_division=0)
        recall = recall_score(all_labels, all_preds, average='macro', zero_division=0)

        logger.info(f"Epoch [{epoch+1}/{num_epochs}], "
                    f"Train Loss: {mean_train_loss:.4f}, "
                    f"Test Accuracy: {accuracy:.2f}%, "
                    f"Test Precision: {precision:.4f}, "
                    f"Test Recall: {recall:.4f}")

In [14]:
train_model(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=15,
)

Training Epoch 1/15: 100%|██████████| 914/914 [04:32<00:00,  3.36it/s]
Testing Epoch 1/15: 100%|██████████| 393/393 [01:11<00:00,  5.52it/s]
[32m2025-02-04 15:21:02.085[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_model[0m:[36m40[0m - [1mEpoch [1/15], Train Loss: 4.5373, Test Accuracy: 47.54%, Test Precision: 0.5530, Test Recall: 0.4713[0m
Training Epoch 2/15: 100%|██████████| 914/914 [04:31<00:00,  3.37it/s]
Testing Epoch 2/15: 100%|██████████| 393/393 [01:10<00:00,  5.57it/s]
[32m2025-02-04 15:26:44.126[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_model[0m:[36m40[0m - [1mEpoch [2/15], Train Loss: 2.1011, Test Accuracy: 59.87%, Test Precision: 0.6638, Test Recall: 0.5962[0m
Training Epoch 3/15: 100%|██████████| 914/914 [04:31<00:00,  3.37it/s]
Testing Epoch 3/15: 100%|██████████| 393/393 [01:10<00:00,  5.55it/s]
[32m2025-02-04 15:32:26.494[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_model[0m:[36m40[0m - [1mEpoch [3/15], Train Loss: 1.2793, 

In [16]:
# Save the trained parameters (state dict) to disk
WEIGHTS_FILE = "efficientnet_logo2k.pth"
trained_state = model.state_dict()
torch.save(trained_state, WEIGHTS_FILE)


#### inference

In [20]:
import torch
from PIL import Image
from torchvision import transforms

# Model loading
def load_model(model_path, num_classes, device):
    """Rebuild the EfficientNet-B0 classifier and load fine-tuned weights into it.

    Args:
        model_path: Path to a state dict saved with ``torch.save(model.state_dict(), ...)``.
        num_classes: Output size of the classification head stored in the checkpoint.
        device: Device the weights are mapped to and the model is moved to.

    Returns:
        The model on ``device``, switched to eval mode.
    """
    # Build the architecture only. Every weight is overwritten by the checkpoint
    # below, so fetching the ImageNet weights first would be wasted work.
    model = EfficientNet.from_name('efficientnet-b0')
    model._fc = torch.nn.Linear(model._fc.in_features, num_classes)

    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state dict contains.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)

    model = model.to(device)
    model.eval()  # inference mode for dropout and batch-norm layers
    return model

# Single-image inference
def predict_image_class(image_path, model, class_names, device, transform=None):
    """Predict the class name of one image file.

    Args:
        image_path: Path of the image to classify.
        model: Callable that maps a ``(1, C, H, W)`` batch to class scores. It
            is used as given; switching it to eval mode is the caller's job.
        class_names: Sequence indexed by the predicted class index.
        device: Device the input tensor is moved to.
        transform: Optional preprocessing callable (PIL image -> tensor). Pass
            the transform used for training to keep both in sync. When omitted,
            the default 224x224 resize + normalization pipeline is built.

    Returns:
        The entry of ``class_names`` at the index of the highest score.
    """
    if transform is None:
        transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    # The context manager releases the file handle once the pixels are converted.
    with Image.open(image_path) as img:
        image = img.convert('RGB')

    # Add the batch dimension expected by the model and move to the target device.
    image_tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():  # no gradients are needed for inference
        outputs = model(image_tensor)

    predicted_idx = outputs.argmax(dim=1).item()
    return class_names[predicted_idx]


# Inference settings
model_path = "D:/Dina/LogoLensAI/ml_experements/efficientnet_logo2k.pth"  # saved weights file
image_path = "D:/Dina/LogoLensAI/ml_experements/data_img/toyota_1.jpg"    # image to classify
class_names = train_dataset.classes  # label index -> subclass name
num_classes = len(class_names)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # prefer GPU when present

# Restore the trained model
model = load_model(model_path=model_path, num_classes=num_classes, device=device)

# Classify the image and report the result
predicted_class = predict_image_class(
    image_path=image_path,
    model=model,
    class_names=class_names,
    device=device,
)
print(f"Predicted class: {predicted_class}")

Loaded pretrained weights for efficientnet-b0
Predicted class: Toyota


### clip

In [1]:
!pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to c:\users\fortn\appdata\local\temp\pip-req-build-e7vb8y4b
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting ftfy (from clip==1.0)
  Using cached ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Using cached ftfy-6.3.1-py3-none-any.whl (44 kB)
Building wheels for collected packages: clip
  Building wheel for clip (setup.py): started
  Building wheel for clip (setup.py): finished with status 'done'
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369594 sha256=01a007c5d0825feaeaf9325f0e1318ae3d7820e552a12eeffe37ddeb3b0d0cc3
  Stored in directory: C:\Users\fortn\AppData\Local\Temp\pip-ephem-wheel-cache-p4iyq5qy\wheels\c8\e4\e1\11374c111387672fc2068dfbe0d4b424cb9cdd1b2e184a71b5
Successfully built clip
Installing collected 

  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git 'C:\Users\fortn\AppData\Local\Temp\pip-req-build-e7vb8y4b'


In [1]:
from loguru import logger
import torch
from PIL import Image
import clip
import numpy as np
import os
from tqdm import tqdm
import shutil
import torch.nn.functional as F
from typing import List, Tuple

In [2]:
class Classifier:
    """Zero-shot image classifier that scores an image against text labels with CLIP.

    The label prompts are tokenized once in ``__init__`` and reused for every
    image.

    Args:
        labels: Non-empty list of text prompts, one per candidate class.
        device: Device name passed to ``clip.load``; defaults to ``"cuda"`` when
            CUDA is available, otherwise ``"cpu"``.
        clip_model_name: Name of the CLIP model passed to ``clip.load``.

    Raises:
        ValueError: If ``labels`` is empty.
    """

    def __init__(self, labels: list, device: str = None, clip_model_name: str = "ViT-B/32"):
        # Fail before loading the model: with no prompts there is nothing to rank.
        if not labels:
            raise ValueError("labels must contain at least one text prompt")

        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device

        self.model, self.preprocess = clip.load(clip_model_name, device=self.device)

        self.labels = labels
        self.text_inputs = clip.tokenize(self.labels).to(self.device)

    def classify_image(self, image: "Image.Image") -> dict:
        """Return ``{label: probability}`` for ``image``.

        The probabilities are a softmax over the image-to-text logits, so they
        sum to 1 across ``self.labels``.
        """
        image_input = self.preprocess(image).unsqueeze(0).to(self.device)
        with torch.no_grad():
            logits_per_image, _ = self.model(image_input, self.text_inputs)
            probs = logits_per_image.softmax(dim=-1).cpu().numpy()[0]
        return dict(zip(self.labels, probs))

    def classify_collage(self, image_path: str) -> Tuple[str, float]:
        """Classify the image stored at ``image_path``.

        Returns:
            ``(best_label, probability)`` for the highest-scoring label.
        """
        # Convert straight to RGB. A round trip through a NumPy array would drop
        # the palette of "P"-mode images and turn them into grayscale indices.
        with Image.open(image_path) as img:
            pil_img = img.convert("RGB")

        result = self.classify_image(pil_img)
        best_key = max(result, key=result.get)
        return best_key, float(result[best_key])

In [None]:
# Zero-shot check: which of the two prompts matches the image best?
labels = ["other", "is this VK?"]
file_path = "D:/Dina/LogoLensAI/ml_experements/data_img/VK-1.jpg"

classifier = Classifier(labels=labels)
key, value = classifier.classify_collage(image_path=file_path)
print(key)