In [1]:
import os
import shutil
from sklearn.model_selection import train_test_split
import shutil
import numpy as np


data_dir = '/Users/dmitrykutsenko/Desktop/hack_22_06_24_school_of_programming/data/classification_good'

train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'val')

val_split = 0.2

shutil.rmtree(train_dir, ignore_errors=True)
shutil.rmtree(val_dir, ignore_errors=True)

os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

for class_name in os.listdir(data_dir):
    class_path = os.path.join(data_dir, class_name)
    if os.path.isdir(class_path) and class_name not in ['train', 'val']:
        images = [os.path.join(class_path, img) for img in os.listdir(class_path) if img.endswith(('.jpg', '.jpeg', '.png'))]

        if len(images) <= 5: continue

        train_images, val_images = train_test_split(images, test_size=val_split, random_state=42)

        train_class_dir = os.path.join(train_dir, class_name)
        val_class_dir = os.path.join(val_dir, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(val_class_dir, exist_ok=True)

        for img in train_images:
            shutil.copy(img, os.path.join(train_class_dir, os.path.basename(img)))
        for img in val_images:
            shutil.copy(img, os.path.join(val_class_dir, os.path.basename(img)))

print("Разделение данных завершено.")

Разделение данных завершено.


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
import os
from tqdm.notebook import tqdm
import time
import numpy as np

torch.manual_seed(777)

data_dir = '/Users/dmitrykutsenko/Desktop/hack_22_06_24_school_of_programming/data/classification_good'
batch_size = 16
num_epochs = 20
learning_rate = 0.0005

# Трансформации для данных
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Загрузка данных
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']}
dataloaders = {x: DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True, num_workers=4) for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

model = models.mobilenet_v3_small(pretrained=True)
model.classifier[3] = nn.Linear(model.classifier[3].in_features, 1)

device = torch.device("mps")
model = model.to(device)

criterion = nn.BCEWithLogitsLoss()
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
# scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=learning_rate, steps_per_epoch=len(dataloaders['train']), epochs=num_epochs)

best_score = 0

for epoch in range(num_epochs):
    print(f'Epoch {epoch}/{num_epochs - 1}')
    print('-' * 10)

    if epoch == 1:
        for param_group in optimizer.param_groups:
            param_group['lr'] = 0.0003

    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()
        else:
            model.eval()

        running_loss = 0.0
        running_corrects = 0
        all_labels = []
        all_probs = []

        correct = 0
        total = 0
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device).float().view(-1, 1)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                    # scheduler.step()

            running_loss += loss.item() * inputs.size(0)
            total += labels.size(0)
            probs = torch.sigmoid(outputs).detach().cpu().numpy()

            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs)

            start = time.time()

        all_probs = np.concatenate(all_probs)
        all_preds = all_probs.round()
        all_labels = np.concatenate(all_labels)

        epoch_acc = accuracy_score(all_labels, all_preds)
        epoch_f1 = f1_score(all_labels, all_preds, average='weighted')
        epoch_auc = roc_auc_score(all_labels, all_probs, average='weighted')

        print(f'{phase} Acc: {epoch_acc:.4f} F1: {epoch_f1:.4f} AUC: {epoch_auc}')

        if epoch_auc > best_score:
            torch.save(model.state_dict(), 'model.pt')
            best_score = epoch_auc

Epoch 0/19
----------




train Acc: 0.9143 F1: 0.9094 AUC: 0.7453457446808511
val Acc: 0.9291 F1: 0.8950 AUC: 0.8822975517890772
Epoch 1/19
----------
train Acc: 0.9382 F1: 0.9258 AUC: 0.8984042553191489
val Acc: 0.9449 F1: 0.9281 AUC: 0.8757062146892655
Epoch 2/19
----------
train Acc: 0.9303 F1: 0.9202 AUC: 0.9101063829787234
val Acc: 0.9291 F1: 0.8950 AUC: 0.8418079096045199
Epoch 3/19
----------


In [2]:
import torch
import torchvision.models as models
import torch.nn as nn

model = models.mobilenet_v3_small(pretrained=False)
model.classifier[3] = nn.Linear(model.classifier[3].in_features, 1)
model.load_state_dict(torch.load('model.pt'))

model.eval()

dummy_input = torch.randn(1, 3, 224, 224)

torch.onnx.export(model, dummy_input, 'classification_check_good.onnx', input_names=['input'], output_names=['output'])



In [8]:
import onnxruntime as ort
import numpy as np
from PIL import Image
from torchvision import transforms

# Загрузка и предобработка изображения
image_path = '/Users/dmitrykutsenko/Desktop/hack_22_06_24_school_of_programming/data/classification_good/train/good/--fZyzXJzmU.jpg'
image = Image.open(image_path)

preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

input_tensor = preprocess(image)
input_batch = input_tensor.unsqueeze(0).numpy()

# Загрузка ONNX модели
onnx_model_path = '/Users/dmitrykutsenko/Desktop/hack_22_06_24_school_of_programming/app/ml/weights/classification_check_good.onnx'
ort_session = ort.InferenceSession(onnx_model_path)

# Получение предсказания
ort_inputs = {ort_session.get_inputs()[0].name: input_batch}
ort_outs = ort_session.run(None, ort_inputs)

# Преобразование выходных данных
output = ort_outs[0]

output# [0][0] > 0

array([[7.5271616]], dtype=float32)