In [1]:
from torchvision.transforms import transforms
import torchvision.models as models
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import ImageFolder
from torchvision import transforms
import os
import numpy as np
import glob
import random
import matplotlib.pyplot as plt
import logging

from PIL import Image
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
batch_size = 8

In [3]:
class CustomImageDataset(torch.utils.data.Dataset):
    def __init__(self, img_dir, class_name_in_int, transform=None):
        self.img_dir = img_dir
        self.class_name_in_int = class_name_in_int
        self.transform = transform

        # Get list of file paths and tags
        self.file_paths = []
        self.tags = []
        for class_name in os.listdir(self.img_dir):
            class_dir = os.path.join(self.img_dir, class_name)
            if not os.path.isdir(class_dir):
                continue
            tag = class_name_in_int.get(class_name)
            if tag is None:
                continue
                
            class_file_paths = glob.glob(os.path.join(class_dir, "*.jpg"))
            self.file_paths.extend(class_file_paths)
            self.tags.extend([tag] * len(class_file_paths))

    def __len__(self):
        return len(self.tags)

    def __getitem__(self, idx):
        img_path = self.file_paths[idx]
        tag = self.tags[idx]
        with open(img_path, "rb") as f:
            img = Image.open(f).convert("RGB")
        if self.transform:
            img = self.transform(img)

        return img, torch.tensor(tag, dtype=torch.long)


transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [4]:
path = "imagenet1k"

class_name_in_int = {}
class_name_in_str = {}
i = 0
for classes_name in os.listdir(path):
    if classes_name.endswith('.ipynb_checkpoints'):
        continue
    class_name = classes_name.split('_', 1)[-1]
    class_name_in_int[classes_name] = i
    class_name_in_str[i] = class_name
    i += 1

In [5]:
dataset = CustomImageDataset(path, class_name_in_int, transform=transform)

In [6]:
len(dataset)

100001

In [7]:
dataset = CustomImageDataset(path, class_name_in_int, transform=transform)

train_size = int(0.7 * len(dataset))
valid_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - valid_size

train_data, valid_data, test_data = random_split(dataset, [train_size, valid_size, test_size])

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers = 16)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False, num_workers = 16)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers = 16)

In [8]:
class Tipa_ResNet(nn.Module):
    
    def __init__(self, num_classes = 1000):
        super(Tipa_ResNet, self).__init__()
        self.relu = nn.ReLU()

        #Part 1
        self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=11, padding = 5)
        self.conv_layer1_batch = nn.BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.conv_layer2 = nn.Conv2d(in_channels=12, out_channels=16, kernel_size=3, padding = 1)
        self.conv_layer2_batch = nn.BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.conv_layer3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding = 1)
        self.conv_layer3_batch = nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.conv_layer4 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=1, padding = 0)
        self.conv_layer4_batch = nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        self.conv_layer5 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=17, padding = 8)
        self.conv_layer5_batch = nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.conv_layer6 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding = 1)
        self.conv_layer6_batch = nn.BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.conv_layer7 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding = 1)
        self.conv_layer7_batch = nn.BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.conv_layer8 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=1, padding = 0)
        self.conv_layer8_batch = nn.BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        #Part 2
        self.conv_layer16 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=3, padding = 1)
        self.conv_layer16_batch = nn.BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.conv_layer17 = nn.Conv2d(in_channels=128, out_channels=32, kernel_size=17, padding = 8)
        self.conv_layer17_batch = nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.conv_layer18 = nn.Conv2d(in_channels=32, out_channels=8, kernel_size=3, padding = 1)
        self.conv_layer18_batch = nn.BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.conv_layer19 = nn.Conv2d(in_channels=8, out_channels=1, kernel_size=3, padding = 1)
        self.conv_layer19_batch = nn.BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.conv_layer20 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=1, padding = 0)
        self.conv_layer20_batch = nn.BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        
        #Part 3
        self.conv_layer21 = nn.Conv2d(in_channels=1, out_channels= 8, kernel_size=23, padding = 11)
        self.conv_layer21_batch = nn.BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.conv_layer22 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, padding = 1)
        self.conv_layer22_batch = nn.BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        self.conv_layer23 = nn.Conv2d(in_channels=16, out_channels=64, kernel_size=3, padding = 1)
        self.conv_layer23_batch = nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        self.fc_layer1 = nn.Linear(64, 128)
        self.fc_layer2 = nn.Linear(128, 256)
        self.fc_layer3 = nn.Linear(256, num_classes)


    def forward(self, x):
        BATCH_SIZE = x.shape[0]
        IMG_SIZE = x.shape[3]
        #Part1
        x = self.relu(self.conv_layer1_batch(self.conv_layer1(x))) if x.size(0) > 1 else self.relu(self.conv_layer1(x))
        x = self.relu(self.conv_layer2_batch(self.conv_layer2(x))) if x.size(0) > 1 else self.relu(self.conv_layer2(x))
        x = self.relu(self.conv_layer3_batch(self.conv_layer3(x))) if x.size(0) > 1 else self.relu(self.conv_layer3(x))
        x2 = self.relu(self.conv_layer4_batch(self.conv_layer4(x))) if x.size(0) > 1 else self.relu(self.conv_layer4(x))
        x = x + x2
        
        x = self.relu(self.conv_layer5_batch(self.conv_layer5(x))) if x.size(0) > 1 else self.relu(self.conv_layer5(x))
        x = self.relu(self.conv_layer6_batch(self.conv_layer6(x))) if x.size(0) > 1 else self.relu(self.conv_layer6(x))
        x = self.relu(self.conv_layer7_batch(self.conv_layer7(x))) if x.size(0) > 1 else self.relu(self.conv_layer7(x))
        x2 = self.relu(self.conv_layer8_batch(self.conv_layer8(x))) if x.size(0) > 1 else self.relu(self.conv_layer8(x))
        x = x + x2
        
        #Part 2
        x = self.relu(self.conv_layer16_batch(self.conv_layer16(x))) if x.size(0) > 1 else self.relu(self.conv_layer16(x))
        x = self.relu(self.conv_layer17_batch(self.conv_layer17(x))) if x.size(0) > 1 else self.relu(self.conv_layer17(x))
        x = self.relu(self.conv_layer18_batch(self.conv_layer18(x))) if x.size(0) > 1 else self.relu(self.conv_layer18(x))
        x = self.relu(self.conv_layer19_batch(self.conv_layer19(x))) if x.size(0) > 1 else self.relu(self.conv_layer19(x))
        x2 = self.relu(self.conv_layer20_batch(self.conv_layer20(x))) if x.size(0) > 1 else self.relu(self.conv_layer20(x))
        x = x + x2
        
        #Part 3
        x = self.relu(self.conv_layer21_batch(self.conv_layer21(x))) if x.size(0) > 1 else self.relu(self.conv_layer21(x))
        x = self.relu(self.conv_layer22_batch(self.conv_layer22(x))) if x.size(0) > 1 else self.relu(self.conv_layer22(x))
        x = self.relu(self.conv_layer23_batch(self.conv_layer23(x))) if x.size(0) > 1 else self.relu(self.conv_layer23(x))

        
        x = F.adaptive_avg_pool2d(x, (1, 1))
        
        x = x.view(x.size(0), -1)
        x = self.fc_layer1(x)
        x = self.fc_layer2(x)
        x = self.fc_layer3(x)

        return x


In [9]:
num_classes = len(class_name_in_int)

model = Tipa_ResNet(num_classes)

In [10]:
model = nn.DataParallel(model).cuda()

In [11]:
criterion = nn.CrossEntropyLoss().cuda()

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.98, weight_decay=0.001)

In [None]:
best_valid_loss = float('inf')
best_model_path = 'mini_best_model_images.pt'

open('mini_training.log', 'w').close()
logging.basicConfig(filename='mini_training.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

num_epochs = 30

for epoch in range(num_epochs):
    # Обучение модели
    model.train()
    train_loss = 0.0
    total = 0
    correct = 0
    for i, (images, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        images, labels = images.to(device), labels.to(device)
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        _, predicted = torch.max(output.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        train_loss += loss.item() * images.size(0)

        if i % 10 == 9:  
            print('[%d, %5d] loss: %.7f accuracy: %.3f' %
                  (epoch + 1, i + 1, train_loss / 100, correct / total))
            logging.info('[%d, %5d] loss: %.7f accuracy: %.3f' %
                  (epoch + 1, i + 1, train_loss / 100, correct / total))
            train_loss = 0.0
            correct = 0
            total = 0
        

    # Вычисление потерь на обучающем наборе данных
    train_loss = train_loss / len(train_loader.dataset)

    # Валидация модели
    model.eval()
    valid_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)
            output = model(images)
            _, predicted = torch.max(output, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total

    # Вычисление потерь на валидационном наборе данных
    valid_loss = valid_loss / len(valid_loader.dataset)

    # Сохранение лучшей модели
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), best_model_path)

    # Логирование
    logging.info(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {accuracy:.4f}")
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, Valid Accuracy: {accuracy:.4f}")


print("Training finished.")

[1,    10] loss: 5.5292402 accuracy: 0.000
[1,    20] loss: 5.5273066 accuracy: 0.000
[1,    30] loss: 5.5262460 accuracy: 0.000
[1,    40] loss: 5.5410062 accuracy: 0.000
[1,    50] loss: 5.5263694 accuracy: 0.000
[1,    60] loss: 5.5293944 accuracy: 0.000
[1,    70] loss: 5.5354773 accuracy: 0.000
[1,    80] loss: 5.5356190 accuracy: 0.000
[1,    90] loss: 5.5145975 accuracy: 0.000
[1,   100] loss: 5.5228328 accuracy: 0.013
[1,   110] loss: 5.5285189 accuracy: 0.000
[1,   120] loss: 5.5196767 accuracy: 0.000
[1,   130] loss: 5.5176483 accuracy: 0.000
[1,   140] loss: 5.5214414 accuracy: 0.000
[1,   150] loss: 5.5296254 accuracy: 0.000
[1,   160] loss: 5.5340282 accuracy: 0.000
[1,   170] loss: 5.5222203 accuracy: 0.000
[1,   180] loss: 5.5297572 accuracy: 0.000
[1,   190] loss: 5.5324872 accuracy: 0.000
[1,   200] loss: 5.5093224 accuracy: 0.000
[1,   210] loss: 5.5257960 accuracy: 0.000
[1,   220] loss: 5.5138416 accuracy: 0.000
[1,   230] loss: 5.5300329 accuracy: 0.000
[1,   240] 