In [1]:
import torch
from torch import nn
import torch.optim as optim
from tqdm import tqdm
import torchvision
from torchvision import transforms
from mxnet.gluon.data.vision import datasets, transforms

device = torch.device('cpu')
torch.manual_seed(42)

alex_transformer = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=0.1307, std=0.3081),])

fashion_train_data = mxnet.gluon.data.vision.datasets.FashionMNIST(train=True, transform=alex_transformer)
fashion_test_data = mxnet.gluon.data.vision.datasets.FashionMNIST(train=False, transform=alex_transformer)

fashion_batch_size = 256
train_dataloader = mx.gluon.data.DataLoader(fashion_train_data, batch_size=fashion_batch_size, shuffle=False)
test_dataloader = mx.gluon.data.DataLoader(fashion_test_data, batch_size=fashion_batch_size, shuffle=False)

def build_alexnet():
    def init_weights(m):
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0.01)
        if isinstance(m, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)

    net = nn.Sequential(
        nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2, activation='relu'),
        nn.MaxPool2d(kernel_size=3, stride=2),

        nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2, activation='relu'),
        #nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),

        nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1, activation='relu'),
        nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1, activation='relu'),
        nn.MaxPool2d(kernel_size=3, stride=2),

        nn.Flatten(),
        nn.Dense(4096, activation='relu'),
        nn.Dropout(0.5),
        nn.Dense(4096, activation='relu'),
        nn.Dropout(0.5),
        nn.Dense(10, activation=None),
        # nn.Linear(4096, 4096),
        # nn.ReLU(inplace=True),
        # nn.Dropout(p=0.5),
        #nn.Linear(4096, 10)
    )

    net.apply(init_weights)
    return net

alexnet = build_alexnet().to(device)

def train(net, train_loader, device, num_epochs, learning_rate):
    net.train()
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    acc_history = []

    total_steps = len(train_loader) * num_epochs
    with tqdm(total=total_steps, position=0, leave=True) as pbar:
        for epoch in range(num_epochs):
            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, labels in train_loader:
                inputs = inputs.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)

                optimizer.zero_grad(set_to_none=True)
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

                preds = outputs.argmax(dim=1)
                batch_total = labels.size(0)
                batch_correct = (preds == labels).sum().item()
                total += batch_total
                correct += batch_correct
                batch_acc = batch_correct / batch_total

                pbar.set_description(f"Epoch {epoch+1}/{num_epochs} | Loss {running_loss:.2f} | Acc {batch_acc:.3f}")
                pbar.update(1)

            acc = correct / total
            acc_history.append(acc)

    return acc_history

# Запуск обучения
hist_alexnet = train(alexnet, train_dataloader, device, num_epochs=10, learning_rate=0.01)

ModuleNotFoundError: No module named 'mxnet'

In [1]:
import torch
from torch import nn
import torch.optim as optim
from tqdm import tqdm
import torchvision
from torchvision import transforms, datasets

device = torch.device('cpu')
torch.manual_seed(42)

alex_transformer = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.1307], std=[0.3081]),
])

fashion_train_data = datasets.FashionMNIST(root="./data", train=True, transform=alex_transformer, download=True)
fashion_test_data = datasets.FashionMNIST(root="./data", train=False, transform=alex_transformer, download=True)

fashion_batch_size = 256
train_dataloader = torch.utils.data.DataLoader(fashion_train_data, batch_size=fashion_batch_size, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(fashion_test_data, batch_size=fashion_batch_size, shuffle=False)

def build_alexnet():
    def init_weights(m):
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0.01)
        if isinstance(m, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)

    net = nn.Sequential(
        nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),

        nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),

        nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),

        nn.Flatten(),
        nn.Linear(256 * 6 * 6, 4096),  # Правильный размер после сверток
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(4096, 10),
    )

    net.apply(init_weights)
    return net

alexnet = build_alexnet().to(device)


In [2]:
def train(net, train_loader, device, num_epochs, learning_rate):
    net.train()
    optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    acc_history = []

    total_steps = len(train_loader) * num_epochs
    with tqdm(total=total_steps, position=0, leave=True) as pbar:
        for epoch in range(num_epochs):
            running_loss = 0.0
            correct = 0
            total = 0

            pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
            
            for inputs, labels in train_loader:
                inputs = inputs.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)

                optimizer.zero_grad(set_to_none=True)
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

                preds = outputs.argmax(dim=1)
                batch_total = labels.size(0)
                batch_correct = (preds == labels).sum().item()
                total += batch_total
                correct += batch_correct
                batch_acc = batch_correct / batch_total

                pbar.set_postfix({'Loss': f'{running_loss:.2f}',
                'Acc': f'{batch_acc:.3f}'})
                
                # pbar.set_description(f"Epoch {epoch+1}/{num_epochs} | Loss {running_loss:.2f} | Acc {batch_acc:.3f}")
                # pbar.update(1)

            acc = correct / total
            acc_history.append(acc)

    return acc_history

# Запуск обучения
hist_alexnet = train(alexnet, train_dataloader, device, num_epochs=10, learning_rate=0.01)

  0%|                                                                                                                                                     | 0/2350 [00:00<?, ?it/s]
  0%|                                                                                                                                                     | 0/2350 [00:02<?, ?it/s][A


RuntimeError: [enforce fail at alloc_cpu.cpp:121] data. DefaultCPUAllocator: not enough memory: you tried to allocate 297369600 bytes.

In [None]:
import mxnet as mx
from mxnet import gluon, autograd, nd
from mxnet.gluon import nn
from mxnet.gluon.data.vision import datasets, transforms
import time

# Устанавливаем контекст CPU
ctx = mx.cpu()

# Трансформер для FashionMNIST
fashion_transformer = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=0.1307, std=0.3081)
])

# Загружаем данные
fashion_train_data = datasets.FashionMNIST(train=True).transform_first(fashion_transformer)
fashion_test_data = datasets.FashionMNIST(train=False).transform_first(fashion_transformer)

# DataLoader с меньшим батчем
fashion_batch_size = 128  # Уменьшили с 256 до 128
train_dataloader = gluon.data.DataLoader(fashion_train_data, batch_size=fashion_batch_size, shuffle=True)
test_dataloader = gluon.data.DataLoader(fashion_test_data, batch_size=fashion_batch_size, shuffle=False)

# Архитектура AlexNet
def build_alexnet():
    net = nn.Sequential()
    
    # Первый блок
    net.add(nn.Conv2D(channels=96, kernel_size=11, strides=4, padding=2, activation='relu'))
    net.add(nn.MaxPool2D(pool_size=3, strides=2))
    
    # Второй блок
    net.add(nn.Conv2D(channels=256, kernel_size=5, padding=2, activation='relu'))
    net.add(nn.MaxPool2D(pool_size=3, strides=2))
    
    # Третий блок
    net.add(nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'))
    net.add(nn.Conv2D(channels=384, kernel_size=3, padding=1, activation='relu'))
    net.add(nn.Conv2D(channels=256, kernel_size=3, padding=1, activation='relu'))
    net.add(nn.MaxPool2D(pool_size=3, strides=2))
    
    # Полносвязные слои
    net.add(nn.Flatten())
    net.add(nn.Dense(4096, activation='relu'))
    net.add(nn.Dropout(0.5))
    net.add(nn.Dense(4096, activation='relu'))
    net.add(nn.Dropout(0.5))
    net.add(nn.Dense(10))
    
    return net

# Создаем модель
alexnet = build_alexnet()
alexnet.initialize(ctx=ctx)

# Инициализация весов
def init_weights(m):
    if isinstance(m, nn.Dense):
        m.weight.initialize(mx.init.Xavier(), ctx=ctx)
        if m.bias is not None:
            m.bias.initialize(mx.init.Constant(0.01), ctx=ctx)
    elif isinstance(m, nn.Conv2D):
        m.weight.initialize(mx.init.Xavier(), ctx=ctx)
        if m.bias is not None:
            m.bias.initialize(mx.init.Zeros(), ctx=ctx)

alexnet.apply(init_weights)

# Функция обучения
def train(net, train_loader, ctx, num_epochs, learning_rate):
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': learning_rate, 'momentum': 0.9})
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    
    for epoch in range(num_epochs):
        train_loss = 0.0
        train_acc = 0.0
        num_batches = 0
        
        for i, (data, label) in enumerate(train_loader):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            
            with autograd.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            trainer.step(data.shape[0])
            
            train_loss += nd.mean(loss).asscalar()
            train_acc += nd.mean(output.argmax(axis=1) == label).asscalar()
            num_batches += 1
            
            if i % 10 == 0:
                print(f'Epoch {epoch+1}/{num_epochs}, Batch {i+1}/{len(train_loader)}, Loss: {train_loss/num_batches:.4f}, Acc: {train_acc/num_batches:.4f}')
        
        print(f'Epoch {epoch+1} completed - Loss: {train_loss/num_batches:.4f}, Accuracy: {train_acc/num_batches:.4f}')

# Запуск обучения
print("Начинаем обучение...")
train(alexnet, train_dataloader, ctx, num_epochs=10, learning_rate=0.01)
print("Обучение завершено!")