<a href="https://colab.research.google.com/github/okorch/project_AI_cancer/blob/main/canser_train_SimpleModel_2023.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Импортируем необходимые модули.

In [None]:
import pickle

import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
import numpy as np
import json

Загружаем из файла тензоры фотографий и соответствующие им метки (лейблы). Тензоры в нашем случае имеют размер 3×64×64 (3 канала, 64×64 пикселя).


In [None]:
# Read the pickled (images, labels) pair from Google Drive.
# NOTE(review): pickle.load executes arbitrary code embedded in the file, so
# only load pickles that come from a trusted source.
with open('/content/drive/MyDrive/X_train_new (2).pickle', 'rb') as pickle_file:
    X_train, Y_train = pickle.load(pickle_file)


Создаём класс датасета и разделяем данные на тестовые и тренировочные. Также создаём загрузчики данных.

---



In [None]:
class HistologyDataset(Dataset):
    """Map-style dataset pairing each image tensor with its label.

    Args:
        tensors: Indexable, sized collection of samples.
        labels: Indexable, sized collection of labels, aligned with
            ``tensors`` by position.

    Raises:
        ValueError: If ``tensors`` and ``labels`` differ in length. Without
            this check the mismatch would only show up later as an
            ``IndexError`` in the middle of an epoch, or as silently
            ignored trailing samples.
    """

    def __init__(self, tensors, labels):
        if len(tensors) != len(labels):
            raise ValueError(
                f"tensors and labels must have the same length, "
                f"got {len(tensors)} and {len(labels)}"
            )
        self.tensors = tensors
        self.labels = labels

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        return self.tensors[idx], self.labels[idx]

    def __len__(self):
        """Return the number of samples (equal to the number of labels)."""
        return len(self.labels)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
# NOTE: this rebinds X_train, so re-running the cell without reloading the
# data would split the already reduced training set again.
X_train, X_test, y_train, y_test = train_test_split(
    X_train, Y_train, test_size=0.2, random_state=42
)

train_dataset = HistologyDataset(X_train, y_train)
test_dataset = HistologyDataset(X_test, y_test)

batch_size = 64
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation must not shuffle: the test loss is averaged per batch, so a
# shuffled (and usually smaller) last batch makes the reported loss vary
# between otherwise identical evaluations.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


Определяем архитектуру модели

In [None]:
class SimpleCNN(nn.Module):
    """Small convolutional classifier for 3-channel 64x64 images.

    Two ``Conv2d -> ReLU -> MaxPool2d(2)`` stages (3 -> 16 -> 32 channels)
    halve the spatial size twice (64 -> 32 -> 16), after which the
    ``32 * 16 * 16`` features go through two linear layers that produce
    2 raw class scores (logits).

    Parameter names (``conv1``, ``conv2``, ``fc1``, ``fc2``) are part of the
    saved ``state_dict`` layout and must not be renamed.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(32 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        """Compute class logits.

        Args:
            x: Tensor of shape ``(N, 3, 64, 64)``; a single unbatched image
                of shape ``(3, 64, 64)`` is also accepted.

        Returns:
            Tensor of shape ``(N, 2)`` with unnormalised class scores
            (``N == 1`` for an unbatched input).

        Raises:
            RuntimeError: If the spatial size is not 64x64, because the
                flattened feature size no longer matches ``fc1``.
        """
        if x.dim() == 3:
            # Treat a lone image as a batch of one.
            x = x.unsqueeze(0)
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        # Flatten per sample and keep the batch dimension fixed. The former
        # ``view(-1, 32 * 16 * 16)`` silently folded wrongly sized images
        # into extra "samples" instead of failing.
        x = x.flatten(start_dim=1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

Создаем экземпляр модели

In [None]:
# Build an untrained SimpleCNN instance; no device placement happens here.
model = SimpleCNN()


Определяем функцию потерь и оптимизатор

In [None]:
# Cross-entropy over the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters with a learning rate of 1e-3.
# NOTE: the training cell further down creates both objects again.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

Функции для обучения и оценки точности модели

In [None]:
def train(model, train_loader, criterion, optimizer, device):
    """Run one training epoch and return the mean loss per batch.

    Args:
        model: Network to optimise; switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches. It
            does not need to support ``len()``.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Sum of the per-batch loss values divided by the number of batches.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Count batches here instead of calling len(train_loader) so that
        # loaders without a length work too.
        num_batches += 1
    if num_batches == 0:
        raise ValueError("train_loader yielded no batches")
    return running_loss / num_batches

def evaluate(model, test_loader, criterion, device):
    """Evaluate the model and return ``(mean batch loss, accuracy in %)``.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        test_loader: Iterable yielding ``(images, labels)`` batches. It
            does not need to support ``len()``.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy)``: ``loss`` is the sum of per-batch loss
        values divided by the number of batches; ``accuracy`` is the
        percentage of samples whose highest-scoring class equals the label.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()
    running_loss = 0.0
    num_batches = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            running_loss += criterion(outputs, labels).item()
            num_batches += 1
            # Gradients are already disabled here, so the legacy ``.data``
            # access is not needed to pick the predicted class.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if num_batches == 0 or total == 0:
        raise ValueError("test_loader yielded no samples")
    accuracy = 100 * correct / total
    return running_loss / num_batches, accuracy

Обучим модель 

In [None]:
num_epochs = 10
lr = 0.001
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# train() and evaluate() move every batch to `device`, so the model has to
# live there as well; otherwise the forward pass fails with a device
# mismatch as soon as a GPU is available.
model = model.to(device)

criterion = nn.CrossEntropyLoss()
# Created after the move so the optimizer tracks the parameters on `device`.
optimizer = optim.Adam(model.parameters(), lr=lr)

# Per-epoch history of the training loss, test loss and test accuracy.
train_losses, test_losses, accuracies = [], [], []
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    test_loss, accuracy = evaluate(model, test_loader, criterion, device)
    train_losses.append(train_loss)
    test_losses.append(test_loss)
    accuracies.append(accuracy)
    print(f"Epoch {epoch + 1} Train Loss: {train_loss:.4f} Test Loss: {test_loss:.4f} Accuracy: {accuracy:.2f}%")


Epoch 1 Train Loss: 0.6101 Test Loss: 0.5156 Accuracy: 74.05%
Epoch 2 Train Loss: 0.5133 Test Loss: 0.5069 Accuracy: 76.70%
Epoch 3 Train Loss: 0.4938 Test Loss: 0.4778 Accuracy: 76.95%
Epoch 4 Train Loss: 0.4934 Test Loss: 0.4804 Accuracy: 77.30%
Epoch 5 Train Loss: 0.4751 Test Loss: 0.4529 Accuracy: 79.40%
Epoch 6 Train Loss: 0.4626 Test Loss: 0.4766 Accuracy: 78.60%
Epoch 7 Train Loss: 0.4493 Test Loss: 0.4413 Accuracy: 79.65%
Epoch 8 Train Loss: 0.4230 Test Loss: 0.4384 Accuracy: 79.50%
Epoch 9 Train Loss: 0.3973 Test Loss: 0.4444 Accuracy: 79.50%
Epoch 10 Train Loss: 0.3685 Test Loss: 0.4328 Accuracy: 80.55%


Сохраняем веса модели в файл для дальнейшего использования в телеграм-боте.

In [None]:
# Snapshot the learned parameters and write them to 'weight.pth'.
# The tensors are saved on whatever device the model currently uses, so pass
# map_location to torch.load when loading on a different machine.
weight = model.state_dict()
torch.save(obj=weight, f='weight.pth')

Оценим качество обучения на тестовых данных

In [None]:
# Score the trained model once more on the same held-out loader that was
# used after every epoch.
test_loss, accuracy = evaluate(
    model=model,
    test_loader=test_loader,
    criterion=criterion,
    device=device,
)
print(f"Final Test Loss: {test_loss:.4f} Final Accuracy: {accuracy:.2f}%")

Final Test Loss: 0.4279 Final Accuracy: 80.55%
