In [None]:
# Mount Google Drive at /content/drive so the image folders stored there are
# reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#Labraries
!pip install transformers
!pip install torch torchvision

import os
from pathlib import Path
import shutil
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from transformers import SwinForImageClassification, AutoFeatureExtractor
from PIL import Image
import pandas as pd
import numpy as np



In [None]:
#Path to images
# Root folder on the mounted Google Drive; it is expected to contain one
# sub-folder per class (verified by the checks that follow).
data_path = '/content/drive/MyDrive/Cancer/AugImages'

In [None]:
#Checking for the existence of directories
# isdir (not exists): a regular file sitting at this path must be rejected here
# rather than failing later inside shutil.copytree.
if not os.path.isdir(data_path):
    raise FileNotFoundError(f"Directory {data_path} not found on Google Drive.")



In [None]:
#Checking for the existence of subdirectories
# One folder per class. The position of a folder in this list is what later
# pairs it with its integer class label.
subdirs = ['AugHSIL', 'AugLSIL', 'AugSCC', 'Neg250']
for subdir in subdirs:
    # isdir (not exists) so a stray file with a class name is not accepted.
    if not os.path.isdir(os.path.join(data_path, subdir)):
        raise FileNotFoundError(f"Directory {subdir} not found on {data_path}.")


In [None]:
#Copying data to a local directory
# The dataset is later built from this local copy instead of the Drive mount
# (presumably for faster reads - confirm).
# NOTE(review): the copy is skipped whenever the target already exists, so a
# partially copied folder left by an interrupted run would be reused as-is.
local_data_path = '/content/AugImages'
if not os.path.exists(local_data_path):
    shutil.copytree(data_path, local_data_path)


In [None]:
#Checking the structure of local data
# Show a per-class file count and a short sample instead of every file name:
# the full listing is very long and buries the rest of the notebook output.
print("Contents of the local directory:")
print(os.listdir(local_data_path))
for subdir in subdirs:
    entries = sorted(os.listdir(os.path.join(local_data_path, subdir)))
    print(f"Contents {subdir}: {len(entries)} files, e.g. {entries[:5]}")


Содержимое локальной директории:
['Neg250', 'AugSCC', 'AugHSIL', 'AugLSIL']
Содержимое AugHSIL: ['HSIL_7 (6)_aug_0.jpg', 'HSIL_1 (9)_aug_extra_39.jpg', 'HSIL_5 (17)_aug_extra_23.jpg', 'HSIL_6 (12)_aug_extra_34.jpg', 'HSIL_5 (16)_aug_extra_18.jpg', 'HSIL_8 (5)_aug_0.jpg', 'HSIL_7 (14)_aug_extra_63.jpg', 'HSIL_6 (1)_aug_extra_26.jpg', 'HSIL_5 (10)_aug_extra_55.jpg', 'HSIL_10 (12)_aug_extra_60.jpg', 'HSIL_10 (12)_aug_0.jpg', 'HSIL_6 (20)_aug_0.jpg', 'HSIL_5 (20)_aug_extra_0.jpg', 'HSIL_6 (11)_aug_0.jpg', 'HSIL_7 (4)_aug_0.jpg', 'HSIL_10 (18)_aug_extra_20.jpg', 'HSIL_4 (9)_aug_0.jpg', 'HSIL_4 (2)_aug_0.jpg', 'HSIL_7 (11)_aug_0.jpg', 'HSIL_10 (1)_aug_extra_28.jpg', 'HSIL_5 (19)_aug_0.jpg', 'HSIL_6 (12)_aug_0.jpg', 'HSIL_2 (2)_aug_0.jpg', 'HSIL_3 (4)_aug_0.jpg', 'HSIL_6 (27)_aug_0.jpg', 'HSIL_4 (16)_aug_extra_19.jpg', 'HSIL_5 (21)_aug_0.jpg', 'HSIL_7 (1)_aug_extra_15.jpg', 'HSIL_6 (2)_aug_0.jpg', 'HSIL_10 (10)_aug_0.jpg', 'HSIL_1 (13)_aug_extra_68.jpg', 'HSIL_2 (10)_aug_extra_31.jpg', 'HSIL_

In [None]:
#Parameters
batch_size = 16
num_epochs = 5
seed = 42

# Seed PyTorch's RNG before the model head and the shuffling DataLoader are
# created, so repeated runs draw the same random numbers from PyTorch.
torch.manual_seed(seed)

#Image preparation
# The pretrained Swin checkpoint expects ImageNet-normalised inputs, so apply
# the same per-channel mean/std after ToTensor has scaled pixels to [0, 1].
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

class CustomDataset(Dataset):
    """Image dataset built from one or more directory trees, one label per tree.

    Each directory in ``root_dirs`` is walked recursively. Every file whose
    extension is .jpg, .jpeg or .png (case-insensitive) becomes one sample,
    labelled with the entry of ``class_labels`` at the same position.

    Args:
        root_dirs: Directories to scan for images.
        class_labels: One label per directory, paired with ``root_dirs`` by position.
        transform: Optional callable applied to the RGB PIL image.

    Raises:
        ValueError: If ``root_dirs`` and ``class_labels`` differ in length.

    Items are ``(image, label, img_path)`` tuples.
    """

    # Extensions include the dot so that a name such as "notjpg" is not
    # mistaken for an image.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root_dirs, class_labels, transform=None):
        root_dirs = list(root_dirs)
        class_labels = list(class_labels)
        if len(root_dirs) != len(class_labels):
            # zip() would silently drop the unmatched directories or labels.
            raise ValueError(
                f"Got {len(root_dirs)} directories but {len(class_labels)} labels."
            )

        self.transform = transform
        self.image_paths = []
        self.labels = []
        for root_dir, class_label in zip(root_dirs, class_labels):
            # Sorted so the sample order does not depend on the order in which
            # the file system happens to list directory entries.
            paths = sorted(
                os.path.join(root, name)
                for root, _dirs, files in os.walk(root_dir)
                for name in files
                if name.lower().endswith(self.IMAGE_EXTENSIONS)
            )
            self.image_paths.extend(paths)
            self.labels.extend([class_label] * len(paths))

    def __len__(self):
        """Return the number of image files found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label, img_path)``."""
        img_path = self.image_paths[idx]
        # The context manager releases the file handle promptly; convert()
        # loads the pixel data and returns an image independent of the file.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = self.labels[idx]
        if self.transform is not None:
            image = self.transform(image)
        return image, label, img_path

In [None]:
#Creating a dataset with all classes
# One integer label per folder in `subdirs`, derived from the list so the two
# cannot drift apart: 0=AugHSIL, 1=AugLSIL, 2=AugSCC, 3=Neg250.
class_labels = list(range(len(subdirs)))
datasets = CustomDataset([os.path.join(local_data_path, x) for x in subdirs], class_labels, transform)
# Fail here with a clear message instead of later inside the training loop.
if len(datasets) == 0:
    raise RuntimeError(f"No images found under {local_data_path}.")
dataloader = DataLoader(datasets, batch_size=batch_size, shuffle=True)


In [None]:
#Loading the model
# Start from the pretrained Swin-Base checkpoint and give it a 4-class head.
# ignore_mismatched_sizes=True lets the checkpoint's 1000-way classifier
# weights be discarded and re-initialised at the new size, so the head is
# untrained until the training cell has run.
model = SwinForImageClassification.from_pretrained('microsoft/swin-base-patch4-window7-224', num_labels=4, ignore_mismatched_sizes=True)


Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-base-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([4]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([4, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
#Setting up the computation device
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Assign the result so the cell does not echo the entire model architecture
# as its output; Module.to returns the module itself.
model = model.to(device)


SwinForImageClassification(
  (swin): SwinModel(
    (embeddings): SwinEmbeddings(
      (patch_embeddings): SwinPatchEmbeddings(
        (projection): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      )
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): SwinEncoder(
      (layers): ModuleList(
        (0): SwinStage(
          (blocks): ModuleList(
            (0-1): 2 x SwinLayer(
              (layernorm_before): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
              (attention): SwinAttention(
                (self): SwinSelfAttention(
                  (query): Linear(in_features=128, out_features=128, bias=True)
                  (key): Linear(in_features=128, out_features=128, bias=True)
                  (value): Linear(in_features=128, out_features=128, bias=True)
                  (dropout): Dropout(p=0.0, inplace=False)
                )
                (output): SwinSelfO

In [None]:
#Optimizer and loss function
# Adam updates every parameter of the model (backbone and classification head)
# with a single learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# Cross-entropy between the model's logits and the integer class labels.
criterion = torch.nn.CrossEntropyLoss()


In [None]:
#Model training
for epoch in range(num_epochs):
    model.train()
    # Accumulate the loss over the whole epoch: the loss of the final
    # mini-batch alone is too noisy to show whether training is progressing.
    epoch_loss_sum = 0.0
    epoch_samples = 0
    for inputs, labels, _ in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs).logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # criterion returns the batch mean, so weight it by the batch size
        # (the last batch of an epoch may be smaller than batch_size).
        epoch_loss_sum += loss.item() * labels.size(0)
        epoch_samples += labels.size(0)
    if epoch_samples == 0:
        raise RuntimeError("The training dataloader yielded no batches.")
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss_sum / epoch_samples}")

#Saving of the trained model
# NOTE(review): this path is on the runtime's local disk rather than under the
# mounted Drive - confirm the checkpoint does not need to be kept there.
model.save_pretrained('/content/swin-trained')

Epoch 1/5, Loss: 0.208469420671463
Epoch 2/5, Loss: 0.019107971340417862
Epoch 3/5, Loss: 0.0032594443764537573
Epoch 4/5, Loss: 0.1489783525466919
Epoch 5/5, Loss: 0.03742654249072075


In [None]:
#Extracting feature vectors
# Put the model in evaluation mode before features are extracted.
model.eval()
# NOTE(review): feature_extractor is not referenced by any other cell shown
# here; images are preprocessed by `transform` instead. Confirm whether it is
# still needed.
feature_extractor = AutoFeatureExtractor.from_pretrained('microsoft/swin-base-patch4-window7-224')

def extract_features(data_loader):
    """Compute one pooled Swin feature vector per image.

    Uses the module-level ``model`` and ``device``.

    Args:
        data_loader: Iterable yielding ``(inputs, labels, paths)`` batches, as
            produced by a DataLoader over ``CustomDataset``.

    Returns:
        tuple: ``(features_list, names_list)`` - a list of 1-D NumPy arrays and
        the list of matching file names (base name only), in loader order.
    """
    features_list = []
    names_list = []
    # Ensure evaluation mode even if the model was left in training mode.
    model.eval()
    with torch.no_grad():
        for inputs, _, paths in data_loader:
            inputs = inputs.to(device)
            # Swin has no [CLS] token: position 0 of the token sequence is just
            # the first image patch. The backbone's pooled output (average over
            # all patch tokens of the final normalised layer) is the
            # image-level descriptor that the classification head consumes.
            features = model.swin(pixel_values=inputs).pooler_output.cpu().numpy()
            features_list.extend(features)
            names_list.extend(Path(path).name for path in paths)
    return features_list, names_list

# One non-shuffled loader per class folder, keyed by folder name, so features
# can be extracted and saved class by class in a stable order.
dataloaders_with_paths = {
    folder: DataLoader(
        CustomDataset(
            [os.path.join(local_data_path, folder)],
            [class_labels[position]],
            transform,
        ),
        batch_size=batch_size,
        shuffle=False,
    )
    for position, folder in enumerate(subdirs)
}




In [None]:
#Saving feature vectors to CSV files by category
output_dir = '/content/drive/MyDrive/Cancer/Features'
os.makedirs(output_dir, exist_ok=True)

for category in subdirs:
    features, names = extract_features(dataloaders_with_paths[category])
    # One row per image: file name first, then one column per feature dimension.
    df = pd.DataFrame(features)
    df.insert(0, 'name', names)
    df.to_csv(os.path.join(output_dir, f'{category}_features.csv'), index=False)
    print(f"{category}: saved {len(df)} feature vectors")

# The CSV files are written to (not moved into) the output directory.
print(f"Feature files have been saved to {output_dir}")


Файлы перемещены в /content/drive/MyDrive/Cancer/Features
