Modele takie jak maxvit_t, swin_t czy vit_b_16 są dostępne dopiero od nowszych wersji torchvision (od wersji 0.13 lub 0.14). Jeśli użyjesz wcześniejszych wersji, pojawią się błędy, np. AttributeError: module torchvision.models has no attribute maxvit_t.
Sugestia: Sprawdź wersję biblioteki torchvision w swoim środowisku:
import torchvision
print(torchvision.__version__)
Jeśli używasz starszej wersji, zaktualizuj ją:
pip install --upgrade torchvision

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import Dataset
from PIL import Image
import pandas as pd
from torchvision import transforms

class MultiInputModel(nn.Module):
    def __init__(self, num_classes=11, base_model='efficientnet_v2_m', filter_num_base=4):
        super(MultiInputModel, self).__init__()
        
        # Inicjalizacja modelu RGB
        self.base_model = base_model
        self.rgb_model, self.base_model_output_size = self._initialize_rgb_model(base_model)
        print(f"Model: {base_model}, base_model_output_size: {self.base_model_output_size}")

        # Inicjalizacja modelu binarnego
        self.binary_model = nn.Sequential(
            nn.Conv2d(1, filter_num_base * 2, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(filter_num_base * 2, filter_num_base * 4, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(filter_num_base * 4, 128),  # Dopasowanie wyjścia do 128
            nn.ReLU()
        )
        self.binary_model_output_size = 128

        # Warstwa łącząca
        total_input_size = self.base_model_output_size * 2 + self.binary_model_output_size
        print(f"Total input size to fc: {total_input_size}")
        self.fc = nn.Sequential(
            nn.Linear(total_input_size, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def _initialize_rgb_model(self, base_model):
        """
        Inicjalizuje wybrany model sieci RGB i zwraca model oraz rozmiar jego wyjścia.
        """
        if base_model.startswith('efficientnet'):  # Obsługa EfficientNet i EfficientNetV2
            model = getattr(models, base_model)(pretrained=True)
            model.classifier = nn.Identity()
            if base_model.startswith('efficientnet_v2'):
                return model, 1280  # Wyjście dla EfficientNetV2-M
            return model, 1280  # Wyjście dla EfficientNet-B0/B1
        
        elif base_model == 'googlenet':
            model = models.googlenet(pretrained=True)
            model.fc = nn.Identity()
            return model, 1024
        
        elif base_model == 'inception_v3':
            model = models.inception_v3(pretrained=True, aux_logits=False)  # Wyłącz dodatkowe głowice
            model.fc = nn.Identity()
            return model, 2048
        
        elif base_model == 'mobilenet_v2':
            model = models.mobilenet_v2(pretrained=True)
            model.classifier = nn.Identity()
            return model, 1280
        
        elif base_model == 'mobilenet_v3_large' or base_model == 'mobilenet_v3_small':
            model = getattr(models, base_model)(pretrained=True)
            model.classifier = nn.Identity()
            return model, 576
        
        elif base_model.startswith('resnet'):  # Obsługa ResNet (np. resnet18, resnet50)
            model = getattr(models, base_model)(pretrained=True)
            model.fc = nn.Identity()
            return model, 2048 if '50' in base_model or '101' in base_model else 512  # Rozmiar zależny od wariantu
        
        elif base_model == 'swin_t':
            model = models.swin_t(pretrained=True)
            model.head = nn.Identity()
            return model, 768
        
        elif base_model == 'vit_b_16':  # VisionTransformer
            model = models.vit_b_16(pretrained=True)
            model.heads = nn.Identity()
            return model, 768

        else:
            raise ValueError(f"Unsupported base model: {base_model}")

    def forward(self, t_image, b_image, s_image):
        # Ekstrakcja cech dla widoków RGB
        t_features = self.rgb_model(t_image)  # Widok T
        b_features = self.rgb_model(b_image)  # Widok B
        #print(f"T Features: {t_features.shape}, B Features: {b_features.shape}")

        # Ekstrakcja cech dla obrazu binarnego
        s_features = self.binary_model(s_image)
        #print(f"S Features: {s_features.shape}")

        # Połączenie cech
        combined_features = torch.cat([t_features, b_features, s_features], dim=1)
        #print(f"Combined Features: {combined_features.shape}")

        # Klasyfikacja
        output = self.fc(combined_features)
        return output

    @staticmethod
    def get_input_size(base_model):
        """
        Zwraca wymagane wymiary wejściowe dla danego modelu.
        
        Args:
            base_model (str): Nazwa modelu bazowego.
            
        Returns:
            tuple: Wymiary wejściowe modelu (wysokość, szerokość).
        """
        if base_model.startswith('efficientnet') or base_model.startswith('mobilenet'):
            return (224, 224)  # EfficientNet, MobileNet wymagają 224x224
            
        elif base_model == 'googlenet':
            return (224, 224)  # GoogLeNet wymaga 224x224
        
        elif base_model == 'inception_v3':
            return (299, 299)  # Inception V3 wymaga 299x299
        
        elif base_model == 'maxvit_t':
            return (224, 224)  # MaxVit wymaga 224x224
        
        elif base_model.startswith('resnet'):
            return (224, 224)  # ResNet (np. ResNet50/ResNet101) wymaga 224x224
        
        elif base_model.startswith('squeezenet'):
            return (224, 224)  # SqueezeNet wymaga 224x224
        
        elif base_model == 'swin_t':
            return (224, 224)  # SwinTransformer wymaga 224x224
        
        elif base_model == 'vit_b_16':  # VisionTransformer
            return (224, 224)  # VisionTransformer wymaga 224x224
        
        else:
            raise ValueError(f"Unsupported base model: {base_model}")
    def forward2(self, t_image, b_image, s_image):
        # Pobierz wymagany rozmiar wejściowy
        input_size = self.get_input_size(self.base_model)
        
        # Weryfikacja wejścia `t_image` i `b_image` (RGB) oraz `s_image` (binary)
        assert t_image.shape[-2:] == input_size, f"Expected T image to be of size {input_size}, but got {t_image.shape[-2:]}"
        assert b_image.shape[-2:] == input_size, f"Expected B image to be of size {input_size}, but got {b_image.shape[-2:]}"
        assert s_image.shape[-2:] == input_size, f"Expected S image to be of size {input_size}, but got {s_image.shape[-2:]}"
        
        # Ekstrakcja cech dla widoków RGB
        t_features = self.rgb_model(t_image)  # Widok T
        b_features = self.rgb_model(b_image)  # Widok B

        # Ekstrakcja cech dla obrazu binarnego
        s_features = self.binary_model(s_image)

        # Połączenie cech
        combined_features = torch.cat([t_features, b_features, s_features], dim=1)

        # Klasyfikacja
        output = self.fc(combined_features)
        return output
    
class MultiInputDataset(Dataset):
    def __init__(self, csv_file, transform_rgb=None, transform_binary=None):
        self.data = pd.read_csv(csv_file)

        # Tworzenie mapowania nazw klas na liczby całkowite
        self.class_to_idx = {class_name: idx for idx, class_name in enumerate(self.data['class'].unique())}

        self.transform_rgb = transform_rgb
        self.transform_binary = transform_binary

    def __len__(self):
        return len(self.data) // 3  # Każde ziarno ma 3 obrazy

    def __getitem__(self, idx):
        # Pobierz trzy obrazy
        base_idx = idx * 3
        t_path = self.data.iloc[base_idx]['path']
        b_path = self.data.iloc[base_idx + 1]['path']
        s_path = self.data.iloc[base_idx + 2]['path']

        t_image = Image.open(t_path).convert("RGB")
        b_image = Image.open(b_path).convert("RGB")
        s_image = Image.open(s_path).convert("L")  # Obraz binarny

        # Transformacje
        if self.transform_rgb:
            t_image = self.transform_rgb(t_image)
            b_image = self.transform_rgb(b_image)
        if self.transform_binary:
            s_image = self.transform_binary(s_image)

        # Pobierz nazwę klasy i przekształć na indeks numeryczny
        class_name = self.data.iloc[base_idx]['class']
        label = self.class_to_idx[class_name]  # Mapowanie nazwy klasy na numer
        label = torch.tensor(label, dtype=torch.long)  # Konwersja na tensor PyTorch

        return t_image, b_image, s_image, label

#Krok 2: Transformacje dla obrazów RGB i binarnych:
# Transformacje dla obrazów RGB
transform_rgb = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Transformacje dla obrazów binarnych
transform_binary = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

In [2]:
from torch.utils.data import DataLoader
import torch.optim as optim
import os
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Włącz blokowanie błędów CUDA
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

# Załaduj dane
train_dataset = MultiInputDataset("CSV/dataset/train.csv", transform_rgb=transform_rgb, transform_binary=transform_binary)
val_dataset = MultiInputDataset("CSV/dataset/val.csv", transform_rgb=transform_rgb, transform_binary=transform_binary)
test_dataset = MultiInputDataset("CSV/dataset/test.csv", transform_rgb=transform_rgb, transform_binary=transform_binary)

# List of models to train
models_list = ['efficientnet_v2_m', 'mobilenet_v3_small', 'resnet34', 'swin_t', 'vit_b_16']#'efficientnet_b0',
batch_sizes = {
    'efficientnet_b0': 32,
    'mobilenet_v3_small': 32,
    'efficientnet_v2_m': 16,
    'resnet34': 32,
    'swin_t' :16,
    'vit_b_16': 8  
}
for model_name in models_list:
    # Inicjalizacja modelu
    model = MultiInputModel(num_classes=11, base_model=model_name)  # Liczba klas
    model = model.to("cuda")  # Jeśli używasz GPU

    #Dynamicznie przydzielany batch_size

    batch_size = batch_sizes[model_name]
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    # Optymalizator i funkcja straty
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Zapis logów
    log_file = f"training_results/training_log_{model_name}.txt"
    with open(log_file, "w") as f:
        f.write("epoch,train_loss,val_loss,train_accuracy,val_accuracy,train_precision,val_precision,train_recall,val_recall,train_f1,val_f1\n")

    # Wczesne zatrzymanie - parametry
    early_stop_patience = 5  # Liczba epok bez poprawy
    best_val_loss = float("inf")
    patience_counter = 0
    best_model_path = f"training_results/best_model_{model_name}.pth"

    # Pętla treningowa
    num_epochs = 50
    for epoch in range(num_epochs):
        # === TRENING ===
        print(f"Epoch {epoch + 1}/{num_epochs}")
        model.train()
        train_loss = 0
        train_true = []
        train_pred = []

        # Dodaj pasek postępu do pętli batchy
        with tqdm(total=len(train_loader), desc="Training", unit="batch") as pbar:
            for t_image, b_image, s_image, labels in train_loader:
                t_image, b_image, s_image, labels = (
                    t_image.to("cuda"),
                    b_image.to("cuda"),
                    s_image.to("cuda"),
                    labels.to("cuda")
                )

                # Oblicz predykcje i stratę
                outputs = model(t_image, b_image, s_image)
                loss = criterion(outputs, labels)

                # Backpropagation
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                train_true.extend(labels.cpu().numpy())
                train_pred.extend(predicted.cpu().numpy())
                pbar.set_postfix({"loss": f"{train_loss / (pbar.n + 1):.4f}"})  # Wyświetl średnią stratę
                pbar.update(1)  # Aktualizuj pasek postępu o 1 krok

        train_loss /= len(train_loader)  # Średnia strata w treningu
        train_accuracy = accuracy_score(train_true, train_pred)
        train_precision = precision_score(train_true, train_pred, average="weighted")
        train_recall = recall_score(train_true, train_pred, average="weighted")
        train_f1 = f1_score(train_true, train_pred, average="weighted")
        #print(f"Train Loss: {train_loss:.4f}")

        # === WALIDACJA ===
        model.eval()
        val_loss = 0
        val_true = []
        val_pred = []

        with torch.no_grad():  # Wyłącz gradienty
            with tqdm(total=len(val_loader), desc="Validation", unit="batch") as pbar_val:
                for t_image, b_image, s_image, labels in val_loader:
                    t_image, b_image, s_image, labels = (
                        t_image.to("cuda"),
                        b_image.to("cuda"),
                        s_image.to("cuda"),
                        labels.to("cuda")
                    )
                    outputs = model(t_image, b_image, s_image)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()

                    _, predicted = torch.max(outputs, 1)
                    val_true.extend(labels.cpu().numpy())
                    val_pred.extend(predicted.cpu().numpy())

                    pbar_val.update(1)  # Aktualizuj pasek postępu walidacji

        val_loss /= len(val_loader)  # Średnia strata w walidacji
        val_accuracy = accuracy_score(val_true, val_pred)
        val_precision = precision_score(val_true, val_pred, average="weighted")
        val_recall = recall_score(val_true, val_pred, average="weighted")
        val_f1 = f1_score(val_true, val_pred, average="weighted")
        #print(f"Val Loss: {val_loss:.4f}")

        # === LOGI ===
        #print(f"Epoch {epoch + 1}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
        with open(log_file, "a") as f:
            f.write(
                f"{epoch + 1},{train_loss:.4f},{val_loss:.4f},{train_accuracy:.4f},{val_accuracy:.4f},"
                f"{train_precision:.4f},{val_precision:.4f},{train_recall:.4f},{val_recall:.4f},{train_f1:.4f},{val_f1:.4f}\n"
            )

        # === WCZESNE ZATRZYMANIE ===
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Zapis najlepszego modelu
            torch.save(model, best_model_path) #Zapisanie modelu i architektury w pliku pth
            print(f"Best model saved at epoch {epoch + 1}")
        else:
            patience_counter += 1
            print(f"No improvement in val loss for {patience_counter} epoch(s)")

        if patience_counter >= early_stop_patience:
            print("Early stopping triggered. Training stopped.")
            break
    """
    # === TEST ===
    # Wczytaj najlepszy model
    model.load(torch.load(best_model_path))
    model.eval()

    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for t_image, b_image, s_image, labels in test_loader:
            t_image, b_image, s_image, labels = (
                t_image.to("cuda"),
                b_image.to("cuda"),
                s_image.to("cuda"),
                labels.to("cuda")
            )
            outputs = model(t_image, b_image, s_image)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            # Oblicz dokładność
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    test_loss /= len(test_loader)
    accuracy = correct / total

    # Zapis wyniku testu
    log_file_test = f"training_results/test_log_{model_name}.txt"
    with open(log_file_test, "w") as f_t:
            f_t.write(f"Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy:.4f}")
    """
    # Zwalnianie pamięci po zakończeniu pracy z modelem
    del model  # Usuń model z pamięci
    torch.cuda.empty_cache()  # Wyczyść pamięć GPU




Model: efficientnet_v2_m, base_model_output_size: 1280
Total input size to fc: 2688
Epoch 1/50


Training: 100%|██████████| 4344/4344 [54:35<00:00,  1.33batch/s, loss=1.9045]
Validation: 100%|██████████| 931/931 [06:03<00:00,  2.56batch/s]


Best model saved at epoch 1
Epoch 2/50


Training: 100%|██████████| 4344/4344 [54:26<00:00,  1.33batch/s, loss=1.2022]
Validation: 100%|██████████| 931/931 [06:05<00:00,  2.55batch/s]


Best model saved at epoch 2
Epoch 3/50


Training: 100%|██████████| 4344/4344 [54:25<00:00,  1.33batch/s, loss=0.7769]
Validation: 100%|██████████| 931/931 [06:05<00:00,  2.55batch/s]


Best model saved at epoch 3
Epoch 4/50


Training: 100%|██████████| 4344/4344 [54:23<00:00,  1.33batch/s, loss=0.5478]
Validation: 100%|██████████| 931/931 [06:05<00:00,  2.55batch/s]


Best model saved at epoch 4
Epoch 5/50


Training: 100%|██████████| 4344/4344 [54:25<00:00,  1.33batch/s, loss=0.4333]
Validation: 100%|██████████| 931/931 [06:05<00:00,  2.54batch/s]


Best model saved at epoch 5
Epoch 6/50


Training: 100%|██████████| 4344/4344 [54:23<00:00,  1.33batch/s, loss=0.3592]
Validation: 100%|██████████| 931/931 [06:05<00:00,  2.55batch/s]


Best model saved at epoch 6
Epoch 7/50


Training: 100%|██████████| 4344/4344 [54:24<00:00,  1.33batch/s, loss=0.3168]
Validation: 100%|██████████| 931/931 [06:04<00:00,  2.55batch/s]


No improvement in val loss for 1 epoch(s)
Epoch 8/50


Training: 100%|██████████| 4344/4344 [54:22<00:00,  1.33batch/s, loss=0.2781]
Validation: 100%|██████████| 931/931 [06:04<00:00,  2.56batch/s]


No improvement in val loss for 2 epoch(s)
Epoch 9/50


Training: 100%|██████████| 4344/4344 [54:20<00:00,  1.33batch/s, loss=0.2514]
Validation: 100%|██████████| 931/931 [06:04<00:00,  2.55batch/s]


No improvement in val loss for 3 epoch(s)
Epoch 10/50


Training: 100%|██████████| 4344/4344 [55:06<00:00,  1.31batch/s, loss=0.2297]
Validation: 100%|██████████| 931/931 [06:04<00:00,  2.56batch/s]


Best model saved at epoch 10
Epoch 11/50


Training: 100%|██████████| 4344/4344 [54:21<00:00,  1.33batch/s, loss=0.2118]
Validation: 100%|██████████| 931/931 [06:03<00:00,  2.56batch/s]


No improvement in val loss for 1 epoch(s)
Epoch 12/50


Training: 100%|██████████| 4344/4344 [54:20<00:00,  1.33batch/s, loss=0.1921]
Validation: 100%|██████████| 931/931 [06:17<00:00,  2.47batch/s]


No improvement in val loss for 2 epoch(s)
Epoch 13/50


Training: 100%|██████████| 4344/4344 [54:41<00:00,  1.32batch/s, loss=0.1831]
Validation: 100%|██████████| 931/931 [06:05<00:00,  2.55batch/s]


No improvement in val loss for 3 epoch(s)
Epoch 14/50


Training: 100%|██████████| 4344/4344 [54:20<00:00,  1.33batch/s, loss=0.1702]
Validation: 100%|██████████| 931/931 [06:04<00:00,  2.55batch/s]


Best model saved at epoch 14
Epoch 15/50


Training: 100%|██████████| 4344/4344 [54:22<00:00,  1.33batch/s, loss=0.1612]
Validation: 100%|██████████| 931/931 [06:04<00:00,  2.55batch/s]


No improvement in val loss for 1 epoch(s)
Epoch 16/50


Training: 100%|██████████| 4344/4344 [54:19<00:00,  1.33batch/s, loss=0.1582]
Validation: 100%|██████████| 931/931 [06:04<00:00,  2.56batch/s]


No improvement in val loss for 2 epoch(s)
Epoch 17/50


Training: 100%|██████████| 4344/4344 [54:21<00:00,  1.33batch/s, loss=0.1461]
Validation: 100%|██████████| 931/931 [06:04<00:00,  2.55batch/s]


No improvement in val loss for 3 epoch(s)
Epoch 18/50


Training: 100%|██████████| 4344/4344 [54:22<00:00,  1.33batch/s, loss=0.1360]
Validation: 100%|██████████| 931/931 [06:04<00:00,  2.55batch/s]


No improvement in val loss for 4 epoch(s)
Epoch 19/50


Training: 100%|██████████| 4344/4344 [54:46<00:00,  1.32batch/s, loss=0.1310] 
Validation: 100%|██████████| 931/931 [06:05<00:00,  2.55batch/s]


No improvement in val loss for 5 epoch(s)
Early stopping triggered. Training stopped.


Downloading: "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth" to /home/mk/.cache/torch/hub/checkpoints/mobilenet_v3_small-047dcff4.pth
100%|██████████| 9.83M/9.83M [00:00<00:00, 10.8MB/s]


Model: mobilenet_v3_small, base_model_output_size: 576
Total input size to fc: 1280
Epoch 1/50


Training: 100%|██████████| 2172/2172 [20:54<00:00,  1.73batch/s, loss=0.4512]
Validation: 100%|██████████| 466/466 [04:00<00:00,  1.94batch/s]


Best model saved at epoch 1
Epoch 2/50


Training: 100%|██████████| 2172/2172 [20:42<00:00,  1.75batch/s, loss=0.2062]
Validation: 100%|██████████| 466/466 [04:00<00:00,  1.94batch/s]


Best model saved at epoch 2
Epoch 3/50


Training: 100%|██████████| 2172/2172 [20:44<00:00,  1.75batch/s, loss=0.1614]
Validation: 100%|██████████| 466/466 [04:00<00:00,  1.94batch/s]


No improvement in val loss for 1 epoch(s)
Epoch 4/50


Training: 100%|██████████| 2172/2172 [20:44<00:00,  1.75batch/s, loss=0.1326]
Validation: 100%|██████████| 466/466 [04:00<00:00,  1.94batch/s]


No improvement in val loss for 2 epoch(s)
Epoch 5/50


Training: 100%|██████████| 2172/2172 [20:43<00:00,  1.75batch/s, loss=0.1180]
Validation: 100%|██████████| 466/466 [03:59<00:00,  1.94batch/s]


No improvement in val loss for 3 epoch(s)
Epoch 6/50


Training: 100%|██████████| 2172/2172 [20:44<00:00,  1.74batch/s, loss=0.1047]
Validation: 100%|██████████| 466/466 [04:00<00:00,  1.94batch/s]


No improvement in val loss for 4 epoch(s)
Epoch 7/50


Training: 100%|██████████| 2172/2172 [20:43<00:00,  1.75batch/s, loss=0.0948]
Validation: 100%|██████████| 466/466 [03:59<00:00,  1.94batch/s]


No improvement in val loss for 5 epoch(s)
Early stopping triggered. Training stopped.


Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /home/mk/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:07<00:00, 11.7MB/s]


Model: resnet34, base_model_output_size: 512
Total input size to fc: 1152
Epoch 1/50


Training: 100%|██████████| 2172/2172 [25:14<00:00,  1.43batch/s, loss=1.6726]
Validation: 100%|██████████| 466/466 [04:24<00:00,  1.76batch/s]


Best model saved at epoch 1
Epoch 2/50


Training: 100%|██████████| 2172/2172 [24:30<00:00,  1.48batch/s, loss=0.8187]
Validation: 100%|██████████| 466/466 [04:15<00:00,  1.82batch/s]


Best model saved at epoch 2
Epoch 3/50


Training: 100%|██████████| 2172/2172 [24:33<00:00,  1.47batch/s, loss=0.5215]
Validation: 100%|██████████| 466/466 [04:15<00:00,  1.82batch/s]


Best model saved at epoch 3
Epoch 4/50


Training: 100%|██████████| 2172/2172 [24:33<00:00,  1.47batch/s, loss=0.3857]
Validation: 100%|██████████| 466/466 [04:16<00:00,  1.82batch/s]


Best model saved at epoch 4
Epoch 5/50


Training: 100%|██████████| 2172/2172 [24:35<00:00,  1.47batch/s, loss=0.3127]
Validation: 100%|██████████| 466/466 [04:15<00:00,  1.82batch/s]


No improvement in val loss for 1 epoch(s)
Epoch 6/50


Training: 100%|██████████| 2172/2172 [24:34<00:00,  1.47batch/s, loss=0.2584]
Validation: 100%|██████████| 466/466 [04:15<00:00,  1.82batch/s]


No improvement in val loss for 2 epoch(s)
Epoch 7/50


Training: 100%|██████████| 2172/2172 [24:32<00:00,  1.47batch/s, loss=0.2182]
Validation: 100%|██████████| 466/466 [04:16<00:00,  1.82batch/s]


Best model saved at epoch 7
Epoch 8/50


Training: 100%|██████████| 2172/2172 [24:32<00:00,  1.47batch/s, loss=0.1866]
Validation: 100%|██████████| 466/466 [04:16<00:00,  1.82batch/s]


No improvement in val loss for 1 epoch(s)
Epoch 9/50


Training: 100%|██████████| 2172/2172 [24:35<00:00,  1.47batch/s, loss=0.1601]
Validation: 100%|██████████| 466/466 [04:16<00:00,  1.82batch/s]


Best model saved at epoch 9
Epoch 10/50


Training: 100%|██████████| 2172/2172 [24:31<00:00,  1.48batch/s, loss=0.1425]
Validation: 100%|██████████| 466/466 [04:16<00:00,  1.82batch/s]


Best model saved at epoch 10
Epoch 11/50


Training: 100%|██████████| 2172/2172 [24:31<00:00,  1.48batch/s, loss=0.1236]
Validation: 100%|██████████| 466/466 [04:15<00:00,  1.82batch/s]


No improvement in val loss for 1 epoch(s)
Epoch 12/50


Training: 100%|██████████| 2172/2172 [24:32<00:00,  1.48batch/s, loss=0.1104]
Validation: 100%|██████████| 466/466 [04:15<00:00,  1.82batch/s]


No improvement in val loss for 2 epoch(s)
Epoch 13/50


Training: 100%|██████████| 2172/2172 [24:31<00:00,  1.48batch/s, loss=0.0968]
Validation: 100%|██████████| 466/466 [04:15<00:00,  1.82batch/s]


No improvement in val loss for 3 epoch(s)
Epoch 14/50


Training: 100%|██████████| 2172/2172 [24:32<00:00,  1.47batch/s, loss=0.0890]
Validation: 100%|██████████| 466/466 [04:15<00:00,  1.82batch/s]


No improvement in val loss for 4 epoch(s)
Epoch 15/50


Training: 100%|██████████| 2172/2172 [24:32<00:00,  1.48batch/s, loss=0.0794]
Validation: 100%|██████████| 466/466 [04:16<00:00,  1.82batch/s]


No improvement in val loss for 5 epoch(s)
Early stopping triggered. Training stopped.


Downloading: "https://download.pytorch.org/models/swin_t-704ceda3.pth" to /home/mk/.cache/torch/hub/checkpoints/swin_t-704ceda3.pth
100%|██████████| 108M/108M [00:09<00:00, 11.7MB/s] 


Model: swin_t, base_model_output_size: 768
Total input size to fc: 1664
Epoch 1/50


Training: 100%|██████████| 4344/4344 [37:58<00:00,  1.91batch/s, loss=2.4007]
Validation: 100%|██████████| 931/931 [05:18<00:00,  2.92batch/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Best model saved at epoch 1
Epoch 2/50


Training: 100%|██████████| 4344/4344 [37:59<00:00,  1.91batch/s, loss=2.3969]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Validation: 100%|██████████| 931/931 [05:18<00:00,  2.93batch/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Best model saved at epoch 2
Epoch 3/50


Training: 100%|██████████| 4344/4344 [37:57<00:00,  1.91batch/s, loss=2.3969]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Validation: 100%|██████████| 931/931 [05:17<00:00,  2.93batch/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


No improvement in val loss for 1 epoch(s)
Epoch 4/50


Training: 100%|██████████| 4344/4344 [37:57<00:00,  1.91batch/s, loss=2.3969]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Validation: 100%|██████████| 931/931 [05:18<00:00,  2.93batch/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Best model saved at epoch 4
Epoch 5/50


Training: 100%|██████████| 4344/4344 [38:00<00:00,  1.90batch/s, loss=2.3969]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Validation:  86%|████████▋ | 805/931 [04:46<00:44,  2.81batch/s]


KeyboardInterrupt: 

In [None]:
# === TEST ===
# Wczytaj najlepszy model
model = torch.load(best_model_path)
model.eval()

test_loss = 0
correct = 0
total = 0
with torch.no_grad():
    for t_image, b_image, s_image, labels in test_loader:
        t_image, b_image, s_image, labels = (
            t_image.to("cuda"),
            b_image.to("cuda"),
            s_image.to("cuda"),
            labels.to("cuda")
        )
        outputs = model(t_image, b_image, s_image)
        loss = criterion(outputs, labels)
        test_loss += loss.item()

        # Oblicz dokładność
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

test_loss /= len(test_loader)
accuracy = correct / total

# Zapis wyniku testu
log_file_test = f"training_results/test_log_{model_name}.txt"
with open(log_file_test, "w") as f_t:
        f_t.write(f"Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy:.4f}")

# Zwalnianie pamięci po zakończeniu pracy z modelem
del model  # Usuń model z pamięci
torch.cuda.empty_cache()  # Wyczyść pamięć GPU


*******************