In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet18

# Ustawienia
batch_size = 64
num_classes = 50
learning_rate = 0.01
epochs = 25

# Przygotowanie danych
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)

# Wybieram pierwsze 50 klas
trainset.targets = torch.tensor(trainset.targets)
train_indices = torch.where(trainset.targets < num_classes)[0]
trainloader = torch.utils.data.DataLoader(torch.utils.data.Subset(trainset, train_indices), batch_size=batch_size, shuffle=True)

testset.targets = torch.tensor(testset.targets)
test_indices = torch.where(testset.targets < num_classes)[0]
testloader = torch.utils.data.DataLoader(torch.utils.data.Subset(testset, test_indices), batch_size=batch_size, shuffle=False)

# Definicja modelu
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.feature_extractor = resnet18(pretrained=False)
        self.feature_extractor.fc = nn.Linear(self.feature_extractor.fc.in_features, num_classes)

    def forward(self, x):
        return self.feature_extractor(x)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN(num_classes=num_classes).to(device)

# Funkcja kosztu i optymalizator
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Trenowanie modelu
def train_model():
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(trainloader)}")

# Testowanie modelu
def test_model():
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"Accuracy: {100 * correct / total}%")

train_model()
test_model()


Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169M/169M [00:44<00:00, 3.78MB/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified




Epoch 1/25, Loss: 3.590927236830182
Epoch 2/25, Loss: 3.065222014551577
Epoch 3/25, Loss: 2.6948544107129813
Epoch 4/25, Loss: 2.334001984742596
Epoch 5/25, Loss: 2.004494527111883
Epoch 6/25, Loss: 1.6793539179560473
Epoch 7/25, Loss: 1.3771285439086387
Epoch 8/25, Loss: 1.0697022126153912
Epoch 9/25, Loss: 0.7690570374279071
Epoch 10/25, Loss: 0.47923010797299387
Epoch 11/25, Loss: 0.28561446489885334
Epoch 12/25, Loss: 0.18480745138948226
Epoch 13/25, Loss: 0.1455725004463016
Epoch 14/25, Loss: 0.1554590116643235
Epoch 15/25, Loss: 0.11058170236932957
Epoch 16/25, Loss: 0.09902711668292351
Epoch 17/25, Loss: 0.11785474658021917
Epoch 18/25, Loss: 0.07839409912319477
Epoch 19/25, Loss: 0.08542499877036075
Epoch 20/25, Loss: 0.09230655690778972
Epoch 21/25, Loss: 0.08127504912421793
Epoch 22/25, Loss: 0.07493927152088993
Epoch 23/25, Loss: 0.0701323268539689
Epoch 24/25, Loss: 0.05750813269856698
Epoch 25/25, Loss: 0.06439444224369209
Accuracy: 55.06%


In [5]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics.pairwise import cosine_similarity

# Użycie wytrenowanego modelu jako ekstraktora cech
class FeatureExtractor(nn.Module):
    def __init__(self, model):
        super(FeatureExtractor, self).__init__()
        self.feature_extractor = nn.Sequential(*list(model.feature_extractor.children())[:-1])  # Bez ostatniej warstwy FC

    def forward(self, x):
        x = self.feature_extractor(x)
        return x.view(x.size(0), -1)

# Podział zbioru na A i B
half_classes = num_classes // 2
trainset_A_indices = np.where(np.array(trainset.targets) < half_classes)[0]
trainset_B_indices = np.where(np.array(trainset.targets) >= half_classes)[0]

trainloader_A = torch.utils.data.DataLoader(torch.utils.data.Subset(trainset, trainset_A_indices), batch_size=batch_size, shuffle=True)
trainloader_B = torch.utils.data.DataLoader(torch.utils.data.Subset(trainset, trainset_B_indices), batch_size=batch_size, shuffle=True)

testset_B_indices = np.where(np.array(testset.targets) >= half_classes)[0]
testloader_B = torch.utils.data.DataLoader(torch.utils.data.Subset(testset, testset_B_indices), batch_size=batch_size, shuffle=False)

# Trenujemy ekstraktor cech tylko na zbiorze A
model.eval()
feature_extractor_A = FeatureExtractor(model).to(device)

def extract_features(loader, feature_extractor):
    features, labels = [], []
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            feat = feature_extractor(inputs).cpu().numpy()
            features.append(feat)
            labels.append(targets.cpu().numpy())
    return np.vstack(features), np.hstack(labels)

train_features_A, train_labels_A = extract_features(trainloader_A, feature_extractor_A)
test_features_A, test_labels_A = extract_features(testloader, feature_extractor_A)

# Testowanie dla różnych wielkości podzbiorów na zbiorze A
def sample_subset(features, labels, samples_per_class):
    sampled_features, sampled_labels = [], []
    unique_classes = np.unique(labels)
    for cls in unique_classes:
        indices = np.where(labels == cls)[0]
        chosen_indices = np.random.choice(indices, min(samples_per_class, len(indices)), replace=False)
        sampled_features.append(features[chosen_indices])
        sampled_labels.append(labels[chosen_indices])
    return np.vstack(sampled_features), np.hstack(sampled_labels)

def knn_classification(train_features, train_labels, test_features, test_labels, k=5):
    knn = KNeighborsClassifier(n_neighbors=k, metric='cosine')
    knn.fit(train_features, train_labels)
    predictions = knn.predict(test_features)
    accuracy = np.mean(predictions == test_labels)
    print(f'Accuracy dla k={k}: {accuracy * 100:.2f}%')

for subset_size in [1, 5, 10]:
    sampled_train_features_A, sampled_train_labels_A = sample_subset(train_features_A, train_labels_A, subset_size)
    print(f'Czesc A: Klasyfikacja dla podzbiorow o rozmiarze {subset_size}')
    knn_classification(sampled_train_features_A, sampled_train_labels_A, test_features_A, test_labels_A, k=5)


train_features_B, train_labels_B = extract_features(trainloader_B, feature_extractor_A)
test_features_B, test_labels_B = extract_features(testloader_B, feature_extractor_A)

# Klasyfikacja KNN na zbiorze B
for subset_size in [1, 5, 10]:
    sampled_train_features_B, sampled_train_labels_B = sample_subset(train_features_B, train_labels_B, subset_size)
    print(f'Czesc B: Klasyfikacja dla podzbiorow o rozmiarze {subset_size}')
    knn_classification(sampled_train_features_B, sampled_train_labels_B, test_features_B, test_labels_B, k=5)


Czesc A: Klasyfikacja dla podzbiorow o rozmiarze 1
Accuracy dla k=5: 7.78%
Czesc A: Klasyfikacja dla podzbiorow o rozmiarze 5
Accuracy dla k=5: 24.86%
Czesc A: Klasyfikacja dla podzbiorow o rozmiarze 10
Accuracy dla k=5: 27.46%
Czesc B: Klasyfikacja dla podzbiorow o rozmiarze 1
Accuracy dla k=5: 8.52%
Czesc B: Klasyfikacja dla podzbiorow o rozmiarze 5
Accuracy dla k=5: 20.43%
Czesc B: Klasyfikacja dla podzbiorow o rozmiarze 10
Accuracy dla k=5: 23.35%
