In [55]:
import torch
import yaml
from torchvision import transforms, datasets
import torchvision
import numpy as np
import os
from sklearn import preprocessing
from torch.utils.data.dataloader import DataLoader

In [56]:
#sys.path.append('../')
from resnet_base_network import ResNet18

In [57]:
batch_size = 512
data_transforms = torchvision.transforms.Compose([transforms.ToTensor()])

In [58]:
config = yaml.load(open("./config.yaml", "r"), Loader=yaml.FullLoader)

In [60]:
train_dataset = datasets.STL10('./data', split='train', download=True,
                               transform=data_transforms)

test_dataset = datasets.STL10('./data', split='test', download=False,
                               transform=data_transforms)

Using downloaded and verified file: ./data/stl10_binary.tar.gz
Extracting ./data/stl10_binary.tar.gz to ./data


In [61]:
print("Input shape:", train_dataset[0][0].shape)

Input shape: torch.Size([3, 96, 96])


In [62]:
train_loader = DataLoader(train_dataset, batch_size=batch_size,
                          num_workers=0, drop_last=False, shuffle=True)

test_loader = DataLoader(test_dataset, batch_size=batch_size,
                          num_workers=0, drop_last=False, shuffle=True)

In [63]:
device = 'cpu' #'cuda' if torch.cuda.is_available() else 'cpu'
encoder = ResNet18(**config['network'])
output_feature_dim = encoder.projetion.net[0].in_features



In [64]:
#load pre-trained parameters
load_params = torch.load(os.path.join('./Models/BYOL.pth'),
                         map_location=torch.device(torch.device(device)))

if 'online_network_state_dict' in load_params:
    encoder.load_state_dict(load_params['online_network_state_dict'])
    print("Parameters successfully loaded.")

# remove the projection head
encoder = torch.nn.Sequential(*list(encoder.children())[:-1])    
encoder = encoder.to(device)

  load_params = torch.load(os.path.join('./Models/BYOL.pth'),


In [65]:
encoder

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [66]:
class LogisticRegression(torch.nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegression, self).__init__()
        self.linear = torch.nn.Linear(input_dim, output_dim)
        
    def forward(self, x):
        return self.linear(x)

In [67]:
logreg = LogisticRegression(output_feature_dim, 10)
logreg = logreg.to(device)

In [68]:
def get_features_from_encoder(encoder, loader):
    
    x_train = []
    y_train = []

    # get the features from the pre-trained model
    for i, (x, y) in enumerate(loader):
        with torch.no_grad():
            feature_vector = encoder(x)
            x_train.extend(feature_vector)
            y_train.extend(y.numpy())

            
    x_train = torch.stack(x_train)
    y_train = torch.tensor(y_train)
    return x_train, y_train

In [69]:
encoder.eval()
x_train, y_train = get_features_from_encoder(encoder, train_loader)
x_test, y_test = get_features_from_encoder(encoder, test_loader)

if len(x_train.shape) > 2:
    x_train = torch.mean(x_train, dim=[2, 3])
    x_test = torch.mean(x_test, dim=[2, 3])
    
print("Training data shape:", x_train.shape, y_train.shape)
print("Testing data shape:", x_test.shape, y_test.shape)

Training data shape: torch.Size([5000, 512]) torch.Size([5000])
Testing data shape: torch.Size([8000, 512]) torch.Size([8000])


In [70]:
def create_data_loaders_from_arrays(X_train, y_train, X_test, y_test):

    train = torch.utils.data.TensorDataset(X_train, y_train)
    train_loader = torch.utils.data.DataLoader(train, batch_size=64, shuffle=True)

    test = torch.utils.data.TensorDataset(X_test, y_test)
    test_loader = torch.utils.data.DataLoader(test, batch_size=512, shuffle=False)
    return train_loader, test_loader

In [71]:
scaler = preprocessing.StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train).astype(np.float32)
x_test = scaler.transform(x_test).astype(np.float32)

In [72]:
train_loader, test_loader = create_data_loaders_from_arrays(torch.from_numpy(x_train), y_train, torch.from_numpy(x_test), y_test)

In [74]:
optimizer = torch.optim.SGD(logreg.parameters(), lr=3e-4)
criterion = torch.nn.CrossEntropyLoss()

# optimizer = torch.optim.SGD(logreg.parameters(), lr=0.03, momentum=0.9, weight_decay=1e-4)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)  # T_max = number of epochs

eval_every_n_epochs = 10


train_losses = []
test_accuracies = []


for epoch in range(500):
    epoch_loss = 0
    logreg.train()
#     train_acc = []
    for x, y in train_loader:

        x = x.to(device)
        y = y.to(device)
        
        # zero the parameter gradients
        optimizer.zero_grad()        
        
        logits = logreg(x)
        predictions = torch.argmax(logits, dim=1)
        
        loss = criterion(logits, y)
        
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    train_losses.append(epoch_loss / len(train_loader))
    
    # scheduler.step()
    
    if epoch % eval_every_n_epochs == 0:
        logreg.eval()
        correct = 0
        total = 0
        for x, y in test_loader:
            x = x.to(device)
            y = y.to(device)

            logits = logreg(x)
            predictions = torch.argmax(logits, dim=1)
            
            total += y.size(0)
            correct += (predictions == y).sum().item()
            
        acc = 100 * correct / total
        test_accuracies.append(acc)
        print(f"Epoch {epoch}, Testing accuracy: {acc:.2f}%")


Epoch 0, Testing accuracy: 37.16%
Epoch 10, Testing accuracy: 37.27%
Epoch 20, Testing accuracy: 37.45%
Epoch 30, Testing accuracy: 37.67%
Epoch 40, Testing accuracy: 37.80%
Epoch 50, Testing accuracy: 37.90%
Epoch 60, Testing accuracy: 38.01%
Epoch 70, Testing accuracy: 38.01%
Epoch 80, Testing accuracy: 38.05%
Epoch 90, Testing accuracy: 38.21%
Epoch 100, Testing accuracy: 38.25%
Epoch 110, Testing accuracy: 38.26%
Epoch 120, Testing accuracy: 38.40%
Epoch 130, Testing accuracy: 38.45%
Epoch 140, Testing accuracy: 38.61%
Epoch 150, Testing accuracy: 38.81%
Epoch 160, Testing accuracy: 38.86%
Epoch 170, Testing accuracy: 38.98%
Epoch 180, Testing accuracy: 39.04%
Epoch 190, Testing accuracy: 39.01%
Epoch 200, Testing accuracy: 39.14%
Epoch 210, Testing accuracy: 39.25%
Epoch 220, Testing accuracy: 39.26%
Epoch 230, Testing accuracy: 39.25%
Epoch 240, Testing accuracy: 39.49%
Epoch 250, Testing accuracy: 39.45%
Epoch 260, Testing accuracy: 39.46%
Epoch 270, Testing accuracy: 39.58%
Epo

In [75]:
# optimizer = torch.optim.SGD(logreg.parameters(), lr=3e-4)
# criterion = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(logreg.parameters(), lr=0.03, momentum=0.9, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)  # T_max = number of epochs

eval_every_n_epochs = 10


train_losses = []
test_accuracies = []


for epoch in range(500):
    epoch_loss = 0
    logreg.train()
#     train_acc = []
    for x, y in train_loader:

        x = x.to(device)
        y = y.to(device)
        
        # zero the parameter gradients
        optimizer.zero_grad()        
        
        logits = logreg(x)
        predictions = torch.argmax(logits, dim=1)
        
        loss = criterion(logits, y)
        
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    train_losses.append(epoch_loss / len(train_loader))
    
    scheduler.step()
    
    if epoch % eval_every_n_epochs == 0:
        logreg.eval()
        correct = 0
        total = 0
        for x, y in test_loader:
            x = x.to(device)
            y = y.to(device)

            logits = logreg(x)
            predictions = torch.argmax(logits, dim=1)
            
            total += y.size(0)
            correct += (predictions == y).sum().item()
            
        acc = 100 * correct / total
        test_accuracies.append(acc)
        print(f"Epoch {epoch}, Testing accuracy: {acc:.2f}%")


Epoch 0, Testing accuracy: 30.18%
Epoch 10, Testing accuracy: 36.44%
Epoch 20, Testing accuracy: 38.27%
Epoch 30, Testing accuracy: 38.39%
Epoch 40, Testing accuracy: 37.02%
Epoch 50, Testing accuracy: 37.42%
Epoch 60, Testing accuracy: 36.92%
Epoch 70, Testing accuracy: 37.62%
Epoch 80, Testing accuracy: 37.58%
Epoch 90, Testing accuracy: 38.19%
Epoch 100, Testing accuracy: 38.04%
Epoch 110, Testing accuracy: 37.26%
Epoch 120, Testing accuracy: 38.20%
Epoch 130, Testing accuracy: 38.20%
Epoch 140, Testing accuracy: 38.05%
Epoch 150, Testing accuracy: 38.02%
Epoch 160, Testing accuracy: 38.31%
Epoch 170, Testing accuracy: 38.52%
Epoch 180, Testing accuracy: 38.55%
Epoch 190, Testing accuracy: 38.55%
Epoch 200, Testing accuracy: 38.59%
Epoch 210, Testing accuracy: 38.55%
Epoch 220, Testing accuracy: 38.41%
Epoch 230, Testing accuracy: 38.51%
Epoch 240, Testing accuracy: 38.17%
Epoch 250, Testing accuracy: 37.98%
Epoch 260, Testing accuracy: 38.11%
Epoch 270, Testing accuracy: 38.29%
Epo

In [None]:
optimizer = torch.optim.Adam(logreg.parameters(), lr=3e-4)
criterion = torch.nn.CrossEntropyLoss()
eval_every_n_epochs = 10

for epoch in range(200):
#     train_acc = []
    for x, y in train_loader:

        x = x.to(device)
        y = y.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        logits = logreg(x)
        predictions = torch.argmax(logits, dim=1)

        loss = criterion(logits, y)

        loss.backward()
        optimizer.step()

    total = 0
    if epoch % eval_every_n_epochs == 0:
        correct = 0
        for x, y in test_loader:
            x = x.to(device)
            y = y.to(device)

            logits = logreg(x)
            predictions = torch.argmax(logits, dim=1)

            total += y.size(0)
            correct += (predictions == y).sum().item()

        acc = 100 * correct / total
        print(f"Testing accuracy: {np.mean(acc)}")

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plotting the training loss
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(range(0, 200, eval_every_n_epochs), train_losses, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss over Epochs')
plt.grid(True)

# Show plots
plt.tight_layout()
plt.show()

In [None]:
# Plotting the test accuracy
plt.subplot(1, 2, 2)
plt.plot(range(0, 200, eval_every_n_epochs), test_accuracies, label='Test Accuracy', color='orange')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Test Accuracy over Epochs')
plt.grid(True)

# Show plots
plt.tight_layout()
plt.show()