In [1]:
import torch
import clip
import numpy as np
import matplotlib.pyplot as plt
import os
import math
from torchvision.datasets import CIFAR10
from torchvision.transforms import *
from tqdm.notebook import tqdm
import PIL
import medmnist
from medmnist import INFO, Evaluator
import torch.utils.data as data
import random
np.random.seed(0)

# Set seed for PyTorch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
random.seed(42)

In [2]:
data_flag = 'pneumoniamnist'
download = True
info = INFO[data_flag]
task = info['task']            
n_channels = info['n_channels']
n_classes = len(info['label'])
DataClass = getattr(medmnist, info['python_class'])


print('NOT USING IMAGENET NORM')
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]
normalize = transforms.Normalize(mean=mean, std=std)

# preprocessing
data_transform = transforms.Compose([
transforms.Resize((224, 224), interpolation=PIL.Image.NEAREST),
transforms.ToTensor(),
normalize
    ])

train_dataset = DataClass(split='train', transform=data_transform, download=download, as_rgb=True)
test_dataset = DataClass(split='test', transform=data_transform, download=download,  as_rgb=True)
val_dataset = DataClass(split='val', transform=data_transform, download=download,  as_rgb=True)

train_loader = data.DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
val_loader = data.DataLoader(dataset=val_dataset, batch_size=128, shuffle=False)
test_loader = data.DataLoader(dataset=test_dataset, batch_size=128, shuffle=False)


print(len(train_dataset))
print("===================")
print(len(val_dataset))
print("===================")
print(len(test_dataset))


NOT USING IMAGENET NORM
Using downloaded and verified file: /home/santosh.sanjeev/.medmnist/pneumoniamnist.npz
Using downloaded and verified file: /home/santosh.sanjeev/.medmnist/pneumoniamnist.npz
Using downloaded and verified file: /home/santosh.sanjeev/.medmnist/pneumoniamnist.npz
4708
524
624


  transforms.Resize((224, 224), interpolation=PIL.Image.NEAREST),


In [3]:
import torch.nn as nn
from sklearn.metrics import f1_score, roc_auc_score

def evaluate_model(model, test_loader):

    test_correct = 0
    test_total = 0
    test_predictions = []
    test_targets = []
    test_f1_predictions = []

    model.eval()
    with torch.no_grad():
        for inputs, targets in tqdm(test_loader):
            outputs = model(inputs)
            m = nn.Softmax(dim=1)            
            predicted = m(outputs)
            # _, predicted = torch.max(outputs.data, 1)
            targets = targets.squeeze().long()
            test_total += targets.size(0)
            _,t = torch.max(outputs.data, 1)
            test_correct += (t == targets).sum().item()
            print(t)
            predicted = predicted[:,-1]

            test_predictions.extend(predicted.cpu().numpy())
            test_f1_predictions.extend(t.cpu().numpy())
            test_targets.extend(targets.cpu().numpy())
            # print(test_correct, test_total)
        test_accuracy = 100 * test_correct / test_total
        if task !='binary-class':
            test_f1 = f1_score(test_targets, test_f1_predictions, average = 'macro')
            test_auc = roc_auc_score(test_targets, test_predictions, average = 'macro', multi_class = 'ovr')
        else:
            test_f1 = f1_score(test_targets, test_f1_predictions)
            test_auc = roc_auc_score(test_targets, test_predictions)
        # Print or log validation metrics
        print(f"Test Accuracy: {test_accuracy:.2f}%, F1-Score: {test_f1:.4f}, AUC: {test_auc:.4f}")
        return test_accuracy, test_f1, test_auc


In [5]:
from models.model import ImageNetModel, CLIPModel
import torchvision.models as models

device = 'cuda' if torch.cuda.is_available() else 'cpu'

def load_pretrained_model(path, load_model = 'resnet18'):
    
    model =  getattr(models, load_model)(pretrained=True)
    model = torch.nn.Sequential(*list(model.children())[:-1])
    full_model = ImageNetModel(model, num_classes=n_classes)
    
    checkpoint = torch.load(path)  # Provide the path to your weights file
    full_model.load_state_dict(checkpoint['model_state_dict'])
    
    return full_model

In [6]:
models_folder = '/home/santosh.sanjeev/model-soups/my_soups/checkpoints/grid_models/'
model_paths = [os.path.join(models_folder, filename) for filename in os.listdir(models_folder) if filename.endswith('.pth') and not filename.endswith('.csv')]
model_names = [os.path.splitext(os.path.basename(path))[0] for path in model_paths]

state_dicts = []
for model_path, model_name in zip(model_paths, model_names):
    checkpoint = torch.load(model_path)
    state_dicts.append(checkpoint['model_state_dict'])

In [7]:
# import os
# import pandas as pd
# import matplotlib.pyplot as plt

# models_folder = '/home/santosh.sanjeev/model-soups/my_soups/checkpoints/grid_models/'
# model_paths = [os.path.join(models_folder, filename) for filename in os.listdir(models_folder) if filename.endswith('.pth') and not filename.endswith('.csv')]
# model_names = [os.path.splitext(os.path.basename(path))[0] for path in model_paths]
# results = []
# # results_df = pd.DataFrame(columns=['Model Name', 'Test Accuracy', 'Test F1', 'Test AUC'])

# for model_path, model_name in zip(model_paths, model_names):
#     model = load_pretrained_model(model_path)
#     test_accuracy, test_f1, test_auc = evaluate_model(model, test_loader)
#     results.append({'Model Name': model_name,
#                                     'Test Accuracy': test_accuracy,
#                                     'Test F1': test_f1,
#                                     'Test AUC': test_auc})
# results_df = pd.DataFrame(results)
# results_df


In [8]:
# # Plot the bar graph
# metrics = ['Test Accuracy', 'Test F1', 'Test AUC']
# for metric in metrics:
#     plt.figure(figsize=(10, 5))
#     plt.bar(results_df['Model Name'], results_df[metric])
#     plt.xlabel('Model Name')
#     plt.ylabel(metric)
#     plt.title(f'{metric} for Different Models')
#     plt.xticks(rotation=45)
#     plt.show()

In [24]:
def get_model(state_dicts, alphal):
  sd = {k : state_dicts[0][k].clone() * alphal[0] for k in state_dicts[0].keys()}
  model2 =  getattr(models, 'resnet18')(pretrained=True)
  model2 = torch.nn.Sequential(*list(model2.children())[:-1])
  full_model = ImageNetModel(model2, num_classes=2)
  print(state_dicts)
  for i in range(1,len(state_dicts)):
      for k in state_dicts[i].keys():
          print(k)
          sd[k] = sd[k] + state_dicts[i][k].clone() * alphal[i]
  full_model.load_state_dict(sd)
  full_model = full_model.to(device)
  return full_model

In [25]:
model_paths = ['/home/santosh.sanjeev/model-soups/my_soups/checkpoints/grid_models/model_23.pth', '/home/santosh.sanjeev/model-soups/my_soups/checkpoints/grid_models/model_22.pth']#, '/home/santosh.sanjeev/model-soups/my_soups/checkpoints/grid_models/model_1.pth']

state_dicts = []
for model_path, model_name in zip(model_paths, model_names):
    checkpoint = torch.load(model_path)
    state_dicts.append(checkpoint['model_state_dict'])

In [26]:
device = 'cpu'

alphal = [1 / len(state_dicts) for i in range(len(state_dicts))]
print(len(state_dicts))
model1 = get_model(state_dicts, alphal)
test_accuracy, test_f1, test_auc = evaluate_model(model1, test_loader)
results = []
results.append({'Model Name': model_name,'Test Accuracy': test_accuracy,'Test F1': test_f1, 'Test AUC': test_auc})

2
[OrderedDict([('feature_extractor.0.weight', tensor([[[[-1.3332e-02, -7.4823e-03, -3.1597e-03,  ...,  5.7048e-02,
            2.1085e-02, -8.6701e-03],
          [ 9.2632e-03,  8.7418e-03, -1.1062e-01,  ..., -2.6989e-01,
           -1.2404e-01,  8.6830e-03],
          [-8.7495e-03,  5.8266e-02,  2.9448e-01,  ...,  5.2048e-01,
            2.6108e-01,  6.8469e-02],
          ...,
          [-2.9756e-02,  1.5280e-02,  7.1788e-02,  ..., -3.3149e-01,
           -4.1445e-01, -2.5181e-01],
          [ 3.0026e-02,  4.1962e-02,  6.3836e-02,  ...,  4.1653e-01,
            4.0034e-01,  1.7297e-01],
          [-1.4290e-02, -2.6391e-03, -2.3034e-02,  ..., -1.4760e-01,
           -7.5130e-02,  1.2604e-03]],

         [[-1.4309e-02, -2.7951e-02, -3.5966e-02,  ...,  3.2971e-02,
            4.6761e-03, -2.1710e-02],
          [ 4.3842e-02,  3.2800e-02, -1.0523e-01,  ..., -3.1115e-01,
           -1.5544e-01,  3.6618e-03],
          [-2.6480e-03,  9.7568e-02,  4.0103e-01,  ...,  7.0851e-01,
           

  0%|          | 0/5 [00:00<?, ?it/s]

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1])


KeyboardInterrupt: 

In [25]:
def average_models(model_paths):
    models = [load_pretrained_model(path) for path in model_paths]
    state_dicts = [model.state_dict() for model in models]
    averaged_state_dict = {}
    
    for key in state_dicts[0].keys():
        param_sum = sum(state_dict[key] for state_dict in state_dicts)
        averaged_state_dict[key] = param_sum / len(models)
    
    averaged_model = load_pretrained_model(model_paths[0])
    averaged_model.load_state_dict(averaged_state_dict)
    
    return averaged_model



In [39]:
models_path = ['/home/santosh.sanjeev/model-soups/my_soups/checkpoints/grid_models/model_23.pth', '/home/santosh.sanjeev/model-soups/my_soups/checkpoints/grid_models/model_22.pth', '/home/santosh.sanjeev/model-soups/my_soups/checkpoints/grid_models/model_1.pth']
averaged_model = average_models(models_path)
test_accuracy, test_f1, test_auc = evaluate_model(averaged_model, test_loader)


  0%|          | 0/5 [00:00<?, ?it/s]

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1