# Team Details

Member 1 Name: Kiruthiharan Basker <br>
Member 1 Student Id: 7062652

Member 2 Name: Manish Kumar Bala Kumar <br>
Member 2 Student Id: 7062996

Member 3 Name: Sanju Kunjumman Jacob <br>
Member 3 Student Id: 7061424

Member 4 Name: Anjali Sankar Eswaramangalath <br>
Member 4 Student Id: 7062531

In [None]:
import pickle
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import torchvision.models as models
from torch.utils.data import random_split
import torch.nn as nn
import torch.optim as optim
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
import io
from sklearn.ensemble import RandomForestClassifier
import random
from torch.optim import lr_scheduler

In [None]:
# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
def load_shadow_dataset(DATA_PATH, N, train_fraction=0.8, batch_size=64, num_workers=2):
    """Load the pickled shadow dataset and build N random train/test splits.

    Args:
        DATA_PATH: Path of the pickle file holding the shadow dataset (any
            sized, indexable collection of samples).
        N: Number of independent splits (one per shadow model) to create.
        train_fraction: Share of the samples placed in each training split;
            the remainder forms the matching test split.
        batch_size: Batch size used by every returned DataLoader.
        num_workers: Worker-process count used by every returned DataLoader.

    Returns:
        A pair ``(train_dataloaders, test_dataloaders)``; both are lists of
        length ``N`` and entry ``i`` of each list belongs to the same split.
        Training loaders shuffle their samples, test loaders do not.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(DATA_PATH, "rb") as f:
        dataset = pickle.load(f)

    train_size = int(train_fraction * len(dataset))
    test_size = len(dataset) - train_size

    train_dataloaders = []
    test_dataloaders = []

    for _ in range(N):
        # random_split draws a fresh random permutation on every call, so each
        # iteration already yields an independent split. The dataset itself is
        # left untouched: shuffling it in place would reorder the data that the
        # Subsets from earlier iterations still index into, and would fail for
        # immutable containers.
        train_d, test_d = random_split(dataset, [train_size, test_size])

        train_dataloaders.append(
            torch.utils.data.DataLoader(
                train_d, batch_size=batch_size, shuffle=True, num_workers=num_workers
            )
        )
        test_dataloaders.append(
            torch.utils.data.DataLoader(
                test_d, batch_size=batch_size, shuffle=False, num_workers=num_workers
            )
        )

    return train_dataloaders, test_dataloaders


In [None]:
def train_resnet(train_dataloader, test_dataloader, num_classes, num_epochs):
    """Fine-tune a pretrained ResNet-34 shadow model and report its accuracy.

    The torchvision ResNet-34 is loaded with ``pretrained=True``, its final
    fully connected layer is replaced by a ``num_classes``-way linear layer and
    its pooling layer by ``AdaptiveAvgPool2d(1)``. Training uses SGD
    (lr=0.001, momentum=0.9) with a StepLR schedule that multiplies the
    learning rate by 0.1 every 7 epochs.

    Args:
        train_dataloader: Iterable of ``(images, labels)`` training batches.
        test_dataloader: Iterable of ``(images, labels)`` batches used for the
            final accuracy report.
        num_classes: Number of output classes.
        num_epochs: Number of passes over ``train_dataloader``.

    Returns:
        The trained model, on ``device`` and in eval mode.
    """
    resnet_model = models.resnet34(pretrained=True)
    resnet_model.fc = nn.Linear(resnet_model.fc.in_features, num_classes)
    resnet_model.avgpool = nn.AdaptiveAvgPool2d(1)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(resnet_model.parameters(), lr=0.001, momentum=0.9)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    resnet_model.to(device)

    for epoch in range(num_epochs):
        resnet_model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in train_dataloader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = resnet_model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        # Advance the schedule once per epoch (after the optimizer updates);
        # without this call the StepLR above never changes the learning rate.
        exp_lr_scheduler.step()

        # Guard the averages so an empty loader reports 0 instead of raising.
        train_loss = running_loss / max(len(train_dataloader), 1)
        train_accuracy = 100. * correct / total if total else 0.0

        print(f"Epoch [{epoch+1}/{num_epochs}]")
        print(f"Train Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.2f}%")

    # Final evaluation on the held-out split (these samples act as
    # non-members for the shadow model).
    resnet_model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = resnet_model(images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    test_accuracy = 100. * correct / total if total else 0.0
    print(f"Test Accuracy: {test_accuracy:.2f}%")

    return resnet_model

In [None]:
def train_mobilenet(train_dataloader, test_dataloader, num_classes, num_epochs):
    """Fine-tune a pretrained MobileNetV2 shadow model and report its accuracy.

    The torchvision MobileNetV2 is loaded with ``pretrained=True`` and the
    linear layer at ``classifier[1]`` is replaced by a ``num_classes``-way
    linear layer. Training uses SGD (lr=0.001, momentum=0.9) with a StepLR
    schedule that multiplies the learning rate by 0.1 every 7 epochs.

    Args:
        train_dataloader: Iterable of ``(images, labels)`` training batches.
        test_dataloader: Iterable of ``(images, labels)`` batches used for the
            final accuracy report.
        num_classes: Number of output classes.
        num_epochs: Number of passes over ``train_dataloader``.

    Returns:
        The trained model, on ``device`` and in eval mode.
    """
    mobilenet_model = models.mobilenet_v2(pretrained=True)
    mobilenet_model.classifier[1] = nn.Linear(mobilenet_model.classifier[1].in_features, num_classes)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(mobilenet_model.parameters(), lr=0.001, momentum=0.9)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    mobilenet_model.to(device)

    for epoch in range(num_epochs):
        mobilenet_model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in train_dataloader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = mobilenet_model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        # Advance the schedule once per epoch (after the optimizer updates);
        # without this call the StepLR above never changes the learning rate.
        exp_lr_scheduler.step()

        # Guard the averages so an empty loader reports 0 instead of raising.
        train_loss = running_loss / max(len(train_dataloader), 1)
        train_accuracy = 100. * correct / total if total else 0.0

        print(f"Epoch [{epoch+1}/{num_epochs}]")
        print(f"Train Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.2f}%")

    # Final evaluation on the held-out split (these samples act as
    # non-members for the shadow model).
    mobilenet_model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = mobilenet_model(images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    test_accuracy = 100. * correct / total if total else 0.0
    print(f"Test Accuracy: {test_accuracy:.2f}%")

    return mobilenet_model

In [None]:
def generate_dataset_from_shadow_models(model, dataloader, member):
    """Turn a model's outputs on ``dataloader`` into attack-model features.

    Args:
        model: Classifier to query; it is switched to eval mode.
        dataloader: Iterable of ``(images, labels)`` batches.
        member: Membership label stored for every sample of this loader
            (callers pass 1 for training data and 0 for held-out data).

    Returns:
        A pair ``(x, y)``. ``x`` holds one row per sample of the form
        ``[true_label, predicted_label, *softmax_probabilities]`` and ``y``
        holds ``member`` once per row.
    """
    model.eval()
    x = []
    y = []
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            probabilities = torch.nn.functional.softmax(outputs, dim=1)
            predictions = torch.argmax(outputs, 1)
            # Convert each tensor to Python lists once per batch rather than
            # calling .item()/.tolist() per sample, which would force one
            # device synchronisation per sample.
            batch_rows = zip(labels.tolist(), predictions.tolist(), probabilities.tolist())
            for label, prediction, probs in batch_rows:
                x.append([label, prediction] + probs)
                y.append(member)

    return x, y

In [None]:
# Read the labelled evaluation data.
def load_eval_data(DATA_PATH):
    """Load the pickled evaluation set.

    Every record is indexed as ``record[0]``, ``record[1]``, ``record[2]``.
    The first two fields are paired up as the sample fed to the DataLoader;
    the third field is returned separately (the caller compares it against the
    attack predictions).

    Returns:
        ``(dataloader, third_fields)`` where the DataLoader serves the
        two-field samples in file order (batch size 64, no shuffling) and
        ``third_fields`` is a list in the same order.
    """
    with open(DATA_PATH, "rb") as handle:
        records = pickle.load(handle)

    samples = []
    truths = []
    for record in records:
        samples.append((record[0], record[1]))
        truths.append(record[2])

    loader = torch.utils.data.DataLoader(
        samples,
        batch_size=64,
        shuffle=False,
        num_workers=2,
    )
    return loader, truths

# Rebuild the target network and restore its trained weights.
def load_target_model(MODEL_PATH, num_classes, model_type="resnet34"):
    """Instantiate the target architecture and load its checkpoint.

    ``model_type == "resnet34"`` selects torchvision's ResNet-34; any other
    value selects MobileNetV2. The checkpoint at ``MODEL_PATH`` is expected to
    be a mapping whose ``'net'`` entry is the state dict.

    Returns:
        The model on ``device`` with the checkpoint weights loaded.
    """
    build_network = models.resnet34 if model_type == "resnet34" else models.mobilenet_v2
    target_model = build_network(num_classes=num_classes).to(device)

    checkpoint = torch.load(MODEL_PATH, map_location=device)
    target_model.load_state_dict(checkpoint['net'])
    return target_model


In [None]:
def train_random_forest(X_train, y_train):
    """Fit a 50-tree random forest on the given features/labels and return it."""
    forest = RandomForestClassifier(n_estimators=50)
    # fit() returns the fitted estimator itself.
    return forest.fit(X_train, y_train)

In [None]:
# Read the test data (no third field is used here).
def load_test_data(DATA_PATH):
    """Load the pickled test set and wrap it in a DataLoader.

    Only ``record[0]`` and ``record[1]`` of every record are kept, paired up as
    one sample. The DataLoader serves them in file order (batch size 64, no
    shuffling).
    """
    with open(DATA_PATH, "rb") as handle:
        records = pickle.load(handle)

    samples = []
    for record in records:
        samples.append((record[0], record[1]))

    return torch.utils.data.DataLoader(
        samples,
        batch_size=64,
        shuffle=False,
        num_workers=2,
    )

In [None]:
# Root folder (on the mounted drive) holding datasets, target models and outputs.
BASE_PATH = '/content/drive/MyDrive/amlm'


def _task_config(index, model, dataset, n_classes, epochs):
    """Build the settings dict of one attack task from its distinguishing fields."""
    return {
        'id': f"task{index}_{model}_{dataset}",
        'N_CLASS_LABELS': n_classes,
        'MODEL': model,
        'DATASET': dataset,
        'epochs': epochs,
        'DATA_SET_PATH': f"{BASE_PATH}/pickle/{dataset}/{model}",
        'OUTPUT_PATH': f"{BASE_PATH}/shadow_{index}/",
        'TARGET_MODEL_PATH': f"{BASE_PATH}/models/{model}_{dataset}.pth",
    }


# One entry per (target architecture, dataset) pair; `epochs` is the number of
# shadow-model training epochs used for that task.
task_0 = _task_config(0, 'resnet34', 'cifar10', n_classes=10, epochs=25)
task_1 = _task_config(1, 'mobilenetv2', 'cifar10', n_classes=10, epochs=25)
task_2 = _task_config(2, 'resnet34', 'tinyimagenet', n_classes=200, epochs=8)
task_3 = _task_config(3, 'mobilenetv2', 'tinyimagenet', n_classes=200, epochs=15)

# Indexed by task number.
configs = [task_0, task_1, task_2, task_3]

def _predict_membership(attack_models, rows):
    """Predict membership for attack-feature rows with the per-class attack models.

    Each row has the form ``[true_label, predicted_label, *probabilities]``.
    Rows are grouped by their true label so every attack model is queried once
    with a whole batch instead of once per sample. Predictions are returned in
    the same order as ``rows``.
    """
    positions_by_class = {}
    for position, row in enumerate(rows):
        positions_by_class.setdefault(row[0], []).append(position)

    preds = [None] * len(rows)
    for class_label, positions in positions_by_class.items():
        # The attack models were trained on the probability vector only, so the
        # two leading label columns are dropped.
        batch = [rows[position][2:] for position in positions]
        batch_preds = attack_models[class_label].predict(batch)
        for position, pred in zip(positions, batch_preds):
            preds[position] = pred
    return preds


def mia(task=0, stages=(0, 1, 2), N=1):
    """Run the membership-inference attack pipeline for one configured task.

    Args:
        task: Index into ``configs`` selecting the task settings.
        stages: Collection of stage numbers to run:
            ``0`` trains ``N`` shadow models, saves them and dumps the attack
            training set (``shadow_X`` / ``shadow_Y``) into the task's output
            folder;
            ``1`` reads that dump and trains and saves one random-forest attack
            model per class label;
            ``2`` loads the attack models and the target model, prints the
            attack accuracy on ``eval.p`` and writes the predictions for
            ``test.p`` to ``<task id>.npy`` in the working directory.
        N: Number of shadow models trained in stage 0.
    """
    import os  # local import keeps this block self-contained

    config = configs[task]
    OUTPUT_PATH = config['OUTPUT_PATH']
    N_CLASS_LABELS = config['N_CLASS_LABELS']
    TARGET_MODEL_PATH = config['TARGET_MODEL_PATH']
    MODEL_TYPE = config['MODEL']
    DATA_SET_PATH = config['DATA_SET_PATH']

    # Build every output path the same way (no doubled or missing separators).
    shadow_x_path = os.path.join(OUTPUT_PATH, "shadow_X")
    shadow_y_path = os.path.join(OUTPUT_PATH, "shadow_Y")

    def attack_model_path(class_label):
        return os.path.join(OUTPUT_PATH, f'attack_model_{class_label}.sav')

    if 0 in stages or 1 in stages:
        # Both stages write into the output folder; make sure it exists.
        os.makedirs(OUTPUT_PATH, exist_ok=True)

    # ---- Stage 0: shadow models and attack training data --------------------
    if 0 in stages:
        # load shadow dataset
        train_dataloaders, test_dataloaders = load_shadow_dataset(DATA_SET_PATH + '/shadow.p', N)

        # train shadow models; the architecture follows the task's configured
        # MODEL rather than a hard-coded list of task indices
        train_shadow = train_resnet if MODEL_TYPE == 'resnet34' else train_mobilenet
        shadow_models = [
            train_shadow(train_dataloaders[i], test_dataloaders[i], N_CLASS_LABELS,
                         num_epochs=config['epochs'])
            for i in range(N)
        ]

        # save shadow models
        for i, shadow_model in enumerate(shadow_models):
            torch.save(shadow_model, os.path.join(OUTPUT_PATH, f'shadow_model_{i}.sav'))

        # Generate attack dataset from the in-memory models (reloading the
        # files that were just written would yield the same models).
        X = []
        y = []
        for shadow_model, train_dl, test_dl in zip(shadow_models, train_dataloaders, test_dataloaders):
            member_x, member_y = generate_dataset_from_shadow_models(shadow_model, train_dl, 1)
            non_member_x, non_member_y = generate_dataset_from_shadow_models(shadow_model, test_dl, 0)
            X.extend(member_x)
            X.extend(non_member_x)
            y.extend(member_y)
            y.extend(non_member_y)

        with open(shadow_x_path, "wb") as fp:
            pickle.dump(X, fp)

        with open(shadow_y_path, "wb") as fp:
            pickle.dump(y, fp)

    # ---- Stage 1: one attack model per class --------------------------------
    if 1 in stages:
        # Load dataset generated by shadow model
        with open(shadow_x_path, "rb") as fp:
            X = pickle.load(fp)

        with open(shadow_y_path, "rb") as fp:
            y = pickle.load(fp)

        # Group rows by true class label (row[0]); the features are the
        # probability vector, i.e. everything after the two label columns.
        X_dict = {label: [] for label in range(N_CLASS_LABELS)}
        y_dict = {label: [] for label in range(N_CLASS_LABELS)}
        for row, is_member in zip(X, y):
            X_dict[row[0]].append(row[2:])
            y_dict[row[0]].append(is_member)

        # train a model per class
        attack_models = [
            train_random_forest(X_dict[label], y_dict[label])
            for label in range(N_CLASS_LABELS)
        ]

        # save attack models
        for label, attack_model in enumerate(attack_models):
            torch.save(attack_model, attack_model_path(label))

    # ---- Stage 2: attack the target model -----------------------------------
    if 2 in stages:
        # import attack models
        # NOTE(review): torch.save/torch.load are used here as a generic pickle
        # wrapper for scikit-learn objects; PyTorch releases where torch.load
        # defaults to weights_only=True would need weights_only=False here.
        # Confirm against the installed version.
        attack_models = [torch.load(attack_model_path(label)) for label in range(N_CLASS_LABELS)]

        # load target model
        target_model = load_target_model(TARGET_MODEL_PATH, N_CLASS_LABELS, MODEL_TYPE)

        # Load eval dataset and query the target model (the membership label
        # passed to the generator is ignored here).
        eval_dl, y_eval = load_eval_data(DATA_SET_PATH + '/eval.p')
        X_eval, _ = generate_dataset_from_shadow_models(target_model, eval_dl, 1)

        # Get predictions
        preds = _predict_membership(attack_models, X_eval)

        print(len(preds), len(y_eval))

        correct = sum(1 for pred, truth in zip(preds, y_eval) if pred == truth)
        accuracy = correct / len(y_eval) if y_eval else 0.0
        print("Accuracy:", accuracy)

        # Load and get predictions for test file
        test_dl = load_test_data(DATA_SET_PATH + '/test.p')
        X_test, _ = generate_dataset_from_shadow_models(target_model, test_dl, 1)
        preds_test = _predict_membership(attack_models, X_test)

        np_preds = np.asarray(preds_test, dtype=np.int32)
        np.save(config['id'] + '.npy', np_preds)



In [None]:
# Run the complete pipeline (all three stages, one shadow model) for each task in turn.
for task_index in (0, 1, 2, 3):
    mia(task=task_index, stages=[0, 1, 2], N=1)