

In [None]:
import torch
from torch.utils.data import Dataset
from typing import Tuple
import numpy as np
import requests
import pandas as pd


import torch.nn as nn
from torch.utils.data import DataLoader
import numpy as np

import matplotlib.pyplot as plt
import matplotlib

%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [None]:
import gdown

# Google Drive ids of the three files saved below as pub.pt, priv.pt and model.pt.
pub_id = '1OLZsYJteuUpnQnSoZTHC617zmSRPsSK0'
priv_id = '1wGNkKdKRn2ZpQ-GtP3l8UCpNHUWBgHyN'
model_id = '1-rFEKopl4PZ4e3FR_dKcLbO_Y4pXOgLo'

# Every file is fetched through the same direct-download endpoint.
DRIVE_URL_TEMPLATE = 'https://drive.google.com/uc?id={}'

gdown.download(DRIVE_URL_TEMPLATE.format(pub_id), 'pub.pt', quiet=False)
gdown.download(DRIVE_URL_TEMPLATE.format(priv_id), 'priv.pt', quiet=False)
# Kept as the last bare expression so the cell still echoes the returned path.
gdown.download(DRIVE_URL_TEMPLATE.format(model_id), 'model.pt', quiet=False)

Downloading...
From (original): https://drive.google.com/uc?id=1OLZsYJteuUpnQnSoZTHC617zmSRPsSK0
From (redirected): https://drive.google.com/uc?id=1OLZsYJteuUpnQnSoZTHC617zmSRPsSK0&confirm=t&uuid=5d91616f-ee8f-4635-a124-d16e2743fd76
To: /content/pub.pt
100%|██████████| 250M/250M [00:03<00:00, 63.3MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1wGNkKdKRn2ZpQ-GtP3l8UCpNHUWBgHyN
From (redirected): https://drive.google.com/uc?id=1wGNkKdKRn2ZpQ-GtP3l8UCpNHUWBgHyN&confirm=t&uuid=3ff11de9-f072-4052-9d07-b8a881cb38a9
To: /content/priv.pt
100%|██████████| 251M/251M [00:04<00:00, 54.0MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1-rFEKopl4PZ4e3FR_dKcLbO_Y4pXOgLo
From (redirected): https://drive.google.com/uc?id=1-rFEKopl4PZ4e3FR_dKcLbO_Y4pXOgLo&confirm=t&uuid=4ceaf0f0-cc7c-40c0-bf8e-f1fd594b9128
To: /content/model.pt
100%|██████████| 44.9M/44.9M [00:02<00:00, 20.5MB/s]


'model.pt'

In [None]:

#### LOADING THE MODEL

from torchvision.models import resnet18

# Build an untrained ResNet-18 and replace its head with a 44-way classifier so
# the architecture matches the checkpoint loaded below.
# `resnet18()` with no arguments means "no pretrained weights" on both old and
# new torchvision, and avoids the deprecation warning for `pretrained=`.
model = resnet18()
model.fc = torch.nn.Linear(model.fc.in_features, 44)

# Load from the same relative path the download cell wrote to, rather than a
# hard-coded /content/ path that only exists on Colab.
ckpt = torch.load("model.pt", map_location="cpu")

# Left as the last bare expression so the key-matching report is displayed.
model.load_state_dict(ckpt)



<All keys matched successfully>

In [None]:

#### DATASETS

class TaskDataset(Dataset):
    """In-memory dataset of ``(id, image, label)`` samples.

    The three lists ``ids``, ``imgs`` and ``labels`` are indexed in parallel;
    they start empty and are filled by whoever builds or unpickles the object.
    """

    def __init__(self, transform=None):
        # Optional callable applied to an image each time it is fetched.
        self.transform = transform

        self.ids = []
        self.imgs = []
        self.labels = []

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int]:
        """Return ``(id, image, label)`` for ``index``, applying ``transform`` if set."""
        sample_id = self.ids[index]
        image = self.imgs[index]
        if self.transform is not None:
            image = self.transform(image)
        return sample_id, image, self.labels[index]

    def __len__(self):
        """Number of samples, taken from the length of ``ids``."""
        return len(self.ids)


class MembershipDataset(TaskDataset):
    """``TaskDataset`` whose samples additionally carry a membership value."""

    def __init__(self, transform=None):
        super().__init__(transform)
        # One entry per sample, indexed in parallel with ids/imgs/labels.
        self.membership = []

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int, int]:
        """Return ``(id, image, label, membership)`` for ``index``."""
        base_fields = super().__getitem__(index)
        return (*base_fields, self.membership[index])


# NOTE(security): both files are loaded as full pickled objects (annotated as
# MembershipDataset), which can execute arbitrary code on load. Only use files
# from a trusted source.
# `weights_only=False` is explicit because PyTorch releases that default to
# `weights_only=True` refuse to unpickle custom classes such as these datasets.
# Paths are relative so they match where the download cell saved the files.
privData: MembershipDataset = torch.load("priv.pt", weights_only=False)
pubData: MembershipDataset = torch.load("pub.pt", weights_only=False)

In [None]:
# Define custom collate_fn to ignore the membership feature
def custom_collate(batch):
    """Collate a batch using only the first three fields of every sample.

    Each sample is cut down to ``(sample[0], sample[1], sample[2])`` before
    being passed to PyTorch's default collate function; any further fields
    (e.g. the membership entry) are dropped.
    """
    default_collate = torch.utils.data.dataloader.default_collate
    trimmed = []
    for sample in batch:
        sample_id, image, label = sample[0], sample[1], sample[2]
        trimmed.append((sample_id, image, label))
    return default_collate(trimmed)

In [None]:
import torch
from torchvision import transforms

class NormalizedDataset(torch.utils.data.Dataset):
    """Wrapper that serves channel-normalized copies of a dataset's images.

    All images are normalized once, eagerly, in the constructor and cached in
    ``self.imgs``. ``ids``, ``labels`` and ``membership`` are shared with the
    wrapped dataset (same list objects, not copies).
    """

    def __init__(self, dataset):
        self.dataset = dataset
        self.normalize = transforms.Normalize(
            mean=[0.298, 0.2962, 0.2987],
            std=[0.2886, 0.2875, 0.2889],
        )  # Adjust mean and std as needed

        self.ids = dataset.ids
        self.imgs = []
        self.labels = dataset.labels
        self.membership = dataset.membership

        self.normalize_images()

    def normalize_images(self):
        """Append the normalized version of every wrapped image to ``self.imgs``."""
        self.imgs.extend(self.normalize(image) for _, image, _, _ in self.dataset)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(id, normalized_image, label, membership)`` for ``idx``."""
        # Id, label and membership come from the wrapped dataset; only the image
        # is swapped for the cached normalized one.
        sample_id, _, label, membership = self.dataset[idx]
        return sample_id, self.imgs[idx], label, membership


# Wrap each split exactly once. Without the guard, re-running this cell would
# wrap the already-normalized dataset again and normalize its images a second
# time, silently changing every downstream result.
if not isinstance(privData, NormalizedDataset):
    privData = NormalizedDataset(privData)
if not isinstance(pubData, NormalizedDataset):
    pubData = NormalizedDataset(pubData)

In [None]:
def _as_membership_dataset(records):
    """Build a MembershipDataset from ``(id, image, label, membership)`` tuples."""
    built = MembershipDataset()
    built.ids, built.imgs, built.labels, built.membership = zip(*records)
    return built


# Single pass over the public split, bucketing samples by their membership flag.
# Samples whose flag is neither 1 nor 0 are left out of both buckets.
member_images = []
non_member_images = []

for id_, image, label, membership in pubData:
    if membership == 1:
        member_images.append((id_, image, label, 1))
        continue
    if membership == 0:
        non_member_images.append((id_, image, label, 0))

memberData = _as_membership_dataset(member_images)
nonMemberData = _as_membership_dataset(non_member_images)

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
def evaluateModel(dataset, model, isPriv):
    """Print the top-1 accuracy of ``model`` on ``dataset``.

    Args:
        dataset: Dataset whose samples start with ``(id, image, label)``.
        model: Classifier mapping an image batch to per-class scores.
        isPriv: If true, batches are built with ``custom_collate`` (only the
            first three fields of each sample are collated); otherwise the
            DataLoader's default collate is applied to whole samples.

    Raises:
        ValueError: If the dataset yields no samples.

    The model is moved to the module-level ``device``. It is switched to eval
    mode for the measurement and then put back in whatever mode it was in, so
    calling this inside a training loop does not leave the model in eval mode.
    The printed label is the fixed text "Accuracy on privData", whichever
    dataset is passed.
    """
    was_training = model.training
    model.eval()
    model.to(device)

    # collate_fn=None selects the DataLoader's default collate function.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=32,
        shuffle=False,
        collate_fn=custom_collate if isPriv else None,
    )

    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for batch in data_loader:
                # Field 1 is the image batch, field 2 the labels, in both layouts.
                images, labels = batch[1].to(device), batch[2].to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Restore the caller's train/eval mode even if evaluation fails.
        model.train(was_training)

    if total == 0:
        raise ValueError("evaluateModel: dataset produced no samples")

    accuracy = correct / total

    print(f'Accuracy on privData: {accuracy:.2%}')

# evaluateModel(privData, model, True)
# evaluateModel(memberData, member_shadow_model, False)

In [None]:
# evaluateModel(nonMemberData, model, False)

Accuracy on privData: 69.29%


In [None]:
def initialize_shadow_model(num_classes=44):
    """Create an untrained ResNet-18 with a ``num_classes``-way head on ``device``.

    Args:
        num_classes: Output size of the replaced final fully connected layer.

    Returns:
        The new model, already moved to the module-level ``device``.
    """
    # No arguments means "no pretrained weights" on old and new torchvision
    # alike, without the deprecation warning that `pretrained=` triggers.
    shadow_model = resnet18()
    # Read the feature width from the existing head instead of hard-coding 512.
    shadow_model.fc = nn.Linear(shadow_model.fc.in_features, num_classes)
    return shadow_model.to(device)

In [None]:
def train_model(model, data_loader, epochs=15):
    """Train ``model`` with Adam and cross-entropy, reporting after every epoch.

    Args:
        model: Network to optimize in place.
        data_loader: Iterable of ``(id, images, labels, membership)`` batches.
        epochs: Number of passes over ``data_loader``.

    Raises:
        ValueError: If ``data_loader`` yields no batches during an epoch.

    After each epoch this prints the loss of the last batch and evaluates the
    model being trained on the module-level ``memberData``.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = torch.nn.CrossEntropyLoss()
    for epoch in range(epochs):
        # evaluateModel() at the end of an epoch puts the model in eval mode, so
        # training mode must be re-entered every epoch. Otherwise epochs 2+
        # would run with frozen BatchNorm statistics.
        model.train()
        loss = None
        for _, images, labels, _ in data_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        if loss is None:
            raise ValueError("train_model: data_loader produced no batches")
        print(f"Epoch {epoch+1}, Loss: {loss.item()}")
        # Evaluate the model passed in, not a same-named global.
        evaluateModel(memberData, model, False)

In [None]:
# Seed PyTorch so weight initialisation and DataLoader shuffling repeat across
# runs on the same setup (GPU kernels may still introduce nondeterminism).
torch.manual_seed(42)

member_shadow_model = initialize_shadow_model()

# NOTE(review): despite its name, this loader iterates over all of pubData, not
# only the samples flagged as members. Confirm that is intended.
member_loader = DataLoader(pubData, batch_size=32, shuffle=True)

train_model(member_shadow_model, member_loader)



Epoch 1, Loss: 1.0860037803649902
Accuracy on privData: 50.35%
Epoch 2, Loss: 1.2829656600952148
Accuracy on privData: 55.29%
Epoch 3, Loss: 1.3135161399841309
Accuracy on privData: 58.99%
Epoch 4, Loss: 1.1688902378082275
Accuracy on privData: 60.95%
Epoch 5, Loss: 1.2450504302978516
Accuracy on privData: 65.13%
Epoch 6, Loss: 0.8963084816932678
Accuracy on privData: 67.74%
Epoch 7, Loss: 0.9382441639900208
Accuracy on privData: 68.42%
Epoch 8, Loss: 0.8328582048416138
Accuracy on privData: 71.77%
Epoch 9, Loss: 0.7715686559677124
Accuracy on privData: 72.85%
Epoch 10, Loss: 0.6999406814575195
Accuracy on privData: 71.86%
Epoch 11, Loss: 0.9029583930969238
Accuracy on privData: 77.82%
Epoch 12, Loss: 0.6915234327316284
Accuracy on privData: 80.44%
Epoch 13, Loss: 0.7025669813156128
Accuracy on privData: 81.92%
Epoch 14, Loss: 0.44177502393722534
Accuracy on privData: 83.79%
Epoch 15, Loss: 0.5634000897407532
Accuracy on privData: 86.33%


In [None]:
# evaluateModel(memberData, model, False)

Accuracy on privData: 74.67%


In [None]:
# Persist only the shadow model's parameters (its state_dict) to shadow.pt.
shadow_state = member_shadow_model.state_dict()
torch.save(shadow_state, "shadow.pt")

In [None]:
# Accuracy of the shadow model on the public samples flagged as non-members.
# The printed label still says "privData" because evaluateModel's text is fixed.
evaluateModel(dataset=nonMemberData, model=member_shadow_model, isPriv=False)

Accuracy on privData: 86.55%


In [None]:
def extract_logits(dataset, model, isPriv):
    """Run ``model`` over ``dataset`` and return all of its outputs as one array.

    Args:
        dataset: Dataset whose samples have the image at position 1.
        model: Network applied to each image batch.
        isPriv: If true, batches are built with ``custom_collate`` (only the
            first three fields of each sample are collated); otherwise the
            DataLoader's default collate is applied to whole samples.

    Returns:
        A NumPy array with one row of model outputs per sample, in dataset
        order (the loader does not shuffle).

    Raises:
        ValueError: From ``np.concatenate`` if the dataset yields no batches.

    The model is evaluated in eval mode under ``torch.no_grad()`` and is put
    back in its previous train/eval mode before returning.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    was_training = model.training
    model.eval()
    model.to(device)

    # collate_fn=None selects the DataLoader's default collate function.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=32,
        shuffle=False,
        collate_fn=custom_collate if isPriv else None,
    )

    logits_list = []
    try:
        with torch.no_grad():
            for batch in data_loader:
                images = batch[1].to(device)
                logits_list.append(model(images).cpu().numpy())
    finally:
        # Restore the caller's mode even if the forward pass fails.
        model.train(was_training)

    return np.concatenate(logits_list)

In [None]:
# Per-sample outputs of the shadow model on the public split; these become the
# attack features.
# NOTE(review): the shadow model was itself trained on all of pubData, so its
# outputs may not separate the samples by their membership flag. Confirm the
# intended feature source.
pub_logits = extract_logits(dataset=pubData, model=member_shadow_model, isPriv=False)

In [None]:
# Display the extracted public logits (the notebook shows an abbreviated repr).
pub_logits

array([[ -8.743673 , -16.612362 , -13.538902 , ..., -15.33601  ,
        -11.160714 ,  -5.182898 ],
       [-12.740398 , -13.098157 , -13.202584 , ...,  -8.839644 ,
        -13.401994 , -12.513231 ],
       [ -7.164668 ,  -6.143988 ,  -9.841757 , ...,   0.5349922,
         -7.3307934,  -6.7077346],
       ...,
       [-17.640074 ,  -8.83491  , -13.856387 , ...,   5.777608 ,
        -10.384461 , -12.551024 ],
       [ -8.420685 , -13.057574 , -13.989132 , ..., -11.4189005,
        -10.285997 ,  -8.116081 ],
       [-11.189415 , -16.74889  , -21.732048 , ..., -15.1533985,
        -12.245355 ,  -6.331552 ]], dtype=float32)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch.nn.functional as F

# Attack labels: one membership flag per public sample. Rows line up with
# pub_logits because the logits were extracted without shuffling.
membership_array = np.array(pubData.membership)

# Alternative feature (disabled): softmax probabilities instead of raw logits.
# probabilities = F.softmax(torch.tensor(pub_logits), dim=1).numpy()

# Fixed 80/20 split; the same split is reused by the later attack models.
X_train, X_test, y_train, y_test = train_test_split(
    pub_logits,
    membership_array,
    test_size=0.2,
    random_state=42,
)

log_reg_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Hard predictions for accuracy, class-1 probabilities for the ROC analysis.
y_pred = log_reg_model.predict(X_test)
y_pred_proba = log_reg_model.predict_proba(X_test)[:, 1]  # Probability of class 1 (membership)

accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")

print("Predicted probabilities:", y_pred_proba)

Accuracy: 0.5025
Predicted probabilities: [0.52567313 0.50813481 0.49605438 ... 0.49465893 0.4592844  0.50974473]


In [None]:
from sklearn.metrics import roc_auc_score, roc_curve

# Area under the ROC curve for the logistic-regression attack.
auc_score = roc_auc_score(y_test, y_pred_proba)
print("AUC Score:", auc_score)

# Full ROC curve, used below to read off one operating point.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)

# Report the TPR at the ROC point whose FPR lies closest to the target. This is
# a nearest-point lookup, so the chosen point's FPR may be slightly above or
# below the target.
target_fpr = 0.05
closest_index = np.abs(fpr - target_fpr).argmin()
tpr_at_target_fpr = tpr[closest_index]

print(f"TPR at FPR = {target_fpr}: {tpr_at_target_fpr}")

AUC Score: 0.5029711435696473
TPR at FPR = 0.05: 0.0431077694235589


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
from sklearn.model_selection import cross_val_score

def _closest_roc_index(fpr_values, fpr_target):
    """Return the index of the ROC point whose FPR is nearest to ``fpr_target``."""
    return np.argmin(np.abs(fpr_values - fpr_target))


# Operating point reported for every attack model below.
target_fpr = 0.05

# --- K-nearest neighbours ---------------------------------------------------
knn_model = KNeighborsClassifier()
knn_cv_accuracy = cross_val_score(knn_model, X_train, y_train, cv=5, scoring='accuracy').mean()
print("KNN Cross-Validation Accuracy:", knn_cv_accuracy)

knn_model.fit(X_train, y_train)

y_pred_proba_knn = knn_model.predict_proba(X_test)[:, 1]

accuracy_knn = knn_model.score(X_test, y_test)
print("KNN Accuracy:", accuracy_knn)

auc_score_knn = roc_auc_score(y_test, y_pred_proba_knn)
print("KNN AUC Score:", auc_score_knn)

fpr_knn, tpr_knn, thresholds_knn = roc_curve(y_test, y_pred_proba_knn)

closest_index_knn = _closest_roc_index(fpr_knn, target_fpr)
tpr_at_target_fpr_knn = tpr_knn[closest_index_knn]
print(f"KNN TPR at FPR = {target_fpr}: {tpr_at_target_fpr_knn}")

# --- Random forest ----------------------------------------------------------
# Seeded so the forest, and the submission scores later derived from it, are
# the same on every run. 42 matches the seed used for the train/test split.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

y_pred_proba_rf = rf_model.predict_proba(X_test)[:, 1]

accuracy_rf = rf_model.score(X_test, y_test)
print("Random Forest Accuracy:", accuracy_rf)

auc_score_rf = roc_auc_score(y_test, y_pred_proba_rf)
print("Random Forest AUC Score:", auc_score_rf)

fpr_rf, tpr_rf, thresholds_rf = roc_curve(y_test, y_pred_proba_rf)

closest_index_rf = _closest_roc_index(fpr_rf, target_fpr)
tpr_at_target_fpr_rf = tpr_rf[closest_index_rf]
print(f"Random Forest TPR at FPR = {target_fpr}: {tpr_at_target_fpr_rf}")

# --- Linear discriminant analysis -------------------------------------------
lda_model = LinearDiscriminantAnalysis()
lda_model.fit(X_train, y_train)

y_pred_proba_lda = lda_model.predict_proba(X_test)[:, 1]

accuracy_lda = lda_model.score(X_test, y_test)
print("LDA Accuracy:", accuracy_lda)

auc_score_lda = roc_auc_score(y_test, y_pred_proba_lda)
print("LDA AUC Score:", auc_score_lda)

fpr_lda, tpr_lda, thresholds_lda = roc_curve(y_test, y_pred_proba_lda)

closest_index_lda = _closest_roc_index(fpr_lda, target_fpr)
tpr_at_target_fpr_lda = tpr_lda[closest_index_lda]
print(f"LDA TPR at FPR = {target_fpr}: {tpr_at_target_fpr_lda}")


KNN Cross-Validation Accuracy: 0.5011249999999999
KNN Accuracy: 0.51175
KNN AUC Score: 0.5107880674254215
KNN TPR at FPR = 0.05: 0.033583959899749376
Random Forest Accuracy: 0.495
Random Forest AUC Score: 0.5084529278307989
Random Forest TPR at FPR = 0.05: 0.06666666666666667
LDA Accuracy: 0.50175
LDA AUC Score: 0.503096519353246
LDA TPR at FPR = 0.05: 0.0431077694235589


In [None]:
# Shadow-model outputs for the private split. isPriv=True selects the collate
# function that collates only the first three fields of each sample.
priv_logits = extract_logits(dataset=privData, model=member_shadow_model, isPriv=True)

In [None]:
# Display the extracted private logits (the notebook shows an abbreviated repr).
priv_logits

array([[-15.405442 , -15.395304 ,  -9.934013 , ...,  -7.875381 ,
        -17.036167 , -15.766642 ],
       [-19.847395 , -19.078176 , -12.235316 , ..., -10.453823 ,
        -21.728031 , -20.33313  ],
       [-10.183053 , -12.001919 , -14.970554 , ..., -10.643057 ,
        -10.364401 ,  -7.9737897],
       ...,
       [-10.019365 ,  -5.3200507,  -7.86508  , ...,   2.5151296,
         -6.7455444,  -8.708804 ],
       [-10.409491 ,  -8.14324  , -33.698948 , ...,  -4.649087 ,
        -12.750782 ,  -8.293964 ],
       [ -6.953123 ,  -4.634598 ,  -8.07011  , ...,   1.3038908,
         -6.215314 ,  -6.76795  ]], dtype=float32)

In [None]:
# Score every private sample with the random-forest attack model. Column 1 of
# predict_proba is used, as for the other attack models, as the class-1
# (membership) probability.
priv_class_proba = rf_model.predict_proba(priv_logits)
priv_scores = priv_class_proba[:, 1]

In [None]:
# Display the per-sample scores computed for the private split.
priv_scores

array([0.47, 0.55, 0.35, ..., 0.24, 0.7 , 0.79])

In [None]:

#### EXAMPLE SUBMISSION

# One row per private sample: its id and the score assigned to it.
# The CSV is written without the DataFrame index column.
df = pd.DataFrame({"ids": privData.ids, "score": priv_scores})
df.to_csv("check.csv", index=False)

In [None]:
import os

import requests

# NOTE(security): the submission token is committed in this notebook. Prefer
# setting the MIA_TOKEN environment variable and removing the literal fallback.
submission_token = os.environ.get("MIA_TOKEN", "76282151")

# Open the CSV in a context manager so the handle is closed after the upload
# instead of being left open for the life of the kernel.
with open("check.csv", "rb") as submission_file:
    response = requests.post(
        "http://35.184.239.3:9090/mia",
        files={"file": submission_file},
        headers={"token": submission_token},
    )
print(response.json())

{'TPR@FPR=0.05': 0.058, 'AUC': 0.5029855}
