In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from tqdm import tqdm
import os
import torchvision.models as models
from typing import Tuple


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class TaskDataset(Dataset):
    def __init__(self, transform=None):

        self.ids = []
        self.imgs = []
        self.labels = []

        self.transform = transform

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int]:
        id_ = self.ids[index]
        img = self.imgs[index]
        if not self.transform is None:
            img = self.transform(img)
        label = self.labels[index]
        return id_, img, label

    def __len__(self):
        return len(self.ids)


class MembershipDataset(TaskDataset):
    def __init__(self, transform=None):
        super().__init__(transform)
        self.membership = []

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int, int]:
        id_, img, label = super().__getitem__(index)
        return id_, img, label

# Load public dataset
# List all files in the current directory
# print("Files in directory:", os.listdir("./task_1/"))
public_data = torch.load("task_1/pub.pt", map_location=device, weights_only=False)
private_data = torch.load("task_1/priv_out.pt", map_location=device, weights_only=False)

# Load model
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 44)
model.load_state_dict(torch.load("task_1/01_MIA_69.pt",  weights_only=False))

model.to(device)

print("Public samples:", len(public_data))
print("Private samples:", len(private_data))




Public samples: 20000
Private samples: 20000


In [2]:
def get_confidence_scores(model, dataloader):
    model.eval()
    confidences = []
    losses = []
    entropy_vals = []
    criterion = nn.CrossEntropyLoss(reduction="none")

    with torch.no_grad():
        for ids, images, labels in tqdm(dataloader):  # Now expecting 4 elements
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            probs = torch.softmax(outputs, dim=1)
            pred_classes = probs.argmax(dim=1)

            # Get the confidence of the predicted class
            confidence = probs[torch.arange(len(labels)), pred_classes]
            confidences.extend(confidence.cpu().numpy())

            # Compute per-sample loss
            loss = criterion(outputs, labels)
            losses.extend(loss.cpu().numpy())

            # Compute entropy (uncertainty measure)
            entropy = -torch.sum(probs * torch.log(probs + 1e-10), dim=1)
            entropy_vals.extend(entropy.cpu().numpy())

    return np.array(confidences), np.array(losses), np.array(entropy_vals)


In [3]:
# import torch.multiprocessing as mp
# mp.set_start_method("spawn", force=True)


# Convert public dataset to DataLoader
public_loader = DataLoader(public_data, batch_size=256, shuffle=False, num_workers=0, pin_memory=False)

# Get confidence scores, losses, and entropy
confidences, losses, entropies = get_confidence_scores(model, public_loader)

# Extract membership labels
membership_labels = np.array([sample[2] for sample in public_data])

# Create DataFrame
attack_train_df = pd.DataFrame({
    "confidence": confidences,
    "loss": losses,
    "entropy": entropies,
    "membership": membership_labels
})

print(attack_train_df.head())


100%|███████████████████████████████████████████████████████████████████████████████████| 79/79 [00:02<00:00, 37.35it/s]

   confidence       loss   entropy  membership
0    0.989788   7.835775  0.066668          18
1    0.999324  11.749366  0.006419          18
2    0.684836   5.230702  0.697159          11
3    0.998052  10.163871  0.016427          18
4    0.408767  11.488248  1.661287           1





In [4]:
!uv pip install scikit-learn

[2mUsing Python 3.12.9 environment at: /net/tscratch/people/tutorial004/ai[0m
[2mAudited [1m1 package[0m [2min 7ms[0m[0m


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Split into train & validation sets
X = attack_train_df[["confidence", "loss", "entropy"]]
y = attack_train_df["membership"]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Train logistic regression model
attack_model = LogisticRegression(max_iter=1000)
attack_model.fit(X_train, y_train)

# Validate model
y_pred = attack_model.predict_proba(X_val)  # Keep full probability matrix
y_train = (y_train > 0).astype(int)  # Convert to 0/1
y_val = (y_val > 0).astype(int)      # Convert to 0/1


auc_score = roc_auc_score(y_val, y_pred[:, 1], multi_class="ovr")
print(f"Validation AUC: {auc_score:.4f}")


Validation AUC: 0.6539


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [7]:
# Convert private dataset to DataLoader
private_loader = DataLoader(private_data, batch_size=256, shuffle=False, num_workers=0, pin_memory=False)

# Compute features for private dataset
confidences, losses, entropies = get_confidence_scores(model, private_loader)

# Create DataFrame
attack_test_df = pd.DataFrame({
    "confidence": confidences,
    "loss": losses,
    "entropy": entropies
})

# Predict membership probability
attack_test_df["membership_score"] = attack_model.predict_proba(attack_test_df)[:, 1]

# Save submission
attack_test_df[["membership_score"]].to_csv("submission_test.csv", index=False)

print("Submission file saved as 'submission.csv'")


100%|███████████████████████████████████████████████████████████████████████████████████| 79/79 [00:01<00:00, 62.20it/s]


Submission file saved as 'submission.csv'


In [16]:
attack_test_df['score'] = attack_test_df['membership_score']

attack_test_df['ids'] = private_data.ids

attack_test_df_csv = attack_test_df[['ids', 'score']]

attack_test_df_csv.to_csv('submission_test.csv', index=False)

In [17]:
with open("./submission_test.csv", "rb") as f:
    submission = f.read()
    print(submission[:1000])

b'ids,score\n251500,2.0737709737836e-06\n188458,0.0011832338177035857\n197856,0.0006052600082675227\n195124,0.005617410652873563\n244309,0.0047739615261675385\n156489,3.244530218780111e-12\n301921,0.0007838206308935009\n104556,4.449307132976715e-05\n51198,4.88517458745045e-07\n18228,0.000796918920309298\n67500,0.23487232314509351\n40882,0.015589113263112788\n141134,9.391125995547876e-08\n165428,0.2875338813683129\n121874,8.920330646459018e-08\n111598,3.341836213271293e-10\n245332,0.6285293663222967\n155741,7.80467743660879e-13\n293665,0.0008104677153106853\n113568,4.8829541530267894e-08\n116817,9.33740151473922e-06\n222038,4.11093595625908e-07\n94598,7.776056924544796e-08\n248501,0.02298675663703747\n16315,3.772459375054173e-05\n69947,0.14425350562706088\n230764,0.04512536052208172\n253405,0.05804819636402895\n204647,0.017058276856885032\n67827,0.12091436111411591\n48938,0.004728105316820242\n35164,2.3480781038691894e-05\n280421,0.5841989396088785\n2697,8.53009649640435e-08\n26576,0.00

In [18]:
import requests

result = requests.post(
    "http://149.156.182.9:6060/task-1/submit",
    headers={"token": "i2SLZ1KbTzJeGkfPTWxE2Y53W9D0R5"},
    files={
        "csv_file": ("submission.csv", open("./submission_test.csv", "rb"))
    }
)

print(result.status_code, result.text)

200 tpr: 0.052333333333333336, auc: 0.5071444444444444
