# Code template for hand-in on reconstruction attacks.

Advanced Topics in Machine Learning, U. Copenhagen, fall 2024

Created by Rasmus Pagh with minor edits by Thomas Christensen

Queries on a hidden dataset x from {-1,+1}^100 can be made via the `query` function below,
which calls a web API that allows dot-product queries with vectors in {-1,+1}^100.
To protect the data, Laplace noise is added to the responses. Using the techniques you have
seen in the lecture, it is possible to partially reconstruct the dataset using 200 queries.
To make sure that you get a unique challenge, choose any unique alphanumeric string as your
challenge identifier. The web API keeps track of the number of queries made for each identifier.

# Support function for querying the web API

In [31]:
import numpy as np
import requests as rq
import ast

def query(challenge_id, query_vector, submit=False, timeout=60):
    """Send one or more +/-1 query vectors to the challenge web API.

    Args:
        challenge_id: Alphanumeric string identifying the challenge.
        query_vector: Array-like whose entries are all -1 or +1. A 1-D
            vector is sent as a single-row matrix; higher-dimensional input
            is sent unchanged.
        submit: Forwarded to the API as the ``submit`` form field. When it is
            exactly ``False`` the ``result`` field of the response is parsed
            as a Python literal and returned as an array; otherwise the raw
            ``result`` field is returned.
        timeout: Seconds to wait for the server before giving up, passed to
            ``requests.post``.

    Returns:
        ``np.ndarray`` built from the parsed ``result`` field when ``submit``
        is ``False``; otherwise the unparsed ``result`` field.

    Raises:
        ValueError: If ``challenge_id`` is not alphanumeric, or if
            ``query_vector`` is empty or has an entry other than -1 / +1.
        requests.HTTPError: If the server answers with an error status.
    """
    # Explicit checks instead of ``assert`` so validation survives ``python -O``.
    if not challenge_id.isalnum():
        raise ValueError("challenge_id must be a non-empty alphanumeric string")

    # Accept plain lists as well as arrays.
    query_vector = np.asarray(query_vector)
    if query_vector.size == 0:
        raise ValueError("query_vector must not be empty")
    if not np.all((query_vector == 1) | (query_vector == -1)):
        raise ValueError("query_vector entries must all be -1 or +1")

    if query_vector.ndim == 1:
        query_vector = query_vector.reshape(1, -1)

    payload = {'challengeid': challenge_id, 'submit': submit, 'query': str(query_vector.tolist())}
    # Without a timeout a stalled connection would block forever.
    http_response = rq.post(
        "https://baconbreaker.pythonanywhere.com/query",
        data=payload,
        timeout=timeout,
    )
    # Surface HTTP-level failures before trying to decode the body.
    http_response.raise_for_status()
    response = http_response.json()

    if submit is False:
        # literal_eval only accepts Python literals, so the text is not executed.
        return np.array(ast.literal_eval(response['result']))
    return response['result']



# Making random queries to the API

In [35]:
# Challenge identifier: letters/digits only. The server counts queries per
# identifier, so pick a new one for every fresh attempt.
challenge_id = 'josephdgb111'

# Problem size: n hidden bits, probed with m = 2n random sign vectors.
n = 256
m = 2 * n

# Seeded generator so the query matrix is reproducible
# (try other seeds only together with *different* challenge IDs).
rng = np.random.default_rng(42)
sign_values = [-1, +1]
random_signs = rng.choice(sign_values, size=(m, n), replace=True)
A = random_signs.astype(int)

# All m queries go out in one API call; flatten the reply to a float vector.
raw_response = query(challenge_id, A)
r = raw_response.astype(float).ravel()
print(f"Collected {len(r)} responses. A shape {A.shape}, r in [{r.min():.0f},{r.max():.0f}]")


Collected 512 responses. A shape (512, 256), r in [-133,109]


# Baseline reconstruction attack: Guess equals the best query

Copy the notebook and replace the baseline below with your own attack. The first submission for a given challenge_id is stored on the server and shown on the leaderboard: http://baconbreaker.pythonanywhere.com/leaderboard/?n=100

**Tip**: to solve a linear program you can use the following import:

```
from scipy.optimize import linprog
```



In [36]:
from scipy.optimize import linprog

# L1-residual reconstruction written as a linear program.
#
# Unknowns are stacked as z = [v, t] with v of length n and t of length m.
#   minimise    sum_j t_j
#   subject to   A v - t <=  r
#               -A v - t <= -r
#               -1 <= v_i <= 1,   t_j >= 0
# Together the two inequalities force t_j >= |(A v)_j - r_j|.
m, n = A.shape

# Objective: v carries no cost, every slack t_j costs 1.
c = np.concatenate([np.zeros(n), np.ones(m)])

# Constraint matrix in block form:
#   [  A  -I ]
#   [ -A  -I ]
neg_identity = -np.eye(m)
upper_rows = np.concatenate([A, neg_identity], axis=1)   #  A v - t <=  r
lower_rows = np.concatenate([-A, neg_identity], axis=1)  # -A v - t <= -r
A_ub = np.concatenate([upper_rows, lower_rows], axis=0)
b_ub = np.concatenate([r, -r])

# Box constraints on v, non-negativity on t.
bounds = [(-1.0, 1.0) for _ in range(n)] + [(0.0, None) for _ in range(m)]

res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=bounds, method='highs')
assert res.success, f"LP failed: {res.message}"

# Keep the v part of the solution and round it to signs (0 maps to +1).
v_hat = res.x[:n]
x_guess = np.full(v_hat.shape, -1, dtype=int)
x_guess[v_hat >= 0] = 1
print("Standard L1 LP solved. Candidate x_guess ready.")


Standard L1 LP solved. Candidate x_guess ready.


In [37]:
import re

# Submit the guess. Per the notebook instructions, only the first submission
# for a given challenge_id is stored on the server.
result = query(challenge_id, x_guess, submit=True)

# The server typically returns the inner product <x_hat, x>.
try:
    inner = float(result)
except (TypeError, ValueError):
    # Not directly numeric: fall back to the first integer found in the text.
    m_ = re.search(r'-?\d+', str(result))
    inner = float(m_.group()) if m_ else float('nan')

# Take n from the submitted guess rather than hard-coding it, so the report
# stays correct if the problem size changes.
n = int(x_guess.size)

print("=== Reconstruction Report ===")
if np.isfinite(inner):
    # <x_hat, x> = (#agreements) - (#disagreements) = 2 * num_correct - n
    num_correct = int((n + inner) / 2)
    acc = (1.0 + inner / n) / 2.0
    print(f"Inner product: {inner:.0f}")
    print(f"Correct: {num_correct}/{n} ({100*acc:.2f}%)")
else:
    # int(nan) would raise, so report the unparsed response instead.
    num_correct = None
    acc = float('nan')
    print(f"Inner product: unavailable (server response: {result!r})")
print(f"Challenge ID: {challenge_id}")
print(f"Leaderboard: http://baconbreaker.pythonanywhere.com/leaderboard/?n={n}")


=== Reconstruction Report ===
Inner product: 198
Correct: 227/256 (88.67%)
Challenge ID: josephdgb111
Leaderboard: http://baconbreaker.pythonanywhere.com/leaderboard/?n=256


In [8]:
import numpy as np 

def sample_laplace_noise(scale, size=None):
    """Draw zero-mean Laplace noise.

    Args:
        scale: Scale parameter of the Laplace distribution.
        size: Output shape forwarded to ``np.random.laplace``. ``None``
            (the default) yields a single scalar sample.

    Returns:
        A single sample when ``size`` is ``None``, otherwise an array of
        samples with the requested shape.
    """
    return np.random.laplace(loc=0.0, scale=scale, size=size)


# One vectorized draw instead of a million separate Python-level calls.
samples = np.abs(sample_laplace_noise(3.0, size=1_000_000))
print(f"Mean absolute value of Laplace noise samples: {np.mean(samples):.2f}")

Mean absolute value of Laplace noise samples: 3.00
