In [30]:
#@title Default title text
from typing import List
import numpy as np
import pandas as pd
import math
import random


def convert_to_tuples(lst: List) -> tuple:
    """Return ``lst`` as a tuple, turning nested lists into tuples recursively.

    Only elements that are ``list`` instances are descended into; every other
    element (including tuples) is kept exactly as it is.
    """
    converted = []
    for item in lst:
        if isinstance(item, list):
            item = convert_to_tuples(item)
        converted.append(item)
    return tuple(converted)


def get_seed(seed: int) -> int:
    """Reseed the module-level ``random`` generator and draw one integer.

    The draw is uniform over ``[0, 2**32 - 1]`` (both ends inclusive). Because
    the generator is reseeded on every call, the same ``seed`` always yields
    the same value; as a side effect the global ``random`` state is reset.
    """
    upper_bound = 2**32 - 1  # largest unsigned 32-bit value
    random.seed(seed)
    derived = random.randint(0, upper_bound)
    return derived


def extract_fingerprint(R: List[str], R_leaked: List[str], p_value: float, secret_key: str, fingerprint_template: List[int]) -> List[int]:
    """Recover fingerprint bits from the records shared by ``R`` and ``R_leaked``.

    Every record present in both collections casts one vote (0 or 1) for the
    fingerprint position ``fingerprint_template[2]``. Each returned bit is the
    majority vote for its position; positions with no votes, or with a tie,
    are reported as ``0``.

    Args:
        R: Original records. Each record is a (possibly nested) list; it is
            converted to nested tuples so that it can be stored in a set.
        R_leaked: Leaked records, in the same layout as ``R``.
        p_value: Positive probability parameter. The mask bit is 1 when a
            pseudorandom 32-bit draw is divisible by ``int(1 / (2 * p_value))``
            (clamped to at least 1).
        secret_key: Secret string mixed into the pseudorandom seed.
        fingerprint_template: ``[t, k, l]``. ``t`` and ``k`` each select a
            record field *and* the character position inside that field; the
            two selected characters are XOR-ed into the mark bit. ``l`` is the
            fingerprint position the vote is counted for. The result has
            ``len(fingerprint_template)`` entries.

    Returns:
        A list of 0/1 integers, one per entry of ``fingerprint_template``.

    Raises:
        ValueError: If there are shared records and ``p_value`` is not positive.
        IndexError: If there are shared records and the template has fewer
            than three entries, a shared record has no field or character at
            index ``t`` / ``k``, or ``l`` is outside the result.
    """
    bit_count = len(fingerprint_template)

    # Nested lists are unhashable, so records are converted to tuples before
    # being placed in sets.
    original_records = {convert_to_tuples(record) for record in R}
    leaked_records = {convert_to_tuples(record) for record in R_leaked}

    # Fingerprintable set P: records that appear in both collections.
    shared = original_records & leaked_records
    if not shared:
        # No record can vote, so every bit keeps its default value.
        return [0] * bit_count

    if p_value <= 0:
        raise ValueError("p_value must be positive")
    # int(1 / (2 * p_value)) is 0 for p_value > 0.5; clamp to avoid a
    # modulo-by-zero.
    modulus = max(1, int(1 / (2 * p_value)))

    # Seed a private generator with a string: unlike the built-in hash() of a
    # str (salted per process), this is reproducible across interpreter runs.
    # NOTE(review): the seed does not depend on the record, so every record
    # gets the same mask bit -- confirm whether a per-record seed was intended.
    rng = random.Random(f"{secret_key}{get_seed(1)}")
    mask_bit = int(rng.getrandbits(32) % modulus == 0)

    field_t = fingerprint_template[0]
    field_k = fingerprint_template[1]
    position = fingerprint_template[2]

    votes_zero = [0] * bit_count
    votes_one = [0] * bit_count
    for record in shared:
        # Mark bit: XOR of two characters; for '0'/'1' strings this is 0 or 1.
        mark_bit = ord(''.join(record[field_t])[field_t]) ^ ord(''.join(record[field_k])[field_k])

        # Undo the mask to recover this record's vote for the fingerprint bit.
        if mask_bit ^ mark_bit == 1:
            votes_one[position] += 1
        else:
            votes_zero[position] += 1

    # Majority vote per position. Comparing the two counters (rather than
    # thresholding their sum) makes the result depend on the votes themselves
    # and keeps the extraction deterministic.
    return [int(ones > zeros) for ones, zeros in zip(votes_one, votes_zero)]


# Load the UCI Adult dataset. The file has no header row, so the columns are
# addressed by position (0..14).
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data', header=None)

# Extract features and labels. The income label is the last column; column 1
# is the categorical "workclass" attribute, not the label.
X = df.iloc[:, :-1].values
income = df.iloc[:, -1].astype(str).str.strip()

# Convert labels to 0s and 1s ('<=50K' -> 1, '>50K' -> 0). Fields in
# adult.data are separated by ", ", so the raw strings carry a leading space
# and must be stripped before they are compared.
y = (income == '<=50K').astype(int).values

data = df[:5000].values.tolist()

# Build the record lists: each record holds three integer attributes encoded
# as binary strings, and every third record is treated as leaked.
R = []
R_leaked = []
for i, row in enumerate(data):
    R_row = [
        [bin(int(row[0]))[2:]],  # age
        [bin(int(row[2]))[2:]],  # fnlwgt
        [bin(int(row[4]))[2:]],  # education-num
    ]
    R.append(R_row)
    if i % 3 == 0:
        R_leaked.append(R_row)


# Fingerprinting parameters
K = 8
T = 1
Y = 'mysecretkey'
L = 256
p = 1 / (math.exp(T / K) + 1)

# Template value (change this according to your requirements)
template_value = [0, 2, 0]

# Generate fingerprinted dataset
fingerprinted_dataset = []
for row in R:
    # NOTE(review): `row` is a single record, so extract_fingerprint treats
    # its three fields as separate records and their intersection with
    # R_leaked is empty -- confirm whether `[row]` or the whole `R` was meant.
    fingerprint = extract_fingerprint(row, R_leaked, p, Y, template_value)
    fingerprinted_dataset.append(row + [fingerprint])


print("\nExtracted R_leaked:")
print(R_leaked)

# Print the list built above (the name `R_fingerprinted` is never defined).
print(fingerprinted_dataset)




Extracted R_leaked:
[[['100111'], ['10010111011001100'], ['1101']], [['110101'], ['111001010011100001'], ['111']], [['110001'], ['100111000110111011'], ['101']], [['101010'], ['100110111011011001'], ['1101']], [['10111'], ['11101110110100000'], ['1101']], [['100010'], ['111011111011101111'], ['100']], [['100110'], ['111000011010111'], ['111']], [['110110'], ['1001001110001000010'], ['1001']], [['111011'], ['11010100111010111'], ['1001']], [['110110'], ['101011111111110011'], ['1010']], [['10111'], ['101110100011110101'], ['1100']], [['11110'], ['1110101000101111'], ['1010']], [['10101'], ['110000001001010000'], ['1010']], [['110000'], ['1000000110100000101'], ['1100']], [['11000'], ['101010001110111011'], ['1101']], [['111001'], ['1010010011111100111'], ['1101']], [['101001'], ['11000110011100011'], ['1011']], [['10010'], ['110111011010001100'], ['1001']], [['101111'], ['11010110100001000'], ['1001']], [['100011'], ['1101110000100000'], ['1011']], [['11110'], ['1110100001101000'], ['1