In [3]:
import pandas as pd
import numpy as np
import random
import math
from hashlib import sha256
import time
import matplotlib.pyplot as plt
import secrets
def U(seed):
    random.seed(seed)
    return random.randint(0, 2**32-1)


def generate_fingerprinted_database(R, K, T, Y, L, epsilon):
    # Calculate Bernoulli distribution parameter p
    p = 1 / (math.exp(float(epsilon) / K) + 1)

    # Construct fingerprintable set P
    P = set()
    for i, row in enumerate(R):
        for t in range(len(row)):
            for k in range(len(row[t])):
                P.add((i, t, k))

    # Generate internal ID for SP Step 2
    IDinternal = sha256(Y.encode('utf-8')).hexdigest()

    # Generate fingerprint for SP Step 3
    f = sha256((Y + IDinternal).encode('utf-8')).digest()

    # Convert fingerprint to bit array
    f = np.unpackbits(np.frombuffer(f, dtype=np.uint8))[:L]

    # Generate fingerprinted database
    start_time = time.time()
    for i, row in enumerate(R):
        for t in range(len(row)):
            for k in range(len(row[t])):
                if (i, t, k) in P:
                    # Set pseudorandom seed
                    s = sha256((Y + str(row[t][k]) + str(np.random.randint(1, 2 ** 32))).encode('utf-8')).hexdigest()
                    # Check if bit is marked for fingerprinting
                    if math.floor(1 / p) > 1 and np.random.randint(1, math.floor(1 / p)) == 1:
                        # Generate mask bit
                        x = 0 if np.random.randint(1, 3) == 1 else 1
                        # Generate fingerprint index
                        l = np.random.randint(0, L)
                        # Obtain mark bit
                        B = x ^ f[l]
                        # Insert fingerprint
                        row[t][k] ^= ord(B)
    elapsed_time = time.time() - start_time
    return elapsed_time, R


# Load dataset into pandas dataframe
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data', header=None)

# Extract features and labels from dataframe
X = df.iloc[:, 2:].values
y = df.iloc[:, 1].values

# Convert labels to 0s and 1s
y[y == '<=50K'] = 1
y[y == '>50K'] = 0

data = df[:5000].values.tolist()

# Create list of records
R = []
for row in data:
    R_row = []
    # Age
    R_row.append([bin(int(row[0]))[2:]])
    R_row.append([bin(int(row[2]))[2:]])
    R_row.append([bin(int(row[4]))[2:]])
    R_row.append([bin(int(row[10]))[2:]])
    R_row.append([bin(int(row[11]))[2:]])
    R_row.append([bin(int(row[12]))[2:]])
    R.append(R_row)

# Generate fingerprinted databases for different epsilon values
# Generate fingerprinted databases for different epsilon values
K = 8
T = 1
Y = 'mysecretkey'
L = 256
times = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
epsilons = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
np.random.shuffle(R)

for epsilon in epsilons:
    R_fingerprinted = generate_fingerprinted_database(R, K, T, Y, L, epsilon)
    print(R_fingerprinted)


plt.plot(epsilons, times)
plt.xlabel('Epsilon')
plt.ylabel('Elapsed Time (seconds)')
plt.title('Fingerprinting Elapsed Time vs. Epsilon')
plt.show()



TypeError: ignored