In [30]:
import tenseal as ts
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report


In [32]:
import pandas as pd

# Load your datasets
dataset1 = pd.read_csv("dataset1.csv")
dataset2 = pd.read_csv("dataset2.csv")

# Show the first few rows to check
print(dataset1.head())
print(dataset2.head())


          dr        id         x0         x1         x2         x3         x4  \
0  17.247447  69713556  98.573369  64.701596   4.374005  76.705994  95.553074   
1  28.013769  15727422  66.085030  21.745970  89.547778  78.822314  34.201912   
2  49.076994  79950559  93.711319  86.027265  84.941576  91.624861  96.074557   
3  57.287502  67356212  85.910043   2.725665   8.727508  66.915904  95.193664   
4  85.693853  89014544  45.959033  47.678536  33.906799  88.199954  32.407915   

          x5         x6         x7         x8         x9  
0  21.684720  52.519343  59.404645  85.862287  96.072936  
1  42.006270  44.125023  39.561638  99.037961  85.499318  
2  18.482570  35.540330  61.822073  96.355584  25.236448  
3  91.372558  76.402465  12.546064  39.474765  59.076848  
4  24.949828  80.418640  81.350156  86.486812  56.812471  
         ar        id  fr
0  1.517801  69713556   0
1  4.614905  15727422   0
2  5.198290  79950559   1
3  4.323623  67356212   0
4  1.797798  89014544   0


In [34]:
# Merge datasets on 'id'
merged = pd.merge(dataset1, dataset2, on="id")

# Party 1 features (dr + x0...x9)
party1_features = ["dr"] + [f"x{i}" for i in range(10)]
X_p1 = merged[party1_features].values

# Party 2 features: 'ar'
X_p2 = merged[["ar"]].values

# Labels (fraud flag): 'fr' from Party 2
y = merged["fr"].values


In [36]:
import tenseal as ts

def create_context():
    context = ts.context(
        ts.SCHEME_TYPE.CKKS,
        poly_modulus_degree=8192,
        coeff_mod_bit_sizes=[60, 40, 40, 60],
    )
    context.generate_galois_keys()
    context.global_scale = 2**40
    return context

context = create_context()


In [38]:
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def local_gradient(X, y, weights):
    preds = sigmoid(np.dot(X, weights))
    error = preds - y
    grad = np.dot(X.T, error) / len(y)
    return grad


In [40]:
def encrypt_gradient(grad, context):
    return ts.ckks_vector(context, grad.tolist())

def decrypt_gradient(enc_grad):
    return np.array(enc_grad.decrypt())


In [42]:
weights_p1 = np.zeros(X_p1.shape[1])  # e.g., 11 features
weights_p2 = np.zeros(X_p2.shape[1])  # e.g., 1 feature


In [44]:
learning_rate = 0.1
num_rounds = 5

for round in range(num_rounds):
    print(f"Round {round + 1}")

    # Party 1 computes gradient on their features
    grad_p1 = local_gradient(X_p1, y, weights_p1)
    enc_grad_p1 = encrypt_gradient(grad_p1, context)

    # Party 2 computes gradient on their features
    grad_p2 = local_gradient(X_p2, y, weights_p2)
    enc_grad_p2 = encrypt_gradient(grad_p2, context)

    # Server aggregates encrypted gradients (homomorphic addition)
    aggregated_enc_grad = enc_grad_p1 + enc_grad_p2

    # Server decrypts aggregated gradient
    aggregated_grad = decrypt_gradient(aggregated_enc_grad)

    # Update weights for both parties
    weights_p1 -= learning_rate * aggregated_grad[: len(weights_p1)]
    weights_p2 -= learning_rate * aggregated_grad[len(weights_p1):]

    print("Weights Party 1:", weights_p1)
    print("Weights Party 2:", weights_p2)
    print()


Round 1


ValueError: non-broadcastable output operand with shape (1,) doesn't match the broadcast shape (0,)