In [7]:
import tenseal as ts
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report


In [9]:
import pandas as pd

dataset1 = pd.read_csv("dataset1.csv")  
dataset2 = pd.read_csv("dataset2.csv")  

print("Party A sample:")
print(dataset1.head())

print("Party B sample:")
print(dataset2.head())


Party A sample:
          dr        id         x0         x1         x2         x3         x4  \
0  17.247447  69713556  98.573369  64.701596   4.374005  76.705994  95.553074   
1  28.013769  15727422  66.085030  21.745970  89.547778  78.822314  34.201912   
2  49.076994  79950559  93.711319  86.027265  84.941576  91.624861  96.074557   
3  57.287502  67356212  85.910043   2.725665   8.727508  66.915904  95.193664   
4  85.693853  89014544  45.959033  47.678536  33.906799  88.199954  32.407915   

          x5         x6         x7         x8         x9  
0  21.684720  52.519343  59.404645  85.862287  96.072936  
1  42.006270  44.125023  39.561638  99.037961  85.499318  
2  18.482570  35.540330  61.822073  96.355584  25.236448  
3  91.372558  76.402465  12.546064  39.474765  59.076848  
4  24.949828  80.418640  81.350156  86.486812  56.812471  
Party B sample:
         ar        id  fr
0  1.517801  69713556   0
1  4.614905  15727422   0
2  5.198290  79950559   1
3  4.323623  67356212  

In [13]:
context = ts.context(
    ts.SCHEME_TYPE.CKKS,
    poly_modulus_degree=8192,
    coeff_mod_bit_sizes=[60, 40, 40, 60]
)
context.generate_galois_keys()
context.global_scale = 2**40


In [15]:
#Each party computes their local features and encrypts them,  we simply sum all their numerical features into one feature vector
partyA_features = dataset1[['dr'] + [f'x{i}' for i in range(10)]].sum(axis=1).tolist()

# Encrypt the list of floats as a CKKS vector
encrypted_partyA = ts.ckks_vector(context, partyA_features)
partyB_features = dataset2['ar'].tolist()

encrypted_partyB = ts.ckks_vector(context, partyB_features)


In [17]:
#. Aggregator homomorphically sums the encrypted values
encrypted_sum = encrypted_partyA + encrypted_partyB


In [19]:
#Aggregator decrypts the combined sums
decrypted_sum = encrypted_sum.decrypt()
print("Decrypted combined features (first 5):", decrypted_sum[:5])



Decrypted combined features (first 5): [674.2172179401493, 633.2618885848356, 744.0918663981352, 599.9566094569293, 665.6617964323116]


In [21]:
#Use decrypted sums as features for model training
X = pd.DataFrame(decrypted_sum, columns=['combined_feature'])
y = dataset2['fr']  # fraud labels

clf = DecisionTreeClassifier()
clf.fit(X, y)

y_pred = clf.predict(X)
print(classification_report(y, y_pred))



              precision    recall  f1-score   support

           0       1.00      1.00      1.00       515
           1       1.00      1.00      1.00       485

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000



In [23]:
y_pred = clf.predict(X)

from sklearn.metrics import classification_report
print(classification_report(y, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00       515
           1       1.00      1.00      1.00       485

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000

