In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# dataset stored there can be read with ordinary file paths by the cells below.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Plaintext (unencrypted) baseline model, used to compare accuracy against the PHE and FHE encrypted-domain models
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
 #data preprocessing
# Load the dataset and drop the User_ID column so it is not used as a feature.
df = pd.read_csv('/content/drive/MyDrive/archive/encoded_data.csv').drop(columns=['User_ID'])
y = df['target'].values
X = df.drop(columns=['target'])

# Standardize every feature column.
# NOTE(review): the scaler is fitted on all rows before the split, so test rows
# contribute to the scaling statistics; consider fitting on the training split only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.3, random_state=42
)
y_train, y_test = y_train.astype(int), y_test.astype(int)

# Fit the plaintext logistic-regression baseline and score it on the held-out rows.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Predictions:", y_pred)
print("Accuracy:", accuracy)


Predictions: [1 0 0 0 1 0 0 0 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 0 0 1 0 1 0 0 1 0 1 1 1 1 1
 0 0 0 0 1 0 1 0 0 0 1 0 1 1 1 1 0 1 0 0 1 1 0 0 1 0 1 0 0 1 1 1 0 0 1 1 1
 0 1 1 0 1 0 1 1 0 1 0 1 1 0 0 1 0 1 0 0 1 1 1 1 0 1 0 1 0 1 1 1 0 1 1 1 1
 0 1 0 1 1 0 1 0 0 1 0 0 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 0 1 0
 1 1 0 0 0 1 1 1 1 1 1 1 1 0 0 0 1 1 1 0 0 0 1 1 1 0 0 0 0 1 0 1 0 1 0 0 0
 1 1 1 1 0 0 1 1 1 0 1 0 1 1 1 1 0 1 0 1 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 1 0
 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 1 1 1 1 0 1 0 1 1 1 0 0 1 0 0 1 1 0 1 0 0 1
 1 0 0 1 1 0 0 0 0 1 1 1 1 0 0 0 1 1 1 0 1 1 0 1 1 1 0 0 1 0 0 0 1 0 1 0 1
 1 0 0 0]
Accuracy: 0.9666666666666667


In [None]:
#Paillier partial Homomorphic Encryption
!pip install phe
import pandas as pd
import numpy as np
from phe import paillier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
 #data preprocessing
# Load the dataset and drop the User_ID column so it is not used as a feature.
df = pd.read_csv('/content/drive/MyDrive/archive/encoded_data.csv').drop(columns=['User_ID'])
y = df['target'].values
X = df.drop(columns=['target'])

# NOTE(review): the scaler is fitted on all rows before the split, so test rows
# contribute to the scaling statistics; consider fitting on the training split only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.3, random_state=42
)
y_train, y_test = y_train.astype(int), y_test.astype(int)

# Train the logistic-regression model on plaintext (unencrypted) data and pull
# out its single coefficient row and intercept.
model = LogisticRegression().fit(X_train, y_train)
weights = model.coef_.flatten()
intercept = model.intercept_[0]

# Generate the Paillier key pair and encrypt the intercept with the public key.
public_key, private_key = paillier.generate_paillier_keypair()
encrypted_b = public_key.encrypt(intercept)

# The weighted feature sum w·x is computed in the clear and only then encrypted,
# one ciphertext per test row; the bias addition is what happens on ciphertexts later.
dot_product = X_test.dot(weights)
encrypted_dot_product = list(map(public_key.encrypt, dot_product))

# Paillier PHE for handling addition operation on encrypted data
def encrypted_inference(encrypted_dot_product, encrypted_b, private_key):
    """Add the encrypted bias to every encrypted score and classify the decrypted sum.

    Args:
        encrypted_dot_product: Iterable of ciphertexts, one per sample (the
            caller passes the encrypted w·x scores).
        encrypted_b: Ciphertext added to each element of ``encrypted_dot_product``.
        private_key: Object whose ``decrypt`` method turns a ciphertext sum
            back into a plaintext number.

    Returns:
        list[int]: 1 where the decrypted sum is strictly greater than 0, else 0,
        in the same order as the input.
    """
    def _label(encrypted_score):
        # The addition happens on ciphertexts; only the sum is decrypted.
        logit = private_key.decrypt(encrypted_score + encrypted_b)
        return 1 if logit > 0 else 0

    return [_label(encrypted_score) for encrypted_score in encrypted_dot_product]

# Run the Paillier inference over every encrypted test score; the result is a
# plain Python list of 0/1 labels.
decrypted_predictions = encrypted_inference(encrypted_dot_product, encrypted_b, private_key)

# Score the decrypted labels against the held-out targets and report the
# accuracy as a percentage.
accuracy = accuracy_score(y_test, decrypted_predictions)
print("Decrypted predictions:", decrypted_predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")


Decrypted predictions: [1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0]
Accuracy: 96.67%


In [None]:
#CKKS Fully Homomorphic Encryption
!pip install tenseal pandas scikit-learn torch torchvision --upgrade
import pandas as pd
import numpy as np
import tenseal as ts
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression

# Data preprocessing: load the CSV, drop the User_ID column so it is not used as
# a feature, and separate the feature columns from the 'target' label.
df = pd.read_csv('/content/drive/MyDrive/archive/encoded_data.csv').drop(columns=['User_ID'])
y = df['target'].values
X = df.drop(columns=['target'])

# NOTE(review): the scaler is fitted on all rows before the split, so test rows
# contribute to the scaling statistics; consider fitting on the training split only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.3, random_state=42
)
y_train, y_test = y_train.astype(int), y_test.astype(int)

# Train the logistic-regression model on plaintext (unencrypted) data.
model = LogisticRegression().fit(X_train, y_train)

# Set up the CKKS encryption context.
#   poly_modulus_degree  - polynomial ring size used by the scheme.
#   coeff_mod_bit_sizes  - bit sizes of the primes in the coefficient modulus
#                          chain; the inner 40-bit entries match the 2**40 scale below.
context = ts.context(
    ts.SCHEME_TYPE.CKKS,
    poly_modulus_degree=8192,
    coeff_mod_bit_sizes=[60, 40, 40, 60],
)
# Scale applied when encoding floating-point values into CKKS plaintexts.
context.global_scale = 2 ** 40
# Galois keys enable ciphertext rotations, which the encrypted dot product relies on.
context.generate_galois_keys()

# Encrypting Test Data Batchwise
def encrypt_batch(data, context, batch_size=500):
    """Encrypt ``data`` row by row, grouping the ciphertexts into batches.

    Args:
        data: Sliceable sequence of rows; each row must provide ``tolist()``.
        context: TenSEAL context passed to ``ts.ckks_vector`` for every row.
        batch_size: Maximum number of rows per batch (the last batch may be shorter).

    Returns:
        list[list]: One inner list per batch, holding one CKKS vector per row,
        in the original row order.
    """
    def _encrypt_rows(rows):
        return [ts.ckks_vector(context, plain_row.tolist()) for plain_row in rows]

    chunk_starts = range(0, len(data), batch_size)
    return [_encrypt_rows(data[start:start + batch_size]) for start in chunk_starts]

# Encrypt each test row as its own CKKS vector (grouped into batches of up to
# 500 rows, the encrypt_batch default).
encrypted_X_test_batches = encrypt_batch(X_test, context)
# The trained coefficient row and the intercept are encrypted as well, so the
# linear score is computed on ciphertexts only.
encrypted_weights = ts.ckks_vector(context, model.coef_[0].tolist())
encrypted_bias = ts.ckks_vector(context, [model.intercept_[0]])

# CKKS FHE for handling operation on encrypted data
def homomorphic_inference(encrypted_X_batches, encrypted_weights, encrypted_bias):
    """Compute ``row.dot(weights) + bias`` for every encrypted row.

    Args:
        encrypted_X_batches: Iterable of batches, each an iterable of encrypted
            rows exposing a ``dot`` method.
        encrypted_weights: Encrypted weight vector passed to each row's ``dot``.
        encrypted_bias: Encrypted bias added to each dot product.

    Returns:
        list[list]: Encrypted linear scores with the same batch structure and
        ordering as the input. No activation is applied.
    """
    return [
        [sample.dot(encrypted_weights) + encrypted_bias for sample in batch]
        for batch in encrypted_X_batches
    ]

# Evaluate weights·x + bias under encryption for every test row; the results
# stay encrypted until decrypt_predictions is called on them.
encrypted_predictions_batches = homomorphic_inference(
    encrypted_X_test_batches, encrypted_weights, encrypted_bias
)

# Decrypt Predictions
def decrypt_predictions(encrypted_predictions):
    """Decrypt batched encrypted scores into one flat list.

    Args:
        encrypted_predictions: Iterable of batches; every element of a batch
            must expose ``decrypt()`` returning an indexable sequence.

    Returns:
        list: The first element of each decrypted result, flattened across
        batches in their original order.
    """
    return [
        encrypted_score.decrypt()[0]
        for batch in encrypted_predictions
        for encrypted_score in batch
    ]

decrypted_predictions = decrypt_predictions(encrypted_predictions_batches)

# The encrypted pipeline evaluates only the linear score weights·x + bias (no
# sigmoid is applied under encryption), so the decrypted values are logits.
# P(y=1) > 0.5 is equivalent to logit > 0, hence the decision threshold is 0 --
# the same rule the plaintext LogisticRegression.predict uses.
final_predictions = [1 if pred > 0 else 0 for pred in decrypted_predictions]
accuracy = np.mean(np.array(final_predictions) == y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 97.00%


In [None]:
#CKKS Fully Homomorphic Encryption on neural network
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import tenseal as ts
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score

# Data preprocessing: load the CSV, drop the User_ID column so it is not used as
# a feature, and separate the feature columns from the 'target' label.
df = pd.read_csv('/content/drive/MyDrive/archive/encoded_data.csv').drop(columns=['User_ID'])
y = df['target'].values
X = df.drop(columns=['target'])

# NOTE(review): the scaler is fitted on all rows before the split, so test rows
# contribute to the scaling statistics; consider fitting on the training split only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.3, random_state=42
)

# Convert the splits to float32 tensors; the label tensors get a trailing
# dimension so they line up with the model's one-column output.
X_train_tensor, X_test_tensor = (
    torch.tensor(split, dtype=torch.float32) for split in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.float32).unsqueeze(1) for split in (y_train, y_test)
)

# Define the Simple Neural Network Model
class SimpleNN(nn.Module):
    """Single linear layer with one output, followed by a sigmoid.

    Args:
        input_size: Number of input features fed to the linear layer.
    """

    def __init__(self, input_size):
        super().__init__()
        self.fc = nn.Linear(input_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(fc(x))`` for the input batch ``x``."""
        logits = self.fc(x)
        probabilities = self.sigmoid(logits)
        return probabilities

# Initialize the model, loss function, and optimizer.
# Seed PyTorch's RNG first: nn.Linear draws its initial weights at random, so
# without a fixed seed the loss curve and the final accuracy change on every run.
torch.manual_seed(42)
input_size = X_train.shape[1]
model = SimpleNN(input_size)
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train the model with full-batch gradient descent: every epoch is a single
# optimizer step computed over the whole training set.
epochs = 100
model.train()  # training mode only needs to be set once, not on every epoch
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Report the loss every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}")

# Set up the CKKS encryption context.
#   poly_modulus_degree  - polynomial ring size used by the scheme.
#   coeff_mod_bit_sizes  - bit sizes of the primes in the coefficient modulus
#                          chain; the inner 40-bit entries match the 2**40 scale below.
context = ts.context(
    ts.SCHEME_TYPE.CKKS,
    poly_modulus_degree=8192,
    coeff_mod_bit_sizes=[60, 40, 40, 60],
)
# Scale applied when encoding floating-point values into CKKS plaintexts.
context.global_scale = 2 ** 40
# Galois keys enable ciphertext rotations, which the encrypted dot product relies on.
context.generate_galois_keys()

# Encrypt Data
def encrypt_data(data, context):
    """Encrypt every row of ``data`` as its own CKKS vector.

    Args:
        data: Iterable of rows; each row must support ``.numpy().tolist()``
            (the caller passes a torch tensor).
        context: TenSEAL context passed to ``ts.ckks_vector``.

    Returns:
        list: One CKKS vector per row, in input order.
    """
    encrypted_rows = []
    for sample in data:
        plain_values = sample.numpy().tolist()
        encrypted_rows.append(ts.ckks_vector(context, plain_values))
    return encrypted_rows

# NOTE(review): encrypted_X_test is not read again in the visible part of this
# cell -- inference below runs on encrypted_X_test_batches -- so this looks like
# a redundant second encryption of the test set. Confirm nothing else depends
# on it before removing.
encrypted_X_test = encrypt_data(X_test_tensor, context)

# Encrypt the trained layer's parameters: the flattened weight matrix and the
# bias (a one-element list, since the layer has a single output).
encrypted_weights = ts.ckks_vector(context, model.fc.weight.data.numpy().flatten().tolist())
encrypted_bias = ts.ckks_vector(context, model.fc.bias.data.numpy().tolist())

# CKKS FHE for handling operation on encrypted data
def homomorphic_inference(encrypted_X_batches, encrypted_weights, encrypted_bias):
    """Compute ``row.dot(weights) + bias`` for every encrypted row.

    Args:
        encrypted_X_batches: Iterable of batches, each an iterable of encrypted
            rows exposing a ``dot`` method.
        encrypted_weights: Encrypted weight vector passed to each row's ``dot``.
        encrypted_bias: Encrypted bias added to each dot product.

    Returns:
        list[list]: Encrypted linear scores with the same batch structure and
        ordering as the input. No activation is applied.
    """
    def _score_batch(batch):
        return [row.dot(encrypted_weights) + encrypted_bias for row in batch]

    return list(map(_score_batch, encrypted_X_batches))

# Encrypting Test Data Batchwise
def encrypt_batch(data, context, batch_size=500):
    """Encrypt ``data`` row by row, grouping the ciphertexts into batches.

    Args:
        data: Sliceable sequence of rows; each row must provide ``tolist()``.
        context: TenSEAL context passed to ``ts.ckks_vector`` for every row.
        batch_size: Maximum number of rows per batch (the last batch may be shorter).

    Returns:
        list[list]: One inner list per batch, holding one CKKS vector per row,
        in the original row order.
    """
    return [
        [ts.ckks_vector(context, row.tolist()) for row in data[offset:offset + batch_size]]
        for offset in range(0, len(data), batch_size)
    ]

# Encrypt the test tensor row by row, grouped into batches (encrypt_batch's
# default batch size is 500).
encrypted_X_test_batches = encrypt_batch(X_test_tensor, context)

# Compute the linear layer's output (weights·x + bias) on ciphertexts; the
# model's sigmoid is not applied in the encrypted path.
encrypted_predictions_batches = homomorphic_inference(
    encrypted_X_test_batches, encrypted_weights, encrypted_bias
)

#Decrypt Predictions
def decrypt_predictions(encrypted_predictions):
    """Decrypt batched encrypted scores into one flat list.

    Args:
        encrypted_predictions: Iterable of batches; every element of a batch
            must expose ``decrypt()`` returning an indexable sequence.

    Returns:
        list: The first element of each decrypted result, flattened across
        batches in their original order.
    """
    plain_scores = []
    for batch in encrypted_predictions:
        for encrypted_score in batch:
            plain_scores.append(encrypted_score.decrypt()[0])
    return plain_scores

decrypted_predictions = decrypt_predictions(encrypted_predictions_batches)

# The encrypted path computes only the linear layer (weights·x + bias); the
# network's sigmoid is not evaluated under encryption, so the decrypted values
# are logits. sigmoid(z) > 0.5 is equivalent to z > 0, hence the decision
# threshold on these values is 0, not 0.5.
final_predictions = [1 if pred > 0 else 0 for pred in decrypted_predictions]
accuracy = np.mean(np.array(final_predictions) == y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")


Epoch [10/100], Loss: 0.6552
Epoch [20/100], Loss: 0.6213
Epoch [30/100], Loss: 0.5909
Epoch [40/100], Loss: 0.5635
Epoch [50/100], Loss: 0.5390
Epoch [60/100], Loss: 0.5168
Epoch [70/100], Loss: 0.4968
Epoch [80/100], Loss: 0.4786
Epoch [90/100], Loss: 0.4621
Epoch [100/100], Loss: 0.4470
Accuracy: 80.67%
