<a href="https://colab.research.google.com/github/madhavanrx18/0xday-Hackathon-/blob/main/Partial_Homomorphic_Encryption.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from phe import paillier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Step 1: Load the dataset
df = pd.read_csv('/content/drive/MyDrive/output_file.csv')

# Rescale `age` by 365.25 (presumably days -> years; confirm against the data dictionary)
df['age'] = df['age'] / 365.25

# Step 2: Preprocess the raw columns
# Drop the identifying columns; they are not used as features
df = df.drop(columns=['Name', 'Phone Number'])

# Encode categorical columns as integer labels (the encoder is re-fit for every column)
categorical_columns = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']
encoder = LabelEncoder()
for col in categorical_columns:
    df[col] = encoder.fit_transform(df[col])

# Normalize numerical columns
# NOTE(review): this scaler is fit on the full dataset, before the train/test split.
# Every feature is standardized again in Step 4 using training-split statistics only.
scaler = StandardScaler()
numerical_columns = ['age', 'height', 'weight', 'ap_hi', 'ap_lo']
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

# Split features and target
X = df.drop(columns=['cardio']).values
y = df['cardio'].values

# Step 3: Split the dataset, holding out 0.1% of the rows for testing (test_size=0.001).
# The held-out set is presumably kept tiny because each test value is Paillier-encrypted below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.001, random_state=42)

# Step 4: Standardize all features with statistics learned from the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # Standardize training features
X_test_scaled = scaler.transform(X_test)  # Standardize test features based on the training data

# Step 5: Train the Logistic Regression model on non-encrypted data
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Step 6: Get the model weights and intercept
weights = model.coef_.flatten()  # Coefficients of the logistic regression model
intercept = model.intercept_[0]  # Intercept of the model

# Step 7: Generate a Paillier key pair and encrypt the intercept
# (the weights stay in plaintext; they are applied to ciphertexts as scalars in Step 9)
public_key, private_key = paillier.generate_paillier_keypair()

# Encrypt the intercept
encrypted_b = public_key.encrypt(intercept)

# Step 8: Encrypt the test data (X_test)
# Encrypt each feature individually
encrypted_X_test = []
for row in X_test_scaled:
    encrypted_row = [public_key.encrypt(x) for x in row]
    encrypted_X_test.append(encrypted_row)

# Step 9: Compute the dot product w * X directly on the encrypted features.
# Paillier supports ciphertext + ciphertext and ciphertext * plaintext scalar, so
# sum_i(Enc(x_i) * w_i) is an encryption of w . x and no feature is ever decrypted.
# The ciphertext is kept on the left of `*` so its own operator is used, not numpy's.
encrypted_dot_product = []
for encrypted_row in encrypted_X_test:
    encrypted_terms = [enc_x * float(w) for enc_x, w in zip(encrypted_row, weights)]
    # Seed the sum with the first term so no extra "0 + ciphertext" operation is needed
    encrypted_dot_product.append(sum(encrypted_terms[1:], encrypted_terms[0]))

# Plaintext reference of the same scores; not used by the encrypted inference,
# kept only so the notebook-level name remains available for sanity checks.
dot_product = np.dot(X_test_scaled, weights)

# Step 10: Perform encrypted inference (dot product + intercept)
def encrypted_inference(encrypted_dot_product, encrypted_b, public_key, decryption_key=None):
    """
    Add the encrypted intercept to each encrypted score and threshold the result.

    Only the addition happens on ciphertexts. The comparison against zero is
    done on the decrypted sum, so this function needs the private key and is
    not a decryption-free inference step.

    Parameters:
    - encrypted_dot_product: Iterable of encrypted dot product values (w * X).
    - encrypted_b: Encrypted intercept (b).
    - public_key: Public key used to encrypt the resulting 0/1 labels.
    - decryption_key: Private key used to decrypt each sum before thresholding.
      When omitted, the notebook-level `private_key` is used, which keeps
      existing three-argument calls working.

    Returns:
    - List of encrypted predictions (encryptions of 0 or 1), one per input score.
    """
    if decryption_key is None:
        # Backward compatibility: earlier callers relied on the global key pair.
        decryption_key = private_key

    encrypted_predictions = []
    for encrypted_a in encrypted_dot_product:
        # Homomorphic addition of the intercept to the score
        encrypted_sum = encrypted_a + encrypted_b

        # Decision boundary: a strictly positive sum maps to 1, anything else to 0
        label = 1 if decryption_key.decrypt(encrypted_sum) > 0 else 0
        encrypted_predictions.append(public_key.encrypt(label))

    return encrypted_predictions

# Step 11: Run the inference routine over every encrypted test score
encrypted_predictions = encrypted_inference(
    encrypted_dot_product=encrypted_dot_product,
    encrypted_b=encrypted_b,
    public_key=public_key,
)

# Step 12: Recover the plaintext labels with the private key
decrypted_predictions = list(map(private_key.decrypt, encrypted_predictions))

# Step 13: Score the recovered labels against the held-out targets
accuracy = accuracy_score(y_true=y_test, y_pred=decrypted_predictions)

# Report the recovered labels and the resulting accuracy
print("Decrypted predictions:", decrypted_predictions)
print("Accuracy:", accuracy)


Decrypted predictions: [1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0]
Accuracy: 0.7714285714285715


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Step 1: Read the raw records from Drive
df = pd.read_csv('/content/drive/MyDrive/output_file.csv')

# Rescale `age` by a factor of 1 / 365.25
df['age'] = df['age'].div(365.25)

# Step 2: Preprocess
# Remove the two identifying columns; they are not model inputs
df = df.drop(['Name', 'Phone Number'], axis=1)

# Turn each categorical column into integer labels, re-fitting the encoder per column
categorical_columns = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']
encoder = LabelEncoder()
for column_name in categorical_columns:
    df[column_name] = encoder.fit_transform(df[column_name])

# Standardize the continuous columns over the whole frame
numerical_columns = ['age', 'height', 'weight', 'ap_hi', 'ap_lo']
scaler = StandardScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

# Separate the feature matrix from the `cardio` target
y = df['cardio'].to_numpy()
X = df.drop(columns=['cardio']).to_numpy()

# Step 3: Hold out test_size=0.001 of the rows for testing, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.001,
    random_state=42,
)

# Step 4: Learn standardization statistics on the training split, then apply to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 5: Fit a logistic regression on the plaintext training features
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Step 6: Predict labels for the held-out rows
y_pred = model.predict(X_test_scaled)

# Step 7: Compare predictions with the true labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the predictions and the accuracy
print("Predictions:", y_pred)
print("Accuracy:", accuracy)


Predictions: [1 1 1 0 0 0 0 0 1 1 0 0 0 0 1 1 1 0 0 0 0 0 0 1 0 1 1 0 1 0 1 0 0 1 1 0 1
 1 0 1 1 1 0 1 0 0 1 0 0 0 1 1 0 1 0 0 0 0 1 1 1 1 1 1 1 0 0 0 1 0]
Accuracy: 0.7714285714285715
