In [None]:
# Install necessary libraries
!pip install tenseal pandas scikit-learn torch torchvision --upgrade

# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
import tenseal as ts

# Load the dataset
df = pd.read_csv("/content/output_file.csv")

# Preprocess the data: 'age' is divided by 365.25 (days -> years) and the
# identifying columns are dropped so they are never used as features.
df['age'] = df['age'] / 365.25
df = df.drop(columns=['Name', 'Phone Number'])

# Encode categorical columns. fit_transform re-fits the encoder on every call,
# so sharing one LabelEncoder instance across columns is safe.
categorical_columns = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']
encoder = LabelEncoder()
for col in categorical_columns:
    df[col] = encoder.fit_transform(df[col])

# Normalize numerical columns
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test-set statistics leak into the features - consider fitting it
# on the training rows only.
scaler = StandardScaler()
numerical_columns = ['age', 'height', 'weight', 'ap_hi', 'ap_lo']
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

# Define features and target
X = df.drop(columns=['cardio'])
y = df['cardio']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Create a TenSEAL context for CKKS
context = ts.context(
    ts.SCHEME_TYPE.CKKS,
    poly_modulus_degree=8192,
    coeff_mod_bit_sizes=[60, 40, 40, 60]
)
context.generate_galois_keys()
context.global_scale = 2**40

# Encrypt the model coefficients (weights) and intercept.
# The weights are passed as a plain Python list, as the later cells do.
encrypted_weights = ts.ckks_vector(context, model.coef_[0].tolist())
encrypted_intercept = ts.ckks_vector(context, [model.intercept_[0]])

# Encrypt the test data (one ciphertext per row)
encrypted_X_test = [ts.ckks_vector(context, row.tolist()) for row in X_test.values]

# Batched encrypted inference
batch_size = 10  # Process 10 rows at a time
encrypted_predictions = []

for i in range(0, len(encrypted_X_test), batch_size):
    batch = encrypted_X_test[i:i+batch_size]
    for enc_row in batch:
        # Linear part of the model, dot(w, x) + b, evaluated on ciphertexts
        enc_pred = enc_row.dot(encrypted_weights)  # Dot product
        enc_pred += encrypted_intercept  # Add intercept
        encrypted_predictions.append(enc_pred)

# Decrypt predictions (each entry is the list returned by decrypt(); element 0
# is the value that is thresholded below)
decrypted_predictions = [enc_pred.decrypt() for enc_pred in encrypted_predictions]

# Convert predictions to binary classification.
# Only the linear part dot(w, x) + b is evaluated under encryption, so the
# decrypted value is a logit, not a probability. sigmoid(z) > 0.5 is
# equivalent to z > 0, therefore the cut-off is 0 (thresholding the logit at
# 0.5 mislabels every sample whose logit lies in (0, 0.5]).
final_predictions = [1 if pred[0] > 0 else 0 for pred in decrypted_predictions]

# Print final predictions
print("Predictions:", final_predictions)


Collecting pandas
  Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m100.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 2.2.2
    Uninstalling pandas-2.2.2:
      Successfully uninstalled pandas-2.2.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf-cu12 24.10.1 requires pandas<2.2.3dev0,>=2.0, but you have pandas 2.2.3 which is incompatible.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.2.3 

In [None]:
import pandas as pd
import numpy as np
import tenseal as ts
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression

# Step 1: Load and preprocess the dataset
df = pd.read_csv("/content/output_file.csv")

# 'age' is stored in days; express it in years
df['age'] = df['age'].div(365.25)

# Remove the identifying columns, which are not used as features
df = df.drop(['Name', 'Phone Number'], axis=1)

# Integer-encode every categorical column (the encoder is re-fitted per column)
encoder = LabelEncoder()
categorical_columns = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']
for col in categorical_columns:
    df[col] = encoder.fit_transform(df[col])

# Standardize the numerical columns
# NOTE(review): fitted on all rows before the split below - confirm this is intended.
numerical_columns = ['age', 'height', 'weight', 'ap_hi', 'ap_lo']
scaler = StandardScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

# Feature matrix (everything except 'cardio') and target vector as NumPy arrays
X = df.drop(columns='cardio').to_numpy()
y = df['cardio'].to_numpy()

# Hold out half of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)

# Step 2: Train a Logistic Regression Model (fit returns the estimator itself)
model = LogisticRegression().fit(X_train, y_train)

# Step 3: Setup Homomorphic Encryption Context
context = ts.context(
    ts.SCHEME_TYPE.CKKS,
    poly_modulus_degree=8192,
    coeff_mod_bit_sizes=[60, 40, 40, 60],
)
context.generate_galois_keys()
context.global_scale = 2 ** 40

# Step 4: Batch Encrypt Data and Model Parameters
def encrypt_batch(data, context):
    """Return one CKKS vector per row of ``data``.

    Every row is converted with ``row.tolist()`` and encrypted under
    ``context``; the ciphertexts are returned as a list in row order.
    """
    encrypted_rows = []
    for row in data:
        encrypted_rows.append(ts.ckks_vector(context, row.tolist()))
    return encrypted_rows

# Encrypt the test rows and the trained model parameters
encrypted_X_test = encrypt_batch(X_test, context)
encrypted_weights = ts.ckks_vector(context, model.coef_[0].tolist())
encrypted_bias = ts.ckks_vector(context, [model.intercept_[0]])

# Step 5: Homomorphic Inference with Batch Processing
# For every encrypted row compute dot(w, x) + b on ciphertexts.
encrypted_predictions = []
for enc_row in encrypted_X_test:
    enc_result = enc_row.dot(encrypted_weights) + encrypted_bias
    encrypted_predictions.append(enc_result)

# Step 6: Decrypt Predictions
decrypted_predictions = [enc_pred.decrypt()[0] for enc_pred in encrypted_predictions]
# The decrypted value is the logit dot(w, x) + b, not a probability: the
# sigmoid is never applied under encryption. sigmoid(z) > 0.5 <=> z > 0, so the
# class boundary is 0, not 0.5.
final_predictions = [1 if pred > 0 else 0 for pred in decrypted_predictions]

# Step 7: Evaluate the Results
print(f"Decrypted Predictions: {decrypted_predictions}")
print(f"Final Predictions: {final_predictions}")
accuracy = np.mean(np.array(final_predictions) == y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")


In [None]:
# Install necessary libraries
!pip install tenseal pandas scikit-learn torch torchvision --upgrade


Collecting tenseal
  Downloading tenseal-0.3.15-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.2 kB)
Collecting pandas
  Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Downloading tenseal-0.3.15-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (4.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m49.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m97.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tenseal, pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 2.2.2
    Uninstalling pandas-2.2.2:
      Successfully uninstalled pandas-2.

In [None]:
import pandas as pd
import numpy as np
import tenseal as ts
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression

# Step 1: Load and preprocess the dataset
df = pd.read_csv("/content/output_file.csv")

# Convert 'age' from days to years
df['age'] = df['age'].div(365.25)

# Drop the identifying columns; they are not model features
df = df.drop(['Name', 'Phone Number'], axis=1)

# Encode categorical columns as integers (encoder re-fitted for each column)
encoder = LabelEncoder()
categorical_columns = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']
for col in categorical_columns:
    df[col] = encoder.fit_transform(df[col])

# Standardize numerical columns
# NOTE(review): fitted on all rows before the split below - confirm this is intended.
numerical_columns = ['age', 'height', 'weight', 'ap_hi', 'ap_lo']
scaler = StandardScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

# Features are all columns except 'cardio'; 'cardio' is the target
X = df.drop(columns='cardio').to_numpy()
y = df['cardio'].to_numpy()

# 50/50 train/test split with a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)

# Step 2: Train a Logistic Regression Model
model = LogisticRegression().fit(X_train, y_train)

# Step 3: Setup Homomorphic Encryption Context
context = ts.context(
    ts.SCHEME_TYPE.CKKS,
    poly_modulus_degree=8192,
    coeff_mod_bit_sizes=[60, 40, 40, 60],
)
context.generate_galois_keys()
context.global_scale = 2 ** 40

# Step 4: Batch Encrypt Data and Model Parameters
def encrypt_batch(data, context, batch_size=500):
    """Encrypt ``data`` row by row, grouped into consecutive batches.

    ``data`` is sliced into chunks of at most ``batch_size`` rows. Each row is
    converted with ``row.tolist()`` and encrypted under ``context``.

    Returns a list of batches, each batch being a list of CKKS vectors in the
    original row order.
    """
    return [
        [ts.ckks_vector(context, row.tolist()) for row in data[start:start + batch_size]]
        for start in range(0, len(data), batch_size)
    ]

# Encrypt the model parameters: weight vector and bias term
encrypted_weights = ts.ckks_vector(context, model.coef_[0].tolist())
encrypted_bias = ts.ckks_vector(context, [model.intercept_[0]])

# Encrypt X_test, grouped into batches by encrypt_batch
encrypted_X_test_batches = encrypt_batch(data=X_test, context=context)

# Step 5: Homomorphic Inference with Batch Processing
def homomorphic_inference(encrypted_X_batches, encrypted_weights, encrypted_bias):
    """Apply the encrypted linear model to every row of every batch.

    For each encrypted row the result is ``row.dot(encrypted_weights) +
    encrypted_bias``. The output mirrors the input layout: a list of batches,
    each a list of per-row results.
    """
    return [
        [enc_row.dot(encrypted_weights) + encrypted_bias for enc_row in encrypted_X_batch]
        for encrypted_X_batch in encrypted_X_batches
    ]

# Run the encrypted linear model over every batch of encrypted test rows
encrypted_predictions_batches = homomorphic_inference(
    encrypted_X_batches=encrypted_X_test_batches,
    encrypted_weights=encrypted_weights,
    encrypted_bias=encrypted_bias,
)

# Step 6: Decrypt Predictions
def decrypt_predictions(encrypted_predictions):
    """Decrypt batched results into one flat list.

    ``encrypted_predictions`` is a list of batches. Every element of every
    batch is decrypted and the first entry of the decrypted vector is kept.
    Values are returned in batch order, then row order.
    """
    return [
        enc_pred.decrypt()[0]
        for batch in encrypted_predictions
        for enc_pred in batch
    ]

# Decrypt the predictions
decrypted_predictions = decrypt_predictions(encrypted_predictions_batches)

# Step 7: Evaluate the Results
# The encrypted path evaluates only dot(w, x) + b, so each decrypted value is a
# logit rather than a probability. sigmoid(z) > 0.5 <=> z > 0, hence the class
# boundary is 0. Thresholding the logit at 0.5 mislabels samples whose logit
# falls in (0, 0.5], which is why this cell scored below the plaintext model.
final_predictions = [1 if pred > 0 else 0 for pred in decrypted_predictions]
accuracy = np.mean(np.array(final_predictions) == y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 67.05%


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression

# Step 1: Load and preprocess the dataset
df = pd.read_csv("/content/output_file.csv")

# Convert 'age' from days to years
df['age'] = df['age'].div(365.25)

# Drop the identifying columns; they are not model features
df = df.drop(['Name', 'Phone Number'], axis=1)

# Encode categorical columns as integers (encoder re-fitted for each column)
encoder = LabelEncoder()
categorical_columns = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']
for col in categorical_columns:
    df[col] = encoder.fit_transform(df[col])

# Standardize numerical columns
# NOTE(review): fitted on all rows before the split below - confirm this is intended.
numerical_columns = ['age', 'height', 'weight', 'ap_hi', 'ap_lo']
scaler = StandardScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

# Features are all columns except 'cardio'; 'cardio' is the target
X = df.drop(columns='cardio').to_numpy()
y = df['cardio'].to_numpy()

# 50/50 train/test split with a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)

# Step 2: Train a Logistic Regression Model
model = LogisticRegression().fit(X_train, y_train)

# Step 3: Evaluate the Model (plaintext baseline: fraction of matching labels)
predictions = model.predict(X_test)
accuracy = (predictions == y_test).mean()
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 71.63%


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import tenseal as ts
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score

# Step 1: Load and preprocess the dataset
df = pd.read_csv("/content/output_file.csv")

# Convert 'age' from days to years
df['age'] = df['age'].div(365.25)

# Drop the identifying columns; they are not model features
df = df.drop(['Name', 'Phone Number'], axis=1)

# Encode categorical columns as integers (encoder re-fitted for each column)
encoder = LabelEncoder()
categorical_columns = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']
for col in categorical_columns:
    df[col] = encoder.fit_transform(df[col])

# Standardize numerical columns
# NOTE(review): fitted on all rows before the split below - confirm this is intended.
numerical_columns = ['age', 'height', 'weight', 'ap_hi', 'ap_lo']
scaler = StandardScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

# Features are all columns except 'cardio'; 'cardio' is the target
X = df.drop(columns='cardio').to_numpy()
y = df['cardio'].to_numpy()

# 50/50 train/test split with a fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)

# Convert data to float32 PyTorch tensors; targets become column vectors (n, 1)
# so they line up with the model's single output unit
X_train_tensor = torch.as_tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.as_tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.as_tensor(y_train, dtype=torch.float32).reshape(-1, 1)
y_test_tensor = torch.as_tensor(y_test, dtype=torch.float32).reshape(-1, 1)

# Step 2: Define the Simple Neural Network Model
class SimpleNN(nn.Module):
    """One linear layer followed by a sigmoid.

    ``fc`` maps ``input_size`` features to a single output; ``forward``
    passes that output through ``sigmoid``.
    """

    def __init__(self, input_size):
        super().__init__()
        self.fc = nn.Linear(in_features=input_size, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(fc(x))``."""
        return self.sigmoid(self.fc(x))

# Initialize the model, loss function, and optimizer.
# Seed torch first so the random weight initialisation - and therefore the
# reported loss and accuracy - is reproducible across kernel restarts.
torch.manual_seed(42)
input_size = X_train.shape[1]
model = SimpleNN(input_size)
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train the model with full-batch gradient descent: every epoch performs a
# single optimizer step over the entire training tensor.
epochs = 100
model.train()  # the mode never changes inside the loop, so set it once
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Report the training loss every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}")

# Step 3: Setup Homomorphic Encryption Context
# CKKS context with polynomial modulus degree 8192 and a 60/40/40/60-bit
# coefficient modulus chain; the global scale 2**40 matches the 40-bit primes.
context = ts.context(
    ts.SCHEME_TYPE.CKKS,
    poly_modulus_degree=8192,
    coeff_mod_bit_sizes=[60, 40, 40, 60],
)
# Galois keys - presumably needed by the encrypted dot product used later;
# see the TenSEAL documentation.
context.generate_galois_keys()
context.global_scale = 2 ** 40

# Step 4: Encrypt Data and Model Parameters
def encrypt_data(data, context):
    """Encrypt every row of ``data`` as a TenSEAL CKKS vector.

    Each row is converted with ``row.numpy().tolist()`` before encryption
    under ``context``. Returns the ciphertexts as a list in row order.
    """
    encrypted_rows = []
    for row in data:
        plain_values = row.numpy().tolist()
        encrypted_rows.append(ts.ckks_vector(context, plain_values))
    return encrypted_rows

# The row-by-row encryption of X_train_tensor / X_test_tensor that used to sit
# here was removed: neither result was ever read, and the test set is encrypted
# (in batches) further down in this cell, so it only produced a full extra set
# of ciphertexts for nothing.

# Encrypt model weights and bias.
# fc.weight has shape (1, input_size); flatten it into a plain list of floats.
encrypted_weights = ts.ckks_vector(context, model.fc.weight.detach().numpy().flatten().tolist())
encrypted_bias = ts.ckks_vector(context, model.fc.bias.detach().numpy().tolist())

# Step 5: Homomorphic Inference with Batch Processing
def homomorphic_inference(encrypted_X_batches, encrypted_weights, encrypted_bias):
    """Evaluate the encrypted linear layer on every row of every batch.

    Each row yields ``row.dot(encrypted_weights) + encrypted_bias``. The
    returned structure is a list of batches, each a list of per-row results in
    input order.
    """

    def _score(enc_row):
        # Linear layer on ciphertexts: dot product with the weights plus the bias
        return enc_row.dot(encrypted_weights) + encrypted_bias

    return [list(map(_score, encrypted_X_batch)) for encrypted_X_batch in encrypted_X_batches]

# Encrypt test data in batches
def encrypt_batch(data, context, batch_size=500):
    """Encrypt ``data`` in consecutive chunks of at most ``batch_size`` rows.

    Every row is converted with ``row.tolist()`` and encrypted under
    ``context``. Returns a list of batches, each a list of CKKS vectors.
    """
    batches = []
    start = 0
    for start in range(0, len(data), batch_size):
        chunk = data[start:start + batch_size]
        batches.append([ts.ckks_vector(context, row.tolist()) for row in chunk])
    return batches

# Encrypt X_test_tensor, grouped into batches by encrypt_batch
encrypted_X_test_batches = encrypt_batch(data=X_test_tensor, context=context)

# Run the encrypted linear layer over every batch of encrypted test rows
encrypted_predictions_batches = homomorphic_inference(
    encrypted_X_batches=encrypted_X_test_batches,
    encrypted_weights=encrypted_weights,
    encrypted_bias=encrypted_bias,
)

# Step 6: Decrypt Predictions
def decrypt_predictions(encrypted_predictions):
    """Flatten and decrypt batched encrypted results.

    Iterates the batches in order, decrypts each element, and collects the
    first entry of every decrypted vector into a single flat list.
    """
    flat_values = []
    for batch in encrypted_predictions:
        for enc_pred in batch:
            flat_values.append(enc_pred.decrypt()[0])
    return flat_values

# Decrypt the predictions
decrypted_predictions = decrypt_predictions(encrypted_predictions_batches)

# Step 7: Evaluate the Results
# The model's forward pass applies a sigmoid after fc, but the encrypted path
# evaluates only the linear layer, so each decrypted value is a logit.
# sigmoid(z) > 0.5 <=> z > 0, hence the class boundary is 0, not 0.5.
final_predictions = [1 if pred > 0 else 0 for pred in decrypted_predictions]
accuracy = np.mean(np.array(final_predictions) == y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")


Epoch [10/100], Loss: 0.6845
Epoch [20/100], Loss: 0.6818
Epoch [30/100], Loss: 0.6794
Epoch [40/100], Loss: 0.6771
Epoch [50/100], Loss: 0.6749
Epoch [60/100], Loss: 0.6729
Epoch [70/100], Loss: 0.6710
Epoch [80/100], Loss: 0.6692
Epoch [90/100], Loss: 0.6676
Epoch [100/100], Loss: 0.6660
Accuracy: 51.29%
