# HEDPriv Framework - Interactive Demo

This notebook demonstrates the complete HEDPriv pipeline:
1. Data Preprocessing
2. CKKS Homomorphic Encryption
3. Encrypted Computation
4. Differential Privacy

**Author:** Samuel Selasi  
**Date:** 2026  
**Framework Version:** 0.1.0

## 1. Setup and Imports

In [None]:
import sys
import os

# Make the parent of the current working directory importable so that the
# `src` package imported below can be resolved when the notebook is run
# from its own sub-folder.
project_root = os.path.dirname(os.getcwd())
sys.path.append(project_root)

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.preprocessing import DataPreprocessor
from src.ckks_encryption import CKKSEncryptor
from src.differential_privacy import GaussianMechanism
from src.hedpriv_pipeline import HEDPrivPipeline

# Plot defaults shared by every figure in this notebook:
# seaborn's white grid theme and a wide default canvas.
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

# Reaching this line means every import in this cell resolved.
print("✓ All imports successful!")

## 2. Data Preprocessing

In [None]:
# Build the preprocessor with an explicit random_state
# (presumably a seed for reproducible splits/synthetic data — confirm in src.preprocessing).
preprocessor = DataPreprocessor(random_state=42)

# No file path is passed, so the loader's default source is used
# (described by the notebook author as synthetic data for the demo).
data = preprocessor.load_heart_disease_data()

dataset_shape = data.shape
print("Dataset shape:", dataset_shape)
print("\nFirst few rows:")

# Last expression of the cell: rendered by Jupyter as the cell output.
data.head()

In [None]:
# Split the loaded data into train/test partitions, holding out 20% for testing.
HOLDOUT_FRACTION = 0.2
X_train, X_test = preprocessor.preprocess(data, test_size=HOLDOUT_FRACTION)

print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

feature_list = preprocessor.get_feature_names()
print(f"\nFeatures: {feature_list}")

# Global (all-feature) statistics of the training partition.
# NOTE(review): values near 0 / 1 would indicate standardized features — confirm
# what scaling `preprocess` applies.
print("\nSample statistics:")
train_mean = X_train.mean()
print(f"  Mean: {train_mean:.4f}")
train_std = X_train.std()
print(f"  Std: {train_std:.4f}")

## 3. CKKS Homomorphic Encryption

In [None]:
# CKKS parameters gathered in one place so they are easy to tweak.
# The coefficient-modulus chain sums to 200 bits (60 + 40 + 40 + 60);
# the 2**40 scale is presumably chosen to match the 40-bit inner primes — TODO confirm.
ckks_params = {
    "poly_modulus_degree": 8192,
    "coeff_mod_bit_sizes": [60, 40, 40, 60],
    "global_scale": 2**40,
}
encryptor = CKKSEncryptor(**ckks_params)

# Build the encryption context from the parameters above and report it.
context = encryptor.create_context()
print("\nContext info:")
context_info = encryptor.get_context_info()
print(context_info)

In [None]:
# Only the first 100 training rows are encrypted to keep the demo fast.
X_train_subset = X_train[:100]

# Encrypt the subset and report how many ciphertext entries came back.
encrypted_data = encryptor.encrypt_dataset(X_train_subset)
n_encrypted = len(encrypted_data)
print(f"\nEncrypted {n_encrypted} samples")

## 4. Encrypted Computation

In [None]:
# Reference value: per-feature mean computed directly on the plaintext subset.
plaintext_mean = X_train_subset.mean(axis=0)

# Same statistic computed over the ciphertexts, then decrypted for comparison.
encrypted_mean = encryptor.encrypted_mean(encrypted_data)
decrypted_mean = encryptor.decrypt_vector(encrypted_mean)

print("Mean Computation Results:")
print(f"  Plaintext:  {plaintext_mean}")
print(f"  Decrypted:  {decrypted_mean}")

# Euclidean (L2) distance between the two mean vectors.
he_l2_error = np.linalg.norm(plaintext_mean - decrypted_mean)
print(f"  Error:      {he_l2_error:.6f}")

## 5. Differential Privacy

In [None]:
# Gaussian mechanism configured with the demo's privacy parameters (epsilon, delta).
dp_mechanism = GaussianMechanism(epsilon=1.0, delta=1e-5)

# Perturb the decrypted HE result.
# NOTE(review): data_range=(-3, 3) presumably assumes every feature value lies in
# that interval (e.g. standardized and clipped data) — confirm against the
# preprocessor, since the noise calibration depends on this bound.
subset_size = len(X_train_subset)
private_mean = dp_mechanism.add_noise_to_mean(
    decrypted_mean,
    n_samples=subset_size,
    data_range=(-3, 3),
)

print("\nPrivate Mean Results:")
print(f"  Plaintext:     {plaintext_mean}")
print(f"  HE Only:       {decrypted_mean}")
print(f"  HE + DP:       {private_mean}")

# L2 distance between the plaintext mean and the final (HE + DP) output.
total_l2_error = np.linalg.norm(plaintext_mean - private_mean)
print(f"  Total Error:   {total_l2_error:.6f}")

## 6. Complete Pipeline Demo

In [None]:
# End-to-end run using the pipeline wrapper with the same
# HE and DP settings that were used step by step above.
pipeline_settings = {
    "poly_modulus_degree": 8192,
    "epsilon": 1.0,
    "delta": 1e-5,
}
pipeline = HEDPrivPipeline(**pipeline_settings)

# `results` is consumed by the visualization and summary cells below.
results = pipeline.run_complete_pipeline()

## 7. Visualization

In [None]:
# Grouped bar chart: one group per feature, one bar per estimate of the mean.
# NOTE(review): feature names come from the notebook's `preprocessor` while the
# values come from the pipeline's `results` — assumes both describe the same
# feature set in the same order; confirm.
feature_names = preprocessor.get_feature_names()
x = np.arange(len(feature_names))
width = 0.25

fig, ax = plt.subplots(figsize=(12, 6))

# (horizontal offset, key in `results`, legend label) for each bar series
bar_series = [
    (-width, 'plaintext_mean', 'Plaintext'),
    (0, 'decrypted_mean', 'HE Only'),
    (width, 'private_mean', 'HE + DP'),
]
for offset, result_key, legend_label in bar_series:
    ax.bar(x + offset, results[result_key], width, label=legend_label, alpha=0.8)

# Axis decoration
ax.set_xlabel('Features', fontweight='bold')
ax.set_ylabel('Mean Value', fontweight='bold')
ax.set_title('Comparison: Plaintext vs HE vs HE+DP', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(feature_names)
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Two side-by-side panels: error magnitudes (left) and timing metrics (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
error_ax, timing_ax = axes

# --- Left panel: error comparison ---
# Bar labels paired with the corresponding keys of results['errors'].
errors = ['HE Error', 'DP Error', 'Total Error']
error_keys = ['he_error', 'dp_error', 'total_error']
values = [results['errors'][key] for key in error_keys]

error_ax.bar(errors, values, color=['blue', 'orange', 'red'], alpha=0.7)
# NOTE(review): the axis label says MSE; confirm that the pipeline reports
# mean squared errors under results['errors'].
error_ax.set_ylabel('Mean Squared Error', fontweight='bold')
error_ax.set_title('Error Analysis', fontsize=12, fontweight='bold')
error_ax.grid(True, alpha=0.3, axis='y')

# --- Right panel: time breakdown ---
# Every entry of results['metrics'] is plotted, in the mapping's own order.
times = [metric_name for metric_name in results['metrics'].keys()]
time_values = [metric_value for metric_value in results['metrics'].values()]

timing_ax.barh(times, time_values, color='steelblue', alpha=0.7)
timing_ax.set_xlabel('Time (seconds)', fontweight='bold')
timing_ax.set_title('Performance Breakdown', fontsize=12, fontweight='bold')
timing_ax.grid(True, alpha=0.3, axis='x')

plt.tight_layout()
plt.show()

## 8. Privacy-Utility Tradeoff Analysis

In [None]:
# Sweep the privacy budget and record the error of the private mean at each setting.
epsilon_values = [0.1, 0.5, 1.0, 2.0, 5.0, 10.0]
errors = []

# Fix the evaluation subset once, outside the loop, so that
#   * every epsilon is scored against identical data, and
#   * the sample count handed to the DP step always equals the number of rows
#     actually used (X_train[:100] is shorter than 100 for a small training set,
#     in which case a hard-coded n_samples=100 would be wrong).
tradeoff_subset = X_train[:100]
tradeoff_n_samples = len(tradeoff_subset)
tradeoff_plain_mean = tradeoff_subset.mean(axis=0)

for eps in epsilon_values:
    # Fresh pipeline per epsilon; only the privacy budget changes.
    pipeline_test = HEDPrivPipeline(epsilon=eps, delta=1e-5)
    pipeline_test.setup()

    # Encrypt -> mean under encryption -> add DP noise, on the pre-loaded data.
    encrypted_test = pipeline_test.encrypt_data(tradeoff_subset)
    decrypted = pipeline_test.compute_encrypted_mean(encrypted_test)
    private = pipeline_test.add_differential_privacy(
        decrypted, n_samples=tradeoff_n_samples
    )

    # L2 distance between the plaintext mean and the privatized mean.
    # NOTE(review): this is a single run per epsilon; if the DP noise is drawn
    # randomly the curve may not be monotone — consider averaging several trials.
    error = np.linalg.norm(tradeoff_plain_mean - private)
    errors.append(error)
    print(f"ε={eps}: Error={error:.6f}")

In [None]:
# Plot the privacy-utility tradeoff: error of the private mean for each epsilon
# recorded in `errors` by the sweep cell above, on log-log axes.
plt.figure(figsize=(10, 6))
plt.plot(epsilon_values, errors, marker='o', linewidth=2, markersize=10)
plt.xlabel('Privacy Budget (ε)', fontsize=12, fontweight='bold')
# The plotted values are Euclidean (L2) norms of the difference between the
# plaintext and private mean vectors (np.linalg.norm), not mean squared errors,
# so the axis is labelled accordingly.
plt.ylabel('Error (L2 norm)', fontsize=12, fontweight='bold')
plt.title('Privacy-Utility Tradeoff', fontsize=14, fontweight='bold')
plt.grid(True, alpha=0.3)
plt.xscale('log')
plt.yscale('log')

# Label each point with its epsilon, 10 points above the marker.
for eps, err in zip(epsilon_values, errors):
    plt.annotate(f'ε={eps}', (eps, err), textcoords="offset points",
                 xytext=(0, 10), ha='center', fontsize=9)

plt.tight_layout()
plt.show()

## 9. Summary and Conclusions

In [None]:
# Final recap of the demo: dataset size, HE/DP parameters, accuracy and timings.
# Lookups are done immediately before the lines that print them.
banner = "=" * 70
print(banner)
print(" HEDPriv Framework - Demo Summary")
print(banner)

n_train_rows, n_features = len(X_train), X_train.shape[1]
print(f"\nDataset: {n_train_rows} training samples, {n_features} features")

print("\nSecurity Parameters:")
# NOTE(review): the degree is fixed text; it matches the value passed to
# CKKSEncryptor earlier in the notebook but will not follow later changes.
print("  Polynomial Modulus Degree: 8192")
security_bits = encryptor.context.security_level
print(f"  Security Level: {security_bits} bits")

print("\nPrivacy Parameters:")
dp_settings = pipeline.dp_mechanism
print(f"  Epsilon (ε): {dp_settings.epsilon}")
print(f"  Delta (δ): {dp_settings.delta}")

print("\nAccuracy:")
error_report = results['errors']
print(f"  HE Error: {error_report['he_error']:.6f}")
print(f"  DP Error: {error_report['dp_error']:.6f}")

print("\nPerformance:")
timing_report = results['metrics']
print(f"  Total Time: {timing_report['total_time']:.3f}s")
print(f"  Encryption: {timing_report['encryption_time']:.3f}s")
print(f"  Computation: {timing_report['computation_time']:.3f}s")
print("\n" + banner)

## 10. Next Steps

1. **Experiment with different parameters**: Try different values for `poly_modulus_degree`, `epsilon`, and `delta`
2. **Test with your own data**: Load custom CSV files using `preprocessor.load_heart_disease_data('your_file.csv')`
3. **Run full experiments**: Execute `experiments/privacy_utility_tradeoff.py` and `experiments/performance_benchmark.py`
4. **Extend functionality**: Add support for other statistical queries (median, quantiles, etc.)

---

**Framework:** HEDPriv v0.1.0  
**Documentation:** See README.md and DEPLOYMENT_GUIDE.md  
**Repository:** https://github.com/yourusername/HEDPriv