In [744]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import diffprivlib.models as dp

def gaussian_mech(v, sensitivity, epsilon, delta):
    return v + np.random.normal(loc=0, scale=sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon)

maternal_health = pd.read_csv('maternalHealthDataSet.csv')

In [745]:
X = maternal_health[['Age', 'SystolicBP', 'DiastolicBP', 'BS', 'BodyTemp', 'HeartRate']]
y = maternal_health['RiskLevel']

# Split data into train/test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

## Method 3 (Model trained with original data)

In [755]:
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)

# Evaluate the model 
accuracy = clf.score(X_test, y_test)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.83


## Model 4 (DP model trained with original data)

In [759]:
classes = (0, 1, 2) # encoding of low-risk, mid-risk, high-risk
bounds = ([], [])
for col in X.columns:
    bounds[0].append(maternal_health[col].min())
    bounds[1].append(maternal_health[col].max())

In [760]:
dp_clf = dp.DecisionTreeClassifier(epsilon=float("inf"), bounds=bounds, classes=classes)
dp_clf.fit(X_train, y_train)

# Evaluate the model
accuracy = dp_clf.score(X_test, y_test)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.50
