# Neural Network With Boosted Trees Adversarial

# Library Imports

In [2]:

import tensorflow as tf
import tensorflow_decision_forests as tfdf
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, roc_auc_score,accuracy_score, precision_score, recall_score
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE
#from aif360.metrics import ClassificationMetric
#from aif360.datasets import BinaryLabelDataset


# Data Loading and Pre-processing Function


In [3]:
def process_and_load_data(data, target=None):
    """Split a DataFrame into scaled train, test and validation sets.

    The data is first split 80/20 into train/test, then the training part is
    split 75/25 into train/validation, giving a 60/20/20 split overall. Both
    splits use ``random_state=42``. A ``StandardScaler`` is fitted on the
    training features only and then applied to the test and validation
    features.

    Args:
        data: The full dataset as a pandas DataFrame, including the target
            column.
        target: Label of the target column in ``data``.

    Returns:
        A tuple ``(X_train, X_test, X_val, y_train, y_test, y_val)``. The
        ``X_*`` values are the scaler's outputs; the ``y_*`` values are the
        corresponding slices of ``data[target]``.

    Raises:
        TypeError: If ``data`` is not a pandas DataFrame.
        KeyError: If ``target`` is not a column of ``data``.
    """
    # Validate up front instead of wrapping everything in a bare ``except``,
    # which used to hide unrelated failures behind a misleading message and
    # silently return None.
    if not isinstance(data, pd.DataFrame):
        raise TypeError("Input needs to be a Pandas DataFrame.")
    if target is None or target not in data.columns:
        raise KeyError(f"Target column {target!r} not found in the DataFrame.")

    X = data.drop(columns=[target])
    y = data[target]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.25, random_state=42
    )

    # Fit on the training split only so that no test/validation statistics
    # leak into the scaling parameters.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    X_val = scaler.transform(X_val)

    return X_train, X_test, X_val, y_train, y_test, y_val

 


# Model Accuracy & Fairness Assessment Functions

## Function to test the Model's Accuracy

In [None]:
def _group_scores(actuals, predictions):
    """Return ``(accuracy, recall, precision)`` for one group of rows."""
    return (
        accuracy_score(actuals, predictions),
        recall_score(actuals, predictions),
        precision_score(actuals, predictions),
    )


def model_assessment(input, predictions, actuals):
    """Report accuracy, recall and precision overall and per gender.

    Rows are assigned to groups by the ``'Gender'`` column of ``input``
    (``1`` is reported as "Male", ``2`` as "Female"). ``predictions`` and
    ``actuals`` are matched to the rows of ``input`` by position, so all
    three must be in the same row order.

    Args:
        input: DataFrame of features containing a ``'Gender'`` column.
        predictions: Predicted class labels, one per row of ``input``.
        actuals: True class labels, one per row of ``input``.

    Returns:
        A DataFrame with columns ``Group``, ``Accuracy``, ``Recall`` and
        ``Precision`` and one row each for Overall, Female and Male.
    """
    # Work on flat arrays and boolean masks. Selecting by DataFrame index
    # labels is wrong whenever the index is not 0..n-1 (e.g. after
    # train_test_split), because array indexing is positional.
    preds = np.asarray(predictions).ravel()
    truth = np.asarray(actuals).ravel()
    gender = np.asarray(input['Gender'])

    male_mask = gender == 1
    female_mask = gender == 2

    overall = _group_scores(truth, preds)
    female = _group_scores(truth[female_mask], preds[female_mask])
    male = _group_scores(truth[male_mask], preds[male_mask])

    results = pd.DataFrame({
        'Group': ['Overall', 'Female', 'Male'],
        'Accuracy': [overall[0], female[0], male[0]],
        'Recall': [overall[1], female[1], male[1]],
        'Precision': [overall[2], female[2], male[2]]
    })

    return results





## Function to test the Model's Fairness between Males and Females

In [8]:
def fairness_metrics(input_df, predictions):
    """Compute group-fairness metrics between genders for a set of predictions.

    ``Gender == 2`` is treated as the unprivileged group and ``Gender == 1``
    as the privileged group. Differences are reported as unprivileged minus
    privileged.

    NOTE: this function uses ``BinaryLabelDataset`` and
    ``ClassificationMetric``; their aif360 imports are commented out in the
    import cell of this file and must be enabled before calling it.

    Args:
        input_df: DataFrame holding the features, the ``'Gender'`` protected
            attribute and the true ``'Coronary heart disease'`` label.
        predictions: Predicted labels, one per row of ``input_df``.

    Returns:
        A DataFrame with columns ``Metric`` and ``Value`` listing demographic
        parity difference, equal opportunity difference, predictive parity
        (difference in positive predictive value) and disparate impact.
    """
    label = 'Coronary heart disease'
    protected = 'Gender'

    binary_dataset = BinaryLabelDataset(df=input_df,
                                        label_names=[label],
                                        protected_attribute_names=[protected])

    # Create predictions dataset: same rows, true label swapped for the
    # predicted one. ``columns=`` is required here; a bare drop() targets row
    # labels and raises KeyError.
    pred_df = input_df.drop(columns=[label])
    pred_df[label] = predictions
    binary_predictions = BinaryLabelDataset(df=pred_df,
                                            label_names=[label],
                                            protected_attribute_names=[protected])

    # Compute metrics
    metric = ClassificationMetric(binary_dataset, binary_predictions,
                                  unprivileged_groups=[{protected: 2}],
                                  privileged_groups=[{protected: 1}])

    demographic_parity_difference = metric.statistical_parity_difference()
    equal_opportunity_difference = metric.equal_opportunity_difference()
    # Predictive parity compares precision (PPV) across groups; it is not the
    # same quantity as statistical parity.
    predictive_parity = (
        metric.positive_predictive_value(privileged=False)
        - metric.positive_predictive_value(privileged=True)
    )
    disparate_impact = metric.disparate_impact()

    # Output metrics in a Pandas DataFrame
    fairness_table = pd.DataFrame({
        'Metric': ['Demographic Parity Difference', 'Equal Opportunity Difference',
                   'Predictive Parity', 'Disparate Impact'],
        'Value': [demographic_parity_difference, equal_opportunity_difference,
                  predictive_parity, disparate_impact]
    })

    return fairness_table
   


# Neural Network Models

## Simple Baseline Neural Network

In [9]:
# Simple baseline model for 30 inputs.
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` argument on the first Dense layer, which Keras warns against
# for Sequential models.
simple_nn = Sequential([
    keras.Input(shape=(30,)),            # 30 input features
    Dense(units=32, activation='relu'),  # First hidden layer
    Dropout(0.3),                        # Dropout to prevent overfitting
    Dense(units=16, activation='relu'),  # Second hidden layer
    Dense(1, activation='sigmoid'),      # Output layer (binary probability)
])

# Compile the model
adam = keras.optimizers.Adam(learning_rate=0.001)
simple_nn.compile(loss='binary_crossentropy', optimizer=adam, metrics=["accuracy"])

# Summary of the model
simple_nn.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Accuracy Assessment