In [51]:
import pandas as pd

# Read the training test and validation sets from data folder
X_train = pd.read_csv('../data/tra_tst_val/X_train_normalised.csv')
X_test = pd.read_csv('../data/tra_tst_val/X_test_normalised.csv')
X_val = pd.read_csv('../data/tra_tst_val/X_val_normalised.csv')

y_train = pd.read_csv('../data/tra_tst_val/y_train_normalised.csv')
y_test = pd.read_csv('../data/tra_tst_val/y_test_normalised.csv')
y_val = pd.read_csv('../data/tra_tst_val/y_val_normalised.csv')

In [25]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import numpy as np

# Defining a simple neural network function to test different hyperparameters
def build_model(input_dim, layers, activation, optimizer_lr):
    model = Sequential()
    model.add(Dense(layers[0], input_dim=input_dim, activation=activation))
    for units in layers[1:]:
        model.add(Dense(units, activation=activation))
    model.add(Dense(1, activation='sigmoid')) # Binary classification
    model.compile(optimizer=Adam(learning_rate=optimizer_lr), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Initial hyperparameters
input_dim = X_train.shape[1] # Number of features
layers = [64, 32] # Two layers with 64 and 32 neurons
activation = 'relu'
optimizer_lr = 0.001

# Building the initial model
model = build_model(input_dim, layers, activation, optimizer_lr)

# Training the model
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=50, batch_size=10, verbose=0)

# Evaluating the model on the validation set
val_accuracy = np.max(history.history['val_accuracy'])

val_accuracy



0.7699999809265137

In [26]:
model.save('bb_model')

INFO:tensorflow:Assets written to: bb_model/assets


In [3]:
from tensorflow.keras.models import load_model

model_path = '../black_box/bb_model'

# Load the model
model = load_model(model_path)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)

# Print the results
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')

7/7 - 0s - loss: 0.6368 - accuracy: 0.6750 - 293ms/epoch - 42ms/step
Test Loss: 0.6367844939231873
Test Accuracy: 0.675000011920929


In [29]:
# Predict on training set
train_predictions = model.predict(X_train)
train_predictions_df = pd.DataFrame(train_predictions, columns=['pred_label'])
train_predictions_df.to_csv('../data/tra_tst_val/pred_train.csv', index=False)

test_predictions = model.predict(X_test)
test_predictions_df = pd.DataFrame(test_predictions)
test_predictions_df.to_csv('../data/tra_tst_val/pred_test.csv', index=False)

val_predictions = model.predict(X_val)
val_predictions_df = pd.DataFrame(val_predictions)
val_predictions_df.to_csv('../data/tra_tst_val/pred_val.csv', index=False)




In [23]:
from joblib import load

# Load the dataset
file_path = '../data/german.csv'
input_df = pd.read_csv(file_path, delimiter=';')
y = input_df.Label
input_df.drop(columns=['Label'], inplace=True)

original_input = input_df.copy()

# Features to be scaled
features_to_scale = ['Age', 'Credit', 'LoanDuration']

# Load the scaler
scaler_path = '/Users/juls/Documents/Repositories/Contesting-Black-Box-Decisions/black_box/bb_scaler.joblib'  # Update this path
scaler = load(scaler_path)
input_df[features_to_scale] = scaler.fit_transform(input_df[features_to_scale])

# Make prediction
prediction_result = model.predict(input_df)

original_input['Label'] = y
original_input['Prediction'] = prediction_result
original_input['BinaryPrediction'] = original_input['Prediction'].apply(lambda x: 1 if x >=0.5 else 0)

original_input.to_csv('../data/bb_results.csv', index=None)




In [8]:
# Toy example for the prediction of a single case
import numpy as np
from joblib import load
# columns=["Sex", "Single", "Unemployed", "Age", "Credit", "LoanDuration", "PurposeOfLoan", "InstallmentRate", "Housing"]
X_single = np.array([1,1,0,50,12000, 30, 1, 3, 2])  # Replace 'your_single_case' with your actual data
# Convert to DataFrame
input_df = pd.DataFrame([X_single], columns=["Sex", "Single", "Unemployed", "Age", "Credit", "LoanDuration",
                                               "PurposeOfLoan", "InstallmentRate", "Housing"])


original_input = input_df.copy()

# Features to be scaled
features_to_scale = ['Age', 'Credit', 'LoanDuration']

# Load the scaler
scaler_path = 'bb_scaler.joblib'  # Update this path
scaler = load(scaler_path)
input_df[features_to_scale] = scaler.fit_transform(input_df[features_to_scale])

 # Make prediction
prediction_result = model.predict(input_df)
binary_prediction = 1 if prediction_result[0] >= 0.5 else 0

# Optionally, retrieve the class with highest probability if it's a classification model
predicted_class = np.argmax(prediction_result, axis=1)

# Print the predicted results
print(f'Predicted probabilities: {prediction_result}')
print(f'Predicted class index: {predicted_class}')

Predicted probabilities: [[0.5912936]]
Predicted class index: [0]


In [52]:
original_input = X_val.copy()

# Features to be scaled
features_to_scale = ['Age', 'Credit', 'LoanDuration']

# Load the scaler
scaler_path = 'bb_scaler.joblib'  # Update this path
scaler = load(scaler_path)
X_val[features_to_scale] = scaler.fit_transform(X_val[features_to_scale])


In [53]:
# Iterate over the rows of the DataFrame and make predictions
results = []
for index, row in X_val.iterrows():
    # Reshape the row to fit model input, assuming input needs to be 1D array for a single sample
    #input_df = pd.DataFrame(np.array(row), columns=["Sex", "Single", "Unemployed", "Age", "Credit", "LoanDuration","PurposeOfLoan", "InstallmentRate", "Housing"])

    #print(input_df)
    sample = row.values.reshape(1, -1)

    # Predict the output for the single case
    prediction = model.predict(sample)

    # Optionally, retrieve the class with highest probability if it's a classification model
    predicted_class = np.argmax(prediction, axis=1)

    # Store predictions or any additional required analysis
    results.append((index, prediction.flatten(), predicted_class[0]))

# Convert results to a DataFrame for better visualization and analysis
results_df = pd.DataFrame(results, columns=['Index', 'Predictions', 'Predicted Class'])
print(results_df)

     Index   Predictions  Predicted Class
0        0  [0.45084292]                0
1        1  [0.97850955]                0
2        2  [0.65167385]                0
3        3   [0.5669112]                0
4        4   [0.6598755]                0
..     ...           ...              ...
195    195  [0.76888883]                0
196    196   [0.7240479]                0
197    197   [0.9282527]                0
198    198   [0.9121483]                0
199    199  [0.78390825]                0

[200 rows x 3 columns]


In [54]:
results_df[results_df['Predictions'] < 0.5 ]

Unnamed: 0,Index,Predictions,Predicted Class
0,0,[0.45084292],0
9,9,[0.4997557],0
13,13,[0.4506695],0
20,20,[0.33093703],0
30,30,[0.17586467],0
39,39,[0.49448648],0
44,44,[0.33672407],0
54,54,[0.24492508],0
68,68,[0.03953727],0
70,70,[0.48512974],0
