<a href="https://colab.research.google.com/github/ramonVDAKKER/teaching-data-science-emas/blob/main/notebooks/EMAS_intro_data_science_neural_networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Neural networks - getting started with Keras

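This notebook contains basic illustrations for the topic <i>neural networks</i>: a small feed-forward network built with Keras is compared with a random-forest benchmark on a credit-card fraud data set.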

# 0. Preparations:

## Imports

In [None]:
# Import necessary modules
import pandas as pd
import numpy as np
import keras
from keras.layers import Dense
from keras.models import Sequential
#from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, precision_recall_curve
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
%matplotlib inline
from keras import regularizers
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score

# 1. Load data

In [None]:
# Credit-card fraud data set (CSV hosted on GitHub); the "Class" column is used as the target further down.
data_url = "https://raw.githubusercontent.com/nsethi31/Kaggle-Data-Credit-Card-Fraud-Detection/master/creditcard.csv"
df = pd.read_csv(data_url)

# 2. Construct train and test set

In [None]:
# Fixed seed so the split (and anything else that reuses `seed`) is reproducible
seed = 123

# Separate the predictors from the target column before splitting
features = df.drop(columns=["Class"])
target = df["Class"]

# Hold out 30% of the rows as test set
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=seed,
)

# no. of features (columns of the training frame); used later as the network's input size
p = len(X_train.columns)

print("data_train shape: {}".format(X_train.shape))
print("data_test shape: {}".format(X_test.shape))

# 3. Random forest as benchmark

In [None]:
# Benchmark model: a forest of 100 trees, each limited to depth 10.
# Reuse the notebook-wide `seed` (defined with the train/test split) instead of
# repeating the literal, so changing the seed in one place affects both steps.
rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=seed)

In [None]:
# Train the benchmark forest on the (unscaled) training data
rf.fit(X=X_train, y=y_train)

In [None]:
# predict_proba returns one column per class; keep the second column as the score.
# NOTE(review): presumably this is the probability of Class == 1 (fraud) - confirm the label coding.
rf_proba_test = rf.predict_proba(X_test)
rf_scores_test = rf_proba_test[:, 1]

In [None]:
def draw_precision_recall(scores, y, title="Precision-Recall curve"):
    """Plot the precision-recall curve of a binary classifier as a shaded area.

    Parameters
    ----------
    scores : array-like
        Predicted scores for the positive class, one value per sample.
        Column vectors of shape (n, 1) are accepted and flattened.
    y : array-like
        True binary labels, one per sample.
    title : str, optional
        Title of the figure. Defaults to "Precision-Recall curve"; pass a
        model name to tell several plots apart.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Flatten so that (n, 1) outputs (e.g. from a single-node output layer)
    # are handled the same way as plain 1-D score vectors.
    scores = np.asarray(scores).ravel()
    # The thresholds returned as third value are not needed for the plot.
    precision, recall, _ = precision_recall_curve(y, scores)

    fig, ax = plt.subplots()
    ax.fill_between(recall, precision, alpha=0.2, color="b")
    ax.set_xlabel("Recall")
    ax.set_ylabel("Precision")
    # Small head-room above 1.0 so the top of the curve is not clipped
    ax.set_ylim([0.0, 1.05])
    ax.set_xlim([0.0, 1.0])
    ax.set_title(title)
    plt.show()


# Precision-recall curve of the random-forest benchmark on the test set
draw_precision_recall(rf_scores_test, y_test)

In [None]:
# Area under the ROC curve of the random forest, evaluated on the test set
roc_auc_score(y_true=y_test, y_score=rf_scores_test)

# 4. Neural network (1)

Let us try to fit a neural network.

In [None]:
# Seed Python, NumPy and the Keras backend so that the random weight
# initialisation (and the shuffling during training later on) is repeatable;
# reuses the `seed` defined with the train/test split.
keras.utils.set_random_seed(seed)
# initialize NN: an empty model to which layers are added one after another
nn = Sequential()
# add first hidden layer with 10 nodes and ReLU as activation function;
# input_shape=(p,) declares that every sample has p features
nn.add(Dense(10, activation='relu', input_shape=(p,), kernel_initializer='uniform'))
# add second hidden layer with 10 nodes and ReLU as activation function
nn.add(Dense(10, activation='relu', kernel_initializer='uniform'))
# add output layer (binary target): a single sigmoid node
nn.add(Dense(1, activation='sigmoid'))

In [None]:
# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy reported as extra metric
nn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Verify that model contains information from compiling.
# format() is used instead of "+" so the line keeps working when the loss is
# configured as a loss object or function rather than a string.
print("Loss function: {}".format(nn.loss))

In [None]:
# Cast both feature frames to float64 before scaling
X_train, X_test = (frame.astype("float64") for frame in (X_train, X_test))

# Learn mean and standard deviation on the training data only, then apply the
# same transformation to train and test so no test information leaks into the scaler.
sc = StandardScaler().fit(X_train)
X_train_sc = sc.transform(X_train)
X_test_sc = sc.transform(X_test)

In [None]:
# Train the network for 25 epochs on the scaled training data;
# 30% of the training rows are held out to monitor validation loss and accuracy.
nn.fit(
    x=X_train_sc,
    y=y_train,
    epochs=25,
    validation_split=0.3,
)

In [None]:
# Calculate predicted scores for the scaled test set.
# The output layer has a single node, so predict() returns one column per
# sample, i.e. shape (n, 1); ravel() flattens this to a 1-D score vector,
# the same layout as rf_scores_test.
nn_scores_test = nn.predict(X_test_sc).ravel()

In [None]:
# Precision-recall curve of the neural network on the test set
draw_precision_recall(scores=nn_scores_test, y=y_test)

In [None]:
# Area under the ROC curve of the neural network, evaluated on the test set
roc_auc_score(y_true=y_test, y_score=nn_scores_test)