In [183]:
import keras
import numpy as np
from sklearn import model_selection, preprocessing, metrics, utils
import pandas as pd
import matplotlib.pyplot as plt
import fraudutils as futils
rs = 1

### Loading Dataset

In [184]:
train, test, val = futils.load_train_test_val_dataset('../../../data/processed/give-me-some-credit/')

X_train = train.drop('SeriousDlqin2yrs', axis=1)
y_train = train['SeriousDlqin2yrs']

X_test = test.drop('SeriousDlqin2yrs', axis=1)
y_test = test['SeriousDlqin2yrs']

X_val = val.drop('SeriousDlqin2yrs', axis=1)
y_val = val['SeriousDlqin2yrs']

In [185]:
y_train = np.array([[x[0], x[1], 0] for x in keras.utils.to_categorical(y_train)])
y_test = np.array([[x[0], x[1], 0] for x in keras.utils.to_categorical(y_test)])
y_val = np.array([[x[0], x[1], 0] for x in keras.utils.to_categorical(y_val)])

### Defining sizes

In [186]:
# Input for generator network
noise_size = 100
label_size = 3
representation_size = 10
hidden_layer = 128

### Using Paper's Hyperparameters

In [187]:
adam = keras.optimizers.Adam(lr=0.00001)

### Defining Generator Model

In [188]:
gen_input_size = noise_size
# Generating input layer with functional approach
gen_input = keras.layers.Input(shape=(gen_input_size,))
# Dropbout for fun
gen_drop_layer = keras.layers.Dropout(0.1)(gen_input)
# Generating hidden layer using functional approach
gen_hidden = keras.layers.Dense(hidden_layer, activation='tanh')(gen_drop_layer)
# Generating output layer using functional approach
gen_output = keras.layers.Dense(representation_size, activation='sigmoid')(gen_hidden)
# Defining model
gen_model = keras.models.Model(inputs=gen_input, outputs=gen_output)
gen_model.compile(optimizer=adam, loss='mse', metrics=['accuracy'])

### Defining Discriminator Model

In [189]:
dis_input_size = representation_size 
# Discriminator input layer
dis_input = keras.layers.Input(shape=(representation_size,))
# Discriminator hidden layer
dis_drop_layer = keras.layers.Dropout(0.1)(dis_input)
dis_hidden = keras.layers.Dense(hidden_layer, activation='tanh')(dis_input)
# Discriminator output layer
dis_output = keras.layers.Dense(3, activation='softmax')(dis_hidden)
# Defining model
dis_model = keras.models.Model(inputs=dis_input, outputs=dis_output)
dis_model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

In [190]:
# Combined Network
dis_model.trainable = False
gan_input = keras.layers.Input(shape=(noise_size,))

x = gen_model(gan_input)
gan_output = dis_model(x)

gan = keras.models.Model(inputs=gan_input, outputs=gan_output)
gan.compile(loss='categorical_crossentropy', optimizer=adam)

In [191]:
generator_losses = []
discriminator_losses = []

def train(epochs=1, batch_size=128):
    batch_count = int(X_train.shape[0] /  batch_size)
    print("Epochs: ", epochs)
    print("Batch size: ", batch_size)
    print("Batches per Epoch: ", batch_count)
    
    for e in range(1, epochs + 1):
        print('\n', '-' * 15, 'Epoch {}'.format(e), '-' * 15)
        for _ in range(batch_count):
            noise = np.random.normal(0, 1, size=[batch_size, noise_size])
            indexes = np.random.randint(0, X_train.shape[0], size=batch_size)
            real_batch = np.array(X_train)[indexes]
            generated_batch = gen_model.predict(noise)
            
            X_joined = np.concatenate([real_batch, generated_batch])
            y_joined = np.concatenate([y_train[indexes], np.array([[0, 0, 0.9]] * batch_size)])

            dis_model.trainable = True
            dis_loss = dis_model.train_on_batch(X_joined, y_joined)
            
            noise = np.random.normal(0, 1, size=[batch_size, noise_size])
            y_gen = np.array([[0.05, 0.05, 0.9]] * batch_size)
            dis_model.trainable = False
            gen_loss = gan.train_on_batch(noise, y_gen)
        generator_losses.append(gen_loss)
        discriminator_losses.append(dis_loss)

In [192]:
train(5000)

Epochs:  5000
Batch size:  128
Batches per Epoch:  703

 --------------- Epoch 1 ---------------

 --------------- Epoch 2 ---------------

 --------------- Epoch 3 ---------------

 --------------- Epoch 4 ---------------

 --------------- Epoch 5 ---------------

 --------------- Epoch 6 ---------------

 --------------- Epoch 7 ---------------

 --------------- Epoch 8 ---------------

 --------------- Epoch 9 ---------------

 --------------- Epoch 10 ---------------

 --------------- Epoch 11 ---------------

 --------------- Epoch 12 ---------------

 --------------- Epoch 13 ---------------

 --------------- Epoch 14 ---------------

 --------------- Epoch 15 ---------------

 --------------- Epoch 16 ---------------

 --------------- Epoch 17 ---------------

 --------------- Epoch 18 ---------------

 --------------- Epoch 19 ---------------

 --------------- Epoch 20 ---------------

 --------------- Epoch 21 ---------------

 --------------- Epoch 22 ---------------

 ------

KeyboardInterrupt: 

In [193]:
prediction = dis_model.predict(X_train).argmax(1)

In [194]:
train, test, val = futils.load_train_test_val_dataset('../../../data/processed/give-me-some-credit/')

X_train = train.drop('SeriousDlqin2yrs', axis=1)
y_train = train['SeriousDlqin2yrs']

X_test = test.drop('SeriousDlqin2yrs', axis=1)
y_test = test['SeriousDlqin2yrs']

X_val = val.drop('SeriousDlqin2yrs', axis=1)
y_val = val['SeriousDlqin2yrs']

In [195]:
import sklearn.metrics
sklearn.metrics.roc_auc_score(y_train, prediction)

0.6908419466391521

In [196]:
y_train

0        0
1        0
2        0
3        0
4        0
5        0
6        0
7        0
8        0
9        0
10       0
11       0
12       0
13       0
14       0
15       0
16       0
17       0
18       0
19       0
20       0
21       0
22       0
23       0
24       0
25       0
26       0
27       1
28       0
29       0
        ..
89970    0
89971    0
89972    0
89973    0
89974    0
89975    0
89976    0
89977    0
89978    0
89979    0
89980    0
89981    0
89982    0
89983    0
89984    0
89985    0
89986    0
89987    0
89988    0
89989    0
89990    0
89991    0
89992    0
89993    0
89994    0
89995    0
89996    0
89997    0
89998    0
89999    0
Name: SeriousDlqin2yrs, Length: 90000, dtype: int64