In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [108]:
import tensorflow as tf
from tensorflow.keras.activations import sigmoid, relu
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, SGD
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler
from tensorflow.keras.losses import binary_crossentropy
from sklearn.metrics import confusion_matrix

tf.__version__

'2.1.0'

In [31]:
df = pd.read_csv('uci_credit_card.csv')
bfeatures = np.loadtxt('bfeatures.txt')

In [4]:
pd.options.display.max_rows = None
pd.options.display.max_columns = None

# Dense layers

## The linear algebra of dense layers

In [5]:
borrower_features = np.array([[2, 2, 43]], dtype=np.float32)

In [6]:
# initialize weights and biases
bias1 = tf.Variable(1.0, tf.float32)
weights1 = tf.Variable(tf.ones((3, 2)), tf.float32)

In [9]:
# perform matmul of borrower_features and weights1
prod1 = tf.matmul(borrower_features, weights1)

# apply sigmoid fxn to prod1
dense1 = sigmoid(prod1 + bias1)

print(f'dense1 output shape: {dense1.shape}')

dense1 output shape: (1, 2)


In [10]:
# initialize 2nd set of weights and biases
bias2 = tf.Variable(1.0)
weights2 = tf.Variable(tf.ones((2, 1)))

# perform matmul and apply activation (pred)
prod2 = tf.matmul(dense1, weights2)

pred = sigmoid(prod2 + bias2)
print(f'prediction: {pred.numpy()[0,0]}\nactual: 1')

prediction: 0.9525741338729858
actual: 1


## The low-level approach with multiple examples

In [12]:
borrower_features = tf.constant([[3, 3, 23],
                                 [2, 1, 24],
                                 [1, 1, 49],
                                 [1, 1, 49],
                                 [2, 1, 29]], dtype=tf.float32)

In [14]:
# initialize weights and bias
weights1 = tf.Variable([[-0.6, 0.6],
                        [0.8, -0.3],
                        [-0.09, -0.08]], tf.float32)
bias1 = tf.Variable([0.1], tf.float32)

In [21]:
# compute for the first dense layer
prod1 = tf.matmul(borrower_features, weights1)
dense1 = sigmoid(prod1 + bias1)

# print the shapes
print(f'shape of input: {borrower_features.shape}')
print(f'shape of weights1: {weights1.shape}')
print(f'shape of bias1: {bias1.shape}')
print(f'shape of dense1: {dense1.shape}')

shape of input: (5, 3)
shape of weights1: (3, 2)
shape of bias1: (1,)
shape of dense1: (5, 2)


## Using the dense layer operation

In [23]:
print(df.shape)
df.head()

(30000, 25)


Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month
0,1,20000.0,2,2,1,24,2,2,-1,-1,-2,-2,3913.0,3102.0,689.0,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1
1,2,120000.0,2,2,2,26,-1,2,0,0,0,2,2682.0,1725.0,2682.0,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1
2,3,90000.0,2,2,2,34,0,0,0,0,0,0,29239.0,14027.0,13559.0,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0
3,4,50000.0,2,2,1,37,0,0,0,0,0,0,46990.0,48233.0,49291.0,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0
4,5,50000.0,1,2,1,57,-1,0,-1,0,0,0,8617.0,5670.0,35835.0,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0


In [37]:
borrower_features = tf.convert_to_tensor(bfeatures, tf.float32)

In [40]:
# define dense layers
dense1 = Dense(7, activation='sigmoid')(borrower_features)
dense2 = Dense(3, activation='sigmoid')(dense1)
preds = Dense(1, activation='sigmoid')(dense2)

print(f'dense1 shape: {dense1.shape}')
print(f'dense2 shape: {dense2.shape}')
print(f'pred shape: {preds.shape}')

dense1 shape: (100, 7)
dense2 shape: (100, 3)
pred shape: (100, 1)


# Activation functions

## Binary classification problems

In [56]:
bill_amounts = df[['BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3']].to_numpy()
default = df[['default.payment.next.month']].to_numpy()

In [59]:
# construct input layer from features
inputs = tf.constant(bill_amounts, tf.float32)

# define dense layers
dense1 = Dense(3, activation='relu')(inputs)
dense2 = Dense(2, activation='relu')(dense1)
outputs = Dense(1, activation='sigmoid')(dense2)

# print error for first five examples
error = default[:5] - outputs.numpy()[:5]
print(f'error:\n{error}')

error:
[[1.]
 [1.]
 [0.]
 [0.]
 [0.]]


## Multiclass classification problems

In [62]:
borrower_features = df[['BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5',
                        'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5']].to_numpy()

In [63]:
# construct input layer from features
inputs = tf.constant(borrower_features, tf.float32)

# dense layers
dense1 = Dense(10, activation='sigmoid')(inputs)
dense2 = Dense(8, activation='relu')(dense1)
outputs = Dense(6, activation='softmax')(dense2)

# first 5 preds
print(outputs.numpy()[:5])

[[0.18549547 0.07212923 0.2760962  0.13315901 0.11850524 0.21461488]
 [0.12895556 0.11723492 0.20758584 0.2127885  0.17234328 0.16109186]
 [0.12840065 0.10450698 0.21721505 0.20250018 0.17251293 0.17486422]
 [0.17900315 0.10597353 0.2257594  0.151632   0.14196397 0.19566797]
 [0.13613571 0.0591994  0.3965878  0.12837094 0.06669483 0.21301134]]


# Optimizers

## The dangers of local minima

In [66]:
# use momentum hehe

# Training a network in `tensorflow`

## Initialization in `tensorflow`

In [166]:
# initialize weights and biases
w1 = tf.Variable(np.random.normal(size=(23, 7)), dtype=tf.float32)
b1 = tf.Variable(tf.ones([7]))
w2 = tf.Variable(np.random.normal(size=(7, 1)), dtype=tf.float32)
b2 = tf.Variable(0.0)

## Defining the model and loss function

In [92]:
X = df.iloc[:4000, 1:24].to_numpy(np.float32)
y = df.iloc[:4000, 24].to_numpy(np.float32)

b_features, test_features, b_targets, test_targets = train_test_split(X, y,
                                                                      test_size=0.25,
                                                                      stratify=y,
                                                                      random_state=42)

In [93]:
# scale data
s = MaxAbsScaler()

b_features = s.fit_transform(b_features)
test_features = s.transform(test_features)

In [116]:
b_features = tf.convert_to_tensor(b_features, tf.float32)

In [107]:
# define the model
def model(w1, b1, w2, b2, features = b_features):
    # relu to layer1
    layer1 = relu(tf.matmul(features, w1) + b1)
    # apply dropout
    dropout = Dropout(0.25)(layer1)
    
    return sigmoid(tf.matmul(dropout, w2) + b2)

def loss_fxn(w1, b1, w2, b2, features = b_features, targets = b_targets):
    preds = model(w1, b1, w2, b2)
    
    return binary_crossentropy(targets, preds)

## Training neural networks with `tensorflow`

In [167]:
opt = Adam(0.001)

# train the model
for j in range(100):
    opt.minimize(lambda: loss_fxn(w1, b1, w2, b2),
                 var_list=[w1, b1, w2, b2])

# make preds with model
model_preds = model(w1, b1, w2, b2, test_features)

In [168]:
print(confusion_matrix(test_targets, model_preds.numpy().round()))

[[769   8]
 [219   4]]
