In [29]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/company-bankruptcy-prediction/data.csv


In [30]:
from keras.layers import BatchNormalization, Dense, Dropout
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from sklearn import metrics   
from sklearn.model_selection import KFold

In [31]:
df = pd.read_csv('../input/company-bankruptcy-prediction/data.csv')
Y = df['Bankrupt?']
X = df.drop(['Bankrupt?'], axis=1)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state = 42)

In [32]:
inputs = np.concatenate((X_train, X_test), axis=0)
targets = np.concatenate((Y_train, Y_test), axis=0)

acc_per_fold = []
loss_per_fold = []

fold_no = 1
num_folds = 10

kfold = KFold(n_splits=num_folds, shuffle=True)

# K-fold Cross Validation model evaluation
for train, test in kfold.split(inputs, targets):

    # Cool model stuff (Did some off notebook tweaking to get the best model!)
    model = Sequential()

    model.add(Dense(64, activation='relu', input_shape=(95,)))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))

    model.add(Dense(64, activation='relu')) 
    model.add(BatchNormalization())
    model.add(Dropout(0.25))

    model.add(Dense(1, activation='sigmoid'))
    

    checkpoint = ModelCheckpoint('BankruptcyModel.h5',
                             monitor='val_loss',
                             mode='min',
                             save_best_only=True,
                             verbose=1)
    earlystop = EarlyStopping(monitor='val_loss',
                          min_delta=0,
                          patience=25,
                          verbose=1,
                          restore_best_weights=True
                          )
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.2,
                              patience=3,
                              verbose=1,
                              min_delta=0.0001)
    # This will help with accuracy
    callbacks = [earlystop,checkpoint,reduce_lr]
    
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Asthetics
    print('------------------------------------------------------------------------')
    print(f'Training Fold {fold_no} / {num_folds}')

    history = model.fit(inputs[train], targets[train],
              epochs=25,
              callbacks = callbacks,
              verbose=1)

    # Generalization metrics
    scores = model.evaluate(inputs[test], targets[test], verbose=1)
    
    print(f'Score for Fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

    fold_no = fold_no + 1

------------------------------------------------------------------------
Training Fold 1 / 10
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Score for Fold 1: loss of 0.11468835920095444; accuracy of 97.3607063293457%
------------------------------------------------------------------------
Training Fold 2 / 10
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Score for Fold 2: loss of 0.14338241517543793; accuracy of 96.77419066429138%
------------------------------------------------------------------------
Tr

Score for Fold 3: loss of 0.11359000205993652; accuracy of 97.06745147705078%
------------------------------------------------------------------------
Training Fold 4 / 10
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Score for Fold 4: loss of 0.13220366835594177; accuracy of 96.62756323814392%
------------------------------------------------------------------------
Training Fold 5 / 10
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Score for Fold 5: loss of 0.14365985989570618; accuracy of 96.187680959701

Epoch 25/25
Score for Fold 6: loss of 0.14040081202983856; accuracy of 96.334308385849%
------------------------------------------------------------------------
Training Fold 7 / 10
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Score for Fold 7: loss of 0.14629295468330383; accuracy of 96.48093581199646%
------------------------------------------------------------------------
Training Fold 8 / 10
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Score for Fold 8: loss of 0.1412932276725769; accuracy of 96.920

Epoch 23/25
Epoch 24/25
Epoch 25/25
Score for Fold 9: loss of 0.13862299919128418; accuracy of 96.62756323814392%
------------------------------------------------------------------------
Training Fold 10 / 10
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Score for Fold 10: loss of 0.1315024346113205; accuracy of 96.91630005836487%


In [33]:
# Asthetics again!
print('------------------------------------------------------------------------')
print('Score per Fold')
print('------------------------------------------------------------------------')
# Print for how many ever folds
for i in range(0, len(acc_per_fold)):
    #print('------------------------------------------------------------------------')
    print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Accuracy: {acc_per_fold[i]}%')
print('------------------------------------------------------------------------')
# This'll give us a good idea of the leeway for our accuracy
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print('------------------------------------------------------------------------')
lowAvg = (np.mean(acc_per_fold)) - (np.std(acc_per_fold))
highAvg = (np.mean(acc_per_fold)) + (np.std(acc_per_fold))
lowAvg = (round(lowAvg, 2))
highAvg = (round(highAvg, 2))
print(f'> Expect anywhere from: {lowAvg}% - {highAvg}% model accuracy')

------------------------------------------------------------------------
Score per Fold
------------------------------------------------------------------------
> Fold 1 - Loss: 0.11468835920095444 - Accuracy: 97.3607063293457%
> Fold 2 - Loss: 0.14338241517543793 - Accuracy: 96.77419066429138%
> Fold 3 - Loss: 0.11359000205993652 - Accuracy: 97.06745147705078%
> Fold 4 - Loss: 0.13220366835594177 - Accuracy: 96.62756323814392%
> Fold 5 - Loss: 0.14365985989570618 - Accuracy: 96.18768095970154%
> Fold 6 - Loss: 0.14040081202983856 - Accuracy: 96.334308385849%
> Fold 7 - Loss: 0.14629295468330383 - Accuracy: 96.48093581199646%
> Fold 8 - Loss: 0.1412932276725769 - Accuracy: 96.92082405090332%
> Fold 9 - Loss: 0.13862299919128418 - Accuracy: 96.62756323814392%
> Fold 10 - Loss: 0.1315024346113205 - Accuracy: 96.91630005836487%
------------------------------------------------------------------------
Average scores for all folds:
> Accuracy: 96.72975242137909 (+- 0.3344307178376142)
> Loss

In [34]:
# Nice! This avg acc tell's us our general for the entire dataset so that we aren't just getting 99% in one split & 20% in the other. Fun Stuff :)\

In [None]:
inputs = np.concatenate((X_train, X_test), axis=0)
targets = np.concatenate((Y_train, Y_test), axis=0)

acc_per_fold = []
loss_per_fold = []

fold_no = 1
num_folds = 25

kfold = KFold(n_splits=num_folds, shuffle=True)

# K-fold Cross Validation model evaluation
for train, test in kfold.split(inputs, targets):

    # Cool model stuff (Did some off notebook tweaking to get the best model!)
    model = Sequential()

    model.add(Dense(64, activation='relu', input_shape=(95,)))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))

    model.add(Dense(64, activation='relu')) 
    model.add(BatchNormalization())
    model.add(Dropout(0.25))

    model.add(Dense(1, activation='sigmoid'))
    

    checkpoint = ModelCheckpoint('BankruptcyModel.h5',
                             monitor='val_loss',
                             mode='min',
                             save_best_only=True,
                             verbose=1)
    earlystop = EarlyStopping(monitor='val_loss',
                          min_delta=0,
                          patience=25,
                          verbose=1,
                          restore_best_weights=True
                          )
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.2,
                              patience=3,
                              verbose=1,
                              min_delta=0.0001)
    # This will help with accuracy
    callbacks = [earlystop,checkpoint,reduce_lr]
    
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Asthetics
    print('------------------------------------------------------------------------')
    print(f'Training Fold {fold_no} / {num_folds}')

    history = model.fit(inputs[train], targets[train],
              epochs=25,
              callbacks = callbacks,
              verbose=1)

    # Generalization metrics
    scores = model.evaluate(inputs[test], targets[test], verbose=1)
    
    print(f'Score for Fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

    fold_no = fold_no + 1

------------------------------------------------------------------------
Training Fold 1 / 25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Score for Fold 1: loss of 0.08270473778247833; accuracy of 97.43589758872986%
------------------------------------------------------------------------
Training Fold 2 / 25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Score for Fold 2: loss of 0.06459654867649078; accuracy of 98.90109896659851%
------------------------------------------------------------------------
T

Score for Fold 3: loss of 0.17094926536083221; accuracy of 95.60439586639404%
------------------------------------------------------------------------
Training Fold 4 / 25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Score for Fold 4: loss of 0.1298188865184784; accuracy of 97.0695972442627%
------------------------------------------------------------------------
Training Fold 5 / 25
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Score for Fold 5: loss of 0.20627889037132263; accuracy of 95.9706962108612%

Epoch 25/25
Score for Fold 6: loss of 0.07267742604017258; accuracy of 98.16849827766418%
------------------------------------------------------------------------
Training Fold 7 / 25
Epoch 1/25


In [None]:
# Asthetics again!
print('------------------------------------------------------------------------')
print('Score per Fold')
print('------------------------------------------------------------------------')
# Print for how many ever folds
for i in range(0, len(acc_per_fold)):
    #print('------------------------------------------------------------------------')
    print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Accuracy: {acc_per_fold[i]}%')
print('------------------------------------------------------------------------')
# This'll give us a good idea of the leeway for our accuracy
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print('------------------------------------------------------------------------')
lowAvg = (np.mean(acc_per_fold)) - (np.std(acc_per_fold))
highAvg = (np.mean(acc_per_fold)) + (np.std(acc_per_fold))
lowAvg = (round(lowAvg, 2))
highAvg = (round(highAvg, 2))
print(f'> Expect anywhere from: {lowAvg}% - {highAvg}% model accuracy')

In [None]:
# And Voila! Increased fold number: Got better generalization, similar average as last time, with a wider range of accuracy :)\