# Neural Nets

Notebook by MacKenzye Leroy exploring basic neural nets with our capstone data <br>
1/26/21

In [1]:
#Import
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_probability as tfp
from platform import python_version

In [2]:
#check versions

In [3]:
python_version() #I was using 3.8.8

'3.8.8'

In [4]:
tf.__version__ #I was using 2.7.0

'2.7.0'

In [5]:
#Set directory

In [6]:
os.chdir('/Users/mackenzyeleroy/Documents/Capstone/Getting_data_for_FSL')

In [7]:
#Function for getting precitions, true positive, true negative,false posisitve, false negative, accuracy, recall and precsion
def getTestResults(fitted_model, test_df, test_labels):
    result = pd.DataFrame(fitted_model.predict(test_df), test_labels).reset_index()

    result['prediction'] = (result[0] > .5).astype(int)
    result = result.rename(columns = {0: 'score', 'index': 'outcome'})
    result['correct'] = result['outcome'] == result['prediction']
    result['true_positive'] = result[result['prediction'] == 1]['prediction'] == result[result['prediction'] == 1]['correct']
    result['false_positive'] = result[result['prediction'] == 1]['prediction'] != result[result['prediction'] == 1]['correct']
    result['true_negative'] = result[result['prediction'] == 0]['prediction'] == 1- result[result['prediction'] == 0]['correct']
    result['false_negative'] = result[result['prediction'] == 0]['prediction'] == result[result['prediction'] == 0]['correct']

    result = result.fillna(False)

    #TP, FP, FN, TN
    true_postive= round(result.true_positive.sum(), 4)
    false_postive = round(result.false_positive.sum(), 4)
    true_negative = round(result.true_negative.sum(), 4)
    false_negative = round(result.false_negative.sum(), 4)

    #Accuracy/Preciosn/Recall
    accuracy = round(result.correct.sum()/len(result), 4)
    precision = round(true_postive/(true_postive + false_postive), 4)
    recall = round(true_postive/(true_postive + false_negative), 4)
    
    print(f"""Accuracy: {accuracy} \n
            True Positive: {true_postive} False Positive: {false_postive} \n
            False Negative: {false_negative} True negative: {true_negative} \n
            Precision {precision} recall {recall}""")

First, looking at a 2-D Neural Net (aka one parameter)

In [8]:
#define Model

BATCH_SIZE = 5
def get_basic_model():
    model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape = (199, )),
    #tf.keras.layers.Dense(128, activation='sigmoid'),
    tf.keras.layers.Dense(56, activation='sigmoid'),
    tf.keras.layers.Dense(28, activation='sigmoid'),
    tf.keras.layers.Dense(1)
  ])

    model.compile(optimizer='adam',
                loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                metrics=['accuracy'])
    return model

Notes about model:
* First layer flattens data into 1-D vector
* Second layer (1st hidden) is comprised of 56 nodes and the activation function is a sigmoid
* Third layer (2nd hidden) is comprised of 28 nodes and the activation function is a sigmoid
* Final ouptput layer is one node


While the sigmoid function is less popular today (was very popular in 90's) it worked better for me. 

In [9]:
#Initalize dataframe to save results
result = pd.DataFrame(columns = ['Parameter', 'Training Loss', 'Training Accuracy'])

#function for readinf in files and getting into manner that's ready for model defined above
def importAndPrepFiles(parameter_number):   
    #file names
    test_file = f"PARM{parameter_number}_TEST.tsv"
    train_file = f"PARM{parameter_number}_TRAIN.tsv"
    
    #read in as a pandas dataframe
    test_dataset = pd.read_csv(test_file, sep='\t', header = None)
    train_dataset = pd.read_csv(train_file, sep='\t', header = None)

    #change names to strings because tensorflow doesn't like woring with unnamed indexed columns
    column_names = ['f' + str(x) for x in train_dataset.columns]
    column_names[0] = 'outcome'

    #change column names
    train_dataset.columns = column_names
    test_dataset.columns = column_names

    #change -1 (fault) to 0-Tensorflow's Binary Corss Entropy Function needs 0/1
    train_dataset['outcome'] = train_dataset['outcome'].replace(-1, 0)
    test_dataset['outcome'] = test_dataset['outcome'].replace(-1, 0)

    #pop labels off and save separetely
    target_train = train_dataset.pop('outcome')
    target_test = test_dataset.pop('outcome')
    
    #convert datasets and labels to tensors
    train_dataset = tf.convert_to_tensor(train_dataset)
    target_train = tf.convert_to_tensor(target_train)
    test_dataset = tf.convert_to_tensor(test_dataset)
    target_test = tf.convert_to_tensor(target_test)
    
    #return list
    return [train_dataset, target_train, test_dataset, target_test]

In [10]:
#the following code reads in each parameter dataframe, fits a neural net with the data and return the accuracy and loss (on the training set)
#Note: TensorFlow doesn't like me running model.fit in a for loop, so a warning may be thrown

for x in range(32):
    
    #import/prep files
    tensorList = importAndPrepFiles(x)
    
    train_dataset = tensorList[0]
    target_train = tensorList[1]
    #initiaite model
    model = get_basic_model()

    model.fit(train_dataset, target_train, epochs=50, verbose = 0)
    train_loss, train_acc = model.evaluate(train_dataset, target_train, verbose = 0)
    result.loc[x] = [x, train_loss, train_acc]
    print(f"Parameter: {x} Training Loss: {train_loss} Training Accuracy: {train_acc}")
    

Parameter: 0 Training Loss: 0.5639533400535583 Training Accuracy: 0.6865671873092651
Parameter: 1 Training Loss: 0.6904732584953308 Training Accuracy: 0.46268656849861145
Parameter: 2 Training Loss: 0.5519682168960571 Training Accuracy: 0.7164179086685181
Parameter: 3 Training Loss: 0.23164735734462738 Training Accuracy: 1.0
Parameter: 4 Training Loss: 0.5672956705093384 Training Accuracy: 0.7164179086685181
Parameter: 5 Training Loss: 0.24158737063407898 Training Accuracy: 1.0
Parameter: 6 Training Loss: 0.5977708101272583 Training Accuracy: 0.7164179086685181
Parameter: 7 Training Loss: 0.32627204060554504 Training Accuracy: 0.89552241563797
Parameter: 8 Training Loss: 0.5583853721618652 Training Accuracy: 0.46268656849861145
Parameter: 9 Training Loss: 0.5682279467582703 Training Accuracy: 0.46268656849861145
Parameter: 10 Training Loss: 0.5658130645751953 Training Accuracy: 0.46268656849861145
Parameter: 11 Training Loss: 0.5834546089172363 Training Accuracy: 0.46268656849861145
Pa

In [11]:
#check for lowest loss
result['Training Loss'].argmin()

3

In [12]:
#check for highest accuracy
result['Training Accuracy'].argmax()

3

In [13]:
#Parameters 3 and 5 look good so let's check those indvidually

In [14]:
#import and prep files
parameter3_tensors = importAndPrepFiles(3)
parameter5_tensors = importAndPrepFiles(5)

In [15]:
#establish training and test sets for parameter 3
train_dataset_param3 = parameter3_tensors[0]
target_train_param3 = parameter3_tensors[1]
test_dataset_param3 = parameter3_tensors[2]
target_test_param3 = parameter3_tensors[3]

model = get_basic_model()

model.fit(train_dataset_param3, target_train_param3, epochs=50, verbose = 0)
#check training accuracy/loss
train_loss, train_acc = model.evaluate(train_dataset_param3, target_train_param3, verbose = 2)

3/3 - 0s - loss: 0.2062 - accuracy: 1.0000 - 102ms/epoch - 34ms/step


In [16]:
#check against test set

In [17]:
test_loss, test_acc = model.evaluate(test_dataset_param3, target_test_param3, verbose = 2)

2/2 - 0s - loss: 0.5961 - accuracy: 0.6364 - 19ms/epoch - 10ms/step


In [18]:
getTestResults(model, test_dataset_param3,target_test_param3)

Accuracy: 0.6364 

            True Positive: 4 False Positive: 2 

            False Negative: 10 True negative: 17 

            Precision 0.6667 recall 0.2857


In [19]:
#Repeat for parameter 5:

In [20]:
#establish training and test sets for parameter 3
train_dataset_param5 = parameter5_tensors[0]
target_train_param5 = parameter5_tensors[1]
test_dataset_param5 = parameter5_tensors[2]
target_test_param5 = parameter5_tensors[3]

model = get_basic_model()

model.fit(train_dataset_param5, target_train_param5, epochs=50, verbose = 0)
#check training accuracy/loss
train_loss, train_acc = model.evaluate(train_dataset_param5, target_train_param5, verbose = 2)

3/3 - 0s - loss: 0.1491 - accuracy: 1.0000 - 101ms/epoch - 34ms/step


In [21]:
#check against test set

In [22]:
test_loss, test_acc = model.evaluate(test_dataset_param3, target_test_param3, verbose = 2)

2/2 - 0s - loss: 0.6839 - accuracy: 0.6364 - 18ms/epoch - 9ms/step


## 3D

In [23]:
## Load in each csv as dataframe and save it to list. 

training_df = []
test_df = []
for x in range(32):
    
    #file names
    test_file = f"PARM{x}_TEST.tsv"
    train_file = f"PARM{x}_TRAIN.tsv"
    
    #read in as dataframe
    test_dataset = pd.read_csv(test_file, sep='\t', header = None)
    train_dataset = pd.read_csv(train_file, sep='\t', header = None)
    
    #change names to strings because tensorflow doesn't like woring with unnamed indexed columns
    column_names = ['f' + str(x) for x in train_dataset.columns]
    column_names[0] = 'outcome'
   
    #change column names
    train_dataset.columns = column_names
    test_dataset.columns = column_names
    
    #change -1 (fault) to 0-Tensorflow's Binary Corss Entropy Function needs 0/1
    train_dataset['outcome'] = train_dataset['outcome'].replace(-1, 0)
    test_dataset['outcome'] = test_dataset['outcome'].replace(-1, 0)
    
    #pop labels off and save separetely
    target_train = train_dataset.pop('outcome')
    target_test = test_dataset.pop('outcome')
    
    #append to list
    training_df.append(train_dataset)
    test_df.append(test_dataset)

In [24]:
#convert lists of dataframes to 3d array
training_3d = np.array(training_df)
test_3d = np.array(test_df)


In [25]:
#swap axes-need 67 (number of observations) in x position
training_3d = np.swapaxes(training_3d,0,1)
test_3d = np.swapaxes(test_3d,0,1)

In [26]:
#convert to tensor and check shape
training_3d = tf.convert_to_tensor(training_3d)
training_3d.shape

TensorShape([67, 32, 199])

In [27]:
#convert to tensor and check shape
test_3d = tf.convert_to_tensor(test_3d)
test_3d.shape

TensorShape([33, 32, 199])

In [28]:
#Define Model of 3d Input

BATCH_SIZE = 5
def get_basic_model_3d():
    model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape = (32, 199, )),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(56, activation='relu'),
    tf.keras.layers.Dense(28, activation='relu'),
    tf.keras.layers.Dense(1)
  ])

    model.compile(optimizer='adam',
                loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                metrics=['accuracy',])
    return model

Notes about Model:
* First layer is a flatten layer again, but this time flattens across an extra dimension (resulting vector is 400K+ elements long
* Second layer is comprised of 128 nodes with a relu activation function
* Final layers are the same as 2D model above as far as nodes go, but activition function is relu now

For the 3d model, relu worked better than sigmoid this time

In [29]:
#Really good Result Model: (with 200 epochs)
#def get_basic_model_3d():
#    model = tf.keras.Sequential([
#    tf.keras.layers.Flatten(input_shape = (32, 199, )),
#    tf.keras.layers.Dense(128, activation='relu'),
#    tf.keras.layers.Dense(56, activation='relu'),
#    tf.keras.layers.Dense(28, activation='relu'),
#    tf.keras.layers.Dense(1)
#  ])

#    model.compile(optimizer='adam',
#                loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
#                metrics=['accuracy',])
#    return model

In [30]:
BATCH_SIZE = 20

#initiate model
model = get_basic_model_3d()

#fit model
model.fit(training_3d, target_train, epochs=200, verbose = 0, batch_size = BATCH_SIZE)

#check training loss and accuracy
train_loss, train_acc = model.evaluate(training_3d, target_train, verbose = 2)

3/3 - 0s - loss: 7.4548e-10 - accuracy: 1.0000 - 112ms/epoch - 37ms/step


#### Training:

In [31]:
#get full results for training
getTestResults(model, training_3d, target_train)

Accuracy: 1.0 

            True Positive: 36 False Positive: 0 

            False Negative: 0 True negative: 31 

            Precision 1.0 recall 1.0


#### Test:

In [32]:
test_loss, test_acc = model.evaluate(test_3d, target_test, verbose = 2)

2/2 - 0s - loss: 181.5024 - accuracy: 0.5758 - 19ms/epoch - 10ms/step


In [33]:
getTestResults(model, test_3d, target_test)

Accuracy: 0.5758 

            True Positive: 6 False Positive: 6 

            False Negative: 8 True negative: 13 

            Precision 0.5 recall 0.4286
