##### Pre-requisites (Installation)

In [None]:
!pip install --upgrade pip

In [None]:
!pip install tensorflow

In [None]:
!pip install tf-nightly

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from tensorflow import keras

# Report the version of the TensorFlow package that was actually imported.
print(tf.__version__)

In [None]:
# Colab-specific: mount Google Drive at /gdrive/ so its files can be read by path.
from google.colab import drive
drive.mount('/gdrive/')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load joint1c.csv from the mounted Drive folder as plain comma-separated data,
# ignoring spaces that directly follow a delimiter.
vacand = pd.read_csv(
    '/gdrive/My Drive/joint1c.csv',
    sep=',',
    skipinitialspace=True,
)

In [None]:
# Re-read the CSV with a regex separator so whitespace on either side of every comma
# is consumed by the delimiter rather than ending up in headers or values.
vacand = pd.read_csv(
    '/gdrive/My Drive/joint1c.csv',
    # Raw string: in a normal literal '\s' is an invalid escape sequence, which newer
    # Python versions warn about.
    sep=r'\s*,\s*',
    skipinitialspace=True,
    engine='python',  # the Python parser engine is the one that handles regex separators
)

In [None]:
# Display the loaded DataFrame for a visual check.
vacand

In [None]:
# Remove the PEPTIDE column from the frame, then display the result.
vacand = vacand.drop(columns=['PEPTIDE'])
vacand

In [None]:
# Label column; every other column of `vacand` is used as a feature.
target_column = ['CATEGORY']

# Keep the DataFrame's own column order. Building this list through a set difference
# gives an arbitrary order that can change between kernel sessions, which would change
# the feature order the model is trained on.
predictors = [col for col in vacand.columns if col not in target_column]

# Divide every feature column by its own maximum value.
vacand[predictors] = vacand[predictors] / vacand[predictors].max()
vacand.describe()

In [None]:
# Split the table into features (X) and label (Y). Each is rebuilt from the raw value
# array, so the original column names are replaced by default integer column labels.
X = pd.DataFrame(vacand[predictors].values)
Y = pd.DataFrame(vacand[target_column].values)

In [None]:
# Sanity check: X and Y should have the same number of rows.
X.shape, Y.shape

In [None]:
from sklearn.model_selection import train_test_split


# Hold out 20% of all rows as the test set.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=20
)

# Take 25% of the remaining 80% as validation, i.e. a 60/20/20 train/val/test split overall.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train, Y_train, test_size=0.25, random_state=20
)

In [None]:
from keras.utils import to_categorical

# One-hot encode the labels of every split.
# The class count is taken from the full label column `Y` and passed explicitly:
# when left to infer it, to_categorical uses max(label) + 1 of each split separately,
# so a split that happens to miss the highest class would get a narrower matrix.
num_classes = int(np.max(Y.values)) + 1

Y_train = to_categorical(Y_train, num_classes=num_classes)
Y_test = to_categorical(Y_test, num_classes=num_classes)
Y_val = to_categorical(Y_val, num_classes=num_classes)

In [None]:
# Number of one-hot columns (i.e. classes) in the encoded test labels.
count_classes = Y_test.shape[1]
print(count_classes)

In [None]:
# Number of one-hot columns (i.e. classes) in the encoded validation labels.
# Overwrites the `count_classes` value computed from the test labels.
count_classes = Y_val.shape[1]
print(count_classes)

In [None]:
from sklearn.preprocessing import MinMaxScaler


In [None]:
# Min-max scale the features using statistics learned from the training split ONLY.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)

# Apply the *same* fitted transform to validation and test data. Re-fitting on the test
# set would scale it by its own min/max (inconsistent with training, and leaking test
# statistics), and leaving X_val unscaled would validate on a different feature scale
# than the one the model is trained on.
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [None]:
# Overall minimum and maximum of the scaled training features.
X_train.min(), X_train.max()

#### Model building & Training

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential 
from keras.layers import Dense,Dropout


In [None]:
from tensorflow.keras import layers
import tensorflow as tf

# Small feed-forward classifier, assembled from an ordered list of layers.
# NOTE(review): the output layer uses two independent sigmoid units while the targets are
# one-hot encoded; a softmax output is the usual choice for mutually exclusive classes --
# confirm this is intended.
model = tf.keras.Sequential([
    # First hidden layer: 10 densely-connected units, followed by 50% dropout.
    layers.Dense(10, activation='relu'),
    layers.Dropout(0.5),
    # Second hidden layer: 3 units, followed by 50% dropout.
    layers.Dense(3, activation='relu'),
    layers.Dropout(0.5),
    # Output layer with 2 units.
    layers.Dense(2, activation='sigmoid'),
])

In [None]:
# Configure training: Adam with its default settings, binary cross-entropy loss,
# and binary accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['BinaryAccuracy'],
)

#### Accuracy, Loss & other results


In [None]:
# Train for 80 epochs without console output, evaluating on the validation split after
# each epoch; the per-epoch metrics are kept in `history1`.
history1 = model.fit(
    X_train,
    Y_train,
    epochs=80,
    validation_data=(X_val, Y_val),
    verbose=0,
)

In [None]:
# Print the model's layers and parameter counts.
model.summary()

In [None]:
# Validation split: predictions plus [loss, binary accuracy] from evaluate().
# NOTE: despite its name, `pred_train` holds predictions for the validation data.
pred_train = model.predict(X_val)
scores = model.evaluate(X_val, Y_val, verbose=0)
# evaluate() reports accuracy as a 0-1 fraction, so scale by 100 before labelling it '%'.
print('Accuracy on validation data: {:.2f}% \n Error on validation data: {}'.format(scores[1] * 100, 1 - scores[1]))

# Test split: same procedure.
pred_test = model.predict(X_test)
scores2 = model.evaluate(X_test, Y_test, verbose=0)
print('\n Accuracy on test data: {:.2f}% \n Error on test data: {}'.format(scores2[1] * 100, 1 - scores2[1]))

# Raw [loss, accuracy] list for the test split.
print('\n test loss, test acc:', scores2)

In [None]:
# Names of the values returned by model.evaluate(). A bare expression in the middle of a
# cell is not displayed, so print it explicitly.
print(model.metrics_names)

# List every series recorded in the training history.
print(history1.history.keys())

In [None]:
# Put every recorded history series (one column per key, one row per epoch) into a
# DataFrame and plot them all on one chart.
losses = pd.DataFrame(history1.history)
losses.plot()

In [None]:
# Binarise the test predictions in place: values <= 0.5 become 0, everything else 1.
# Slice assignment keeps `pred_test` the same array object with its original dtype.
pred_test[...] = np.where(pred_test <= 0.5, 0, 1)


In [None]:
from sklearn.metrics import accuracy_score
# Accuracy of the thresholded predictions against the one-hot test labels. Both inputs are
# 2-D indicator arrays, for which scikit-learn documents accuracy_score as subset accuracy
# (a row counts only if every column matches).
accuracy_score(Y_test, pred_test)

In [None]:
# Shapes of the prediction arrays (`pred_train` was produced from the validation split).
pred_train.shape, pred_test.shape

In [None]:
# Shapes of the one-hot encoded training and test labels.
Y_train.shape,Y_test.shape

In [None]:
# Shapes of the training and test feature matrices.
X_train.shape, X_test.shape

In [None]:
# Fresh (un-thresholded) model outputs for the test set, one column per output unit.
predictions = pd.DataFrame(model.predict(X_test))
predictions

In [None]:
# Display the test predictions array as it currently stands.
pred_test

In [None]:
from sklearn.metrics import classification_report

# Text report of scikit-learn's per-class classification metrics, test labels vs. predictions.
print(classification_report(Y_test,pred_test))

In [None]:
# Convert one-hot rows to a single class index per sample.
# True labels: each one-hot row has exactly one 1, so argmax recovers its position.
Y_test_new = np.argmax(Y_test, axis=1).tolist()

# Predicted labels: take the highest-scoring output unit of each row. Collecting "every
# column equal to 1" from the thresholded array appends zero or two entries for rows like
# [0, 0] or [1, 1], which leaves the list misaligned with (or a different length from)
# Y_test_new. argmax always yields exactly one label per sample.
pred_test_new = np.argmax(model.predict(X_test), axis=1).tolist()

In [None]:
from sklearn.metrics import confusion_matrix
import sklearn.metrics as metrics
# Confusion matrix of true vs. predicted class indices.
# NOTE: this assignment rebinds the name `confusion_matrix` (imported as a function just
# above) to the resulting array.
confusion_matrix = metrics.confusion_matrix(Y_test_new,pred_test_new)
confusion_matrix

In [None]:
#import matplotlib.pyplot as plt 
!pip install scikit-plot

In [None]:
import scikitplot as skplt
# Draw the confusion matrix with scikit-plot, requesting normalised values, and show the figure.
skplt.metrics.plot_confusion_matrix(Y_test_new,pred_test_new, normalize=True)
plt.show()

In [None]:
from sklearn.metrics import roc_curve, auc

# ROC needs continuous scores so the decision threshold can be swept. Feeding hard 0/1
# labels collapses the curve to a single operating point and distorts the AUC.
# Column 1 of the model output is the score for class 1, which roc_curve treats as the
# positive class for {0, 1} labels.
test_scores = model.predict(X_test)[:, 1]

fpr_nn, tpr_nn, thresholds = roc_curve(Y_test_new, test_scores)
roc_auc_nn = auc(fpr_nn, tpr_nn)
roc_auc_nn


In [None]:
import matplotlib.pyplot as plt

# ROC curve on a square canvas, with the chance diagonal for reference.
fig, ax = plt.subplots(figsize=(8, 8))
ax.set_title('Receiver Operating Characteristic')
ax.plot(fpr_nn, tpr_nn, color='red', label='AUC = %0.2f' % roc_auc_nn)
ax.legend(loc='lower right')
ax.plot([0, 1], [0, 1], linestyle='--')  # unlabeled, so it does not appear in the legend
ax.axis('tight')
ax.set_ylabel('True Positive Rate')
ax.set_xlabel('False Positive Rate')

In [None]:
from sklearn.metrics import average_precision_score, precision_recall_curve

# Average precision of the predicted class indices against the true class indices.
# NOTE(review): pred_test_new holds hard class labels rather than continuous scores;
# average_precision_score is normally given scores -- confirm this is intended.
average_precision = average_precision_score(Y_test_new,pred_test_new)

print('Average precision-recall score: {}'.format(average_precision))

In [None]:
# Precision/recall pairs for the test predictions (the thresholds are not needed here).
precision, recall, _ = precision_recall_curve(Y_test_new, pred_test_new)

# Step outline plus a shaded area underneath, both in translucent blue.
plt.step(recall, precision, where='post', color='b', alpha=0.2)
plt.fill_between(recall, precision, step='post', color='b', alpha=0.2)

plt.xlabel('Recall')
plt.ylabel('Precision')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])  # a little headroom above precision = 1
plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(average_precision))

In [None]:
!mkdir -p saved_model
model.save('saved_mymodel/my_model') 