In [0]:
import sklearn
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score
from time import time
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report
import random
import statistics

import pandas as pd
import io
import requests
import warnings
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Conv1D, MaxPooling1D, Flatten

warnings.filterwarnings("ignore")    

## Keras Sequential

In [0]:
#Keras does not provide F1 score, recall, and precision as built-in metrics, so they are defined here. The Keras backend (K) is used to keep the code short
from keras import backend as K

#Define recall as the ratio of true positives to possible positives. K.epsilon() is a small constant added to the denominator to prevent division by zero
def recall_m(y_true, y_pred):
    """Recall metric built from Keras backend ops.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted scores.

    Returns:
        sum(round(clip(y_true * y_pred, 0, 1))) divided by
        sum(round(clip(y_true, 0, 1))) + K.epsilon().
    """
    # Entries where both the label and the (rounded) prediction are positive.
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    hit_count = K.sum(hit_mask)
    # Entries whose label is positive, regardless of the prediction.
    actual_mask = K.round(K.clip(y_true, 0, 1))
    actual_count = K.sum(actual_mask)
    # K.epsilon() keeps the denominator non-zero when there are no positives.
    return hit_count / (actual_count + K.epsilon())

#Define precision as the ratio of true positives to predicted positives
def precision_m(y_true, y_pred):
    """Precision metric built from Keras backend ops.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted scores.

    Returns:
        sum(round(clip(y_true * y_pred, 0, 1))) divided by
        sum(round(clip(y_pred, 0, 1))) + K.epsilon().
    """
    # Entries where both the label and the (rounded) prediction are positive.
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    hit_count = K.sum(hit_mask)
    # Entries predicted positive, regardless of the label.
    flagged_mask = K.round(K.clip(y_pred, 0, 1))
    flagged_count = K.sum(flagged_mask)
    # K.epsilon() keeps the denominator non-zero when nothing is predicted positive.
    return hit_count / (flagged_count + K.epsilon())

#Define f1 score as the ratio of the product to the sum of precision and recall, scaled by 2
def f1_m(y_true, y_pred):
    """F1 metric: 2 * (precision * recall) / (precision + recall + K.epsilon()).

    Args:
        y_true: ground-truth labels, forwarded to precision_m and recall_m.
        y_pred: predicted scores, forwarded to precision_m and recall_m.

    Returns:
        The F1 value combining precision_m and recall_m for the given inputs.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    # K.epsilon() guards against 0/0 when precision and recall are both zero.
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

In [0]:
# Download the CS-Annotate train/test feature and target tables from GitHub.
DATA_URL_BASE = "https://raw.githubusercontent.com/atfrank/CS-Annotate/master/data/"
REQUEST_TIMEOUT_S = 30  # seconds; fail instead of hanging the kernel on a stalled connection


def _fetch_table(file_name):
    """Download one space-separated table from DATA_URL_BASE into a DataFrame.

    Args:
        file_name: name of the file under DATA_URL_BASE, e.g. "train_features.csv".

    Returns:
        pandas.DataFrame parsed from the UTF-8 decoded response body with sep=' '.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status. Without
            this check the error page would be parsed as if it were the data.
        requests.Timeout: if the server does not respond within REQUEST_TIMEOUT_S.
    """
    response = requests.get(DATA_URL_BASE + file_name, timeout=REQUEST_TIMEOUT_S)
    response.raise_for_status()
    return pd.read_csv(io.StringIO(response.content.decode('utf-8')), sep=' ')


# Feature tables are also kept as raw NumPy arrays (trainX / testX) for scaling and fitting.
training_X = _fetch_table("train_features.csv")
trainX = training_X.values

training_y = _fetch_table("train_target.csv")

testing_X = _fetch_table("test_features.csv")
testX = testing_X.values

testing_y = _fetch_table("test_target.csv")

In [0]:
#%%capture
# Train and evaluate one binary classifier per structural feature.
# For each target column, N_REPEATS independent networks are trained and scored on
# the test set; the mean and standard deviation of the weighted F1 score are
# printed and collected into the `data` summary table.
STRUCTURAL_FEATURES = (
    'astack', 'nastack', 'pair',
    'pucker_C1p_exo', 'pucker_C2p_endo', 'pucker_C2p_exo',
    'pucker_C3p_endo', 'pucker_C3p_exo', 'pucker_C4p_exo',
    'sasa', 'syn_anti',
)
N_REPEATS = 2   # trainings per feature; statistics.stdev needs at least 2 values
N_EPOCHS = 10   # training epochs per model
SUMMARY_COLUMNS = ['Structural Feature', 'Average f-1 score', 'Std Dev']

# The input features are the same for every target, so the scaler is fitted once
# (on the training data only) instead of once per structural feature.
scaler = StandardScaler()
scaler.fit(trainX)
trainX_scaled = scaler.transform(trainX)
testX_scaled = scaler.transform(testX)
n_features = trainX_scaled.shape[1]  # input width taken from the data, not hard-coded


def _build_classifier(n_inputs):
    """Return a freshly initialised, compiled 50-100-50-1 sigmoid network.

    Args:
        n_inputs: number of input features per sample.

    Returns:
        A compiled keras Sequential model using binary cross-entropy loss, the
        adam optimizer, and accuracy / f1_m / precision_m / recall_m as metrics.
    """
    net = Sequential()
    net.add(Dense(50, input_shape=(n_inputs,), activation='sigmoid'))
    net.add(Dense(100, activation='sigmoid'))
    net.add(Dense(50, activation='sigmoid'))
    net.add(Dense(1, activation='sigmoid'))
    net.compile(loss='binary_crossentropy', optimizer='adam',
                metrics=['accuracy', f1_m, precision_m, recall_m])
    return net


summary_rows = []
for structural_feature in STRUCTURAL_FEATURES:
    # Single-column target arrays for the current structural feature.
    trainy = training_y[[structural_feature]].values
    testy = testing_y[[structural_feature]].values

    f1_scores = []
    for _ in range(N_REPEATS):
        model = _build_classifier(n_features)
        # `epochs` is the supported keyword; `nb_epoch` is the Keras 1 spelling.
        model.fit(trainX_scaled, trainy, epochs=N_EPOCHS)
        y_true, y_pred = np.int_(testy), model.predict(testX_scaled)
        # Rounding turns the sigmoid outputs into hard 0/1 predictions.
        report = classification_report(y_true, y_pred.round(), output_dict=True)
        f1_scores.append(report['weighted avg']['f1-score'])

    mean_f1 = statistics.mean(f1_scores)
    std_f1 = statistics.stdev(f1_scores)
    print(structural_feature, mean_f1, std_f1)
    summary_rows.append([structural_feature, mean_f1, std_f1])

# Build the summary once: DataFrame.append was removed in pandas 2.0 and appending
# in a loop also left every row with index 0.
data = pd.DataFrame(summary_rows, columns=SUMMARY_COLUMNS)






Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/10





Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
astack 0.7641398769098746 0.003974859716257084
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
nastack 0.8335320512639367 0.0037039546589474684
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
pair 0.8351383485735675 0.0013514311543191787
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/

In [0]:
# Display the per-feature summary table (mean and standard deviation of the weighted F1 score).
data

Unnamed: 0,Structural Feature,Average f-1 score,Std Dev
0,astack,0.76414,0.003975
0,nastack,0.833532,0.003704
0,pair,0.835138,0.001351
0,pucker_C1p_exo,0.978156,0.0
0,pucker_C2p_endo,0.960065,0.0
0,pucker_C2p_exo,0.725277,0.0
0,pucker_C3p_endo,0.726012,0.004374
0,pucker_C3p_exo,0.967274,0.0
0,pucker_C4p_exo,0.945595,0.0
0,sasa,0.938084,0.001871
