In [1]:
import sys

import numpy as np
from scipy.sparse import load_npz

# Print arrays in full instead of summarising them with "...".
# NaN is not a valid threshold (current NumPy raises ValueError for it);
# sys.maxsize is the documented way to disable summarisation.
np.set_printoptions(threshold=sys.maxsize)

In [2]:
from sklearn.model_selection import KFold
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn import metrics

In [3]:
from imblearn.over_sampling import RandomOverSampler, SMOTE

### Reading data

In [4]:
# Load the sparse feature matrix that is later passed to predict() as the
# training features. The recorded output below shows a 151627x1676 CSR matrix.
Xtrain = load_npz('features_silico_duplicated.npz')
Xtrain

<151627x1676 sparse matrix of type '<class 'numpy.float64'>'
	with 1901453 stored elements in Compressed Sparse Row format>

In [5]:
# Load the training label matrix; predict() treats each column as a separate
# binary target. The recorded shape below is (151627, 71).
Ytrain = np.load('classes_silico_duplicated.npy')
Ytrain.shape

(151627, 71)

In [6]:
# Load the sparse feature matrix used as the held-out test features.
# The recorded output below shows 842 rows and the same 1676 columns as Xtrain.
Xtest = load_npz('features_inga_dropped.npz')
Xtest

<842x1676 sparse matrix of type '<class 'numpy.float64'>'
	with 14100 stored elements in Compressed Sparse Row format>

In [7]:
# Load the test label matrix; the recorded shape below is (842, 71), i.e. the
# same number of label columns as Ytrain.
Ytest = np.load('classes_inga.npy')
Ytest.shape

(842, 71)

### Training Model

In [8]:
# Estimator shared by every per-label fit: predicti() calls predict_proba,
# which LinearSVC does not provide on its own, hence the calibration wrapper.
svc = CalibratedClassifierCV(LinearSVC(penalty='l2'))

In [9]:
def predicti(clf, Xtrain, Xtest, ytrain, ytest, threshold, random_state=None):
    """Fit ``clf`` on a single label column and return thresholded predictions.

    The training set is balanced with random over-sampling before fitting.
    Predictions are 1 where the probability in column 1 of ``predict_proba``
    is at least ``threshold`` and 0 otherwise.

    Parameters
    ----------
    clf : estimator exposing ``fit`` and ``predict_proba``. It is refitted
        in place on every call.
    Xtrain, Xtest : feature matrices for the training and test samples.
    ytrain : 1-D array of training labels for this column.
    ytest : 1-D array of test labels; only its shape is used.
    threshold : probability cut-off for predicting the positive class.
    random_state : optional seed forwarded to ``RandomOverSampler`` so the
        resampling can be made reproducible. ``None`` (the default) keeps the
        previous unseeded behaviour.

    Returns
    -------
    (ytrainpred, ytestpred) : predictions for the training and test samples.
    """
    # A column with a single class cannot be fitted; predict that constant
    # for every sample instead.
    if np.unique(ytrain).size == 1:
        constant = ytrain[0]
        return np.full(ytrain.shape, constant), np.full(ytest.shape, constant)

    ros = RandomOverSampler(random_state=random_state)
    # imbalanced-learn renamed fit_sample to fit_resample and later removed
    # the old name; prefer the new one and fall back for old installations.
    resample = getattr(ros, 'fit_resample', None)
    if resample is None:
        resample = ros.fit_sample
    Xtrain_resampled, ytrain_resampled = resample(Xtrain, ytrain)
    clf.fit(Xtrain_resampled, ytrain_resampled)

    # Column 1 of predict_proba is taken as the positive-class probability
    # (assumes the labels are 0/1 -- TODO confirm against the label files).
    ytrainpred = (clf.predict_proba(Xtrain)[:, 1] >= threshold).astype('int')
    ytestpred = (clf.predict_proba(Xtest)[:, 1] >= threshold).astype('int')

    return ytrainpred, ytestpred

In [10]:
def predict(clf, Xtrain, Xtest, Ytrain, Ytest, threshold=0.5):
    """Train one binary model per label column and print multilabel metrics.

    Every column of ``Ytrain`` is fitted independently through ``predicti``.
    The per-column predictions are stacked back into matrices shaped like
    ``Ytrain`` / ``Ytest`` and scored with subset accuracy, Hamming score
    (``1 - hamming_loss``) and micro-averaged precision, recall and F1.
    Each printed line shows the training value followed by the test value.

    Parameters
    ----------
    clf : estimator handed to ``predicti``; it is refitted for every column.
    Xtrain, Xtest : feature matrices for the training and test samples.
    Ytrain, Ytest : 2-D label arrays with one column per label.
    threshold : probability cut-off forwarded to ``predicti``.

    Raises
    ------
    ValueError : if ``Ytrain`` and ``Ytest`` have different column counts.

    Returns
    -------
    None. The metrics are printed only.
    """
    # Take the label count from the data rather than a hard-coded constant.
    n_labels = Ytrain.shape[1]
    if Ytest.shape[1] != n_labels:
        raise ValueError(
            'Ytrain has {} label columns but Ytest has {}'.format(
                n_labels, Ytest.shape[1]))

    Ytrainpred_arr = []
    Ytestpred_arr = []

    for i in range(n_labels):
        ytrainpred, ytestpred = predicti(clf, Xtrain, Xtest,
                                         Ytrain[:, i], Ytest[:, i],
                                         threshold=threshold)
        Ytrainpred_arr.append(ytrainpred)
        Ytestpred_arr.append(ytestpred)

    # Per-label vectors are collected as rows; transpose to (samples, labels).
    Ytrainpred = np.array(Ytrainpred_arr).T
    Ytestpred = np.array(Ytestpred_arr).T

    print('Accuracy: \t \t {} \t {}'.format(metrics.accuracy_score(Ytrain, Ytrainpred),
                                            metrics.accuracy_score(Ytest, Ytestpred)))
    print('Hamming: \t \t {} \t {}'.format(1 - metrics.hamming_loss(Ytrain, Ytrainpred),
                                           1 - metrics.hamming_loss(Ytest, Ytestpred)))
    print('Precision: \t \t {} \t {}'.format(metrics.precision_score(Ytrain, Ytrainpred, average='micro'),
                                             metrics.precision_score(Ytest, Ytestpred, average='micro')))
    print('Recall: \t \t {} \t {}'.format(metrics.recall_score(Ytrain, Ytrainpred, average='micro'),
                                          metrics.recall_score(Ytest, Ytestpred, average='micro')))
    print('F1: \t \t \t {} \t {}'.format(metrics.f1_score(Ytrain, Ytrainpred, average='micro'),
                                         metrics.f1_score(Ytest, Ytestpred, average='micro')))

In [11]:
# Sweep the decision threshold and print train/test metrics for each value.
# NOTE(review): predict() refits a model for every label column on each call,
# so all models are retrained per threshold even though the threshold is only
# applied to the predict_proba output. Because the RandomOverSampler inside
# predicti() is unseeded, the runs may also be trained on different resamples.
for t in [0.5, 0.4, 0.3, 0.2, 0.1]:
    print('threshold = {}'.format(t))
    predict(svc, Xtrain, Xtest, Ytrain, Ytest, threshold=t)

threshold = 0.5


  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self

Accuracy: 	 	 0.2472053130379154 	 0.0011876484560570072
Hamming: 	 	 0.9421321799965575 	 0.8805158743434478
Precision: 	 	 0.43411087810067583 	 0.3222935676467275
Recall: 	 	 0.9351283398765546 	 0.7174022066198595
F1: 	 	 	 0.5929561166971907 	 0.4447726389428682
threshold = 0.4


  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self

Accuracy: 	 	 0.1900321182902781 	 0.0023752969121140144
Hamming: 	 	 0.9294813244918939 	 0.87937840821652
Precision: 	 	 0.3859331342935545 	 0.3205656385703151
Recall: 	 	 0.9550320978494956 	 0.7219157472417251
F1: 	 	 	 0.5497212336892052 	 0.4439818027604287
threshold = 0.3


  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self

Accuracy: 	 	 0.16564332209962607 	 0.0
Hamming: 	 	 0.911443268353949 	 0.8593054765648522
Precision: 	 	 0.33432499656704734 	 0.2894811994288434
Recall: 	 	 0.9733984564180242 	 0.7625376128385155
F1: 	 	 	 0.4977068123347943 	 0.41965086593527906
threshold = 0.2


  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self

Accuracy: 	 	 0.04722773648492683 	 0.0
Hamming: 	 	 0.8754750933002103 	 0.8593556588939815
Precision: 	 	 0.26406548593672075 	 0.2898840083666096
Recall: 	 	 0.986453986212866 	 0.7645436308926781
F1: 	 	 	 0.41660838879341516 	 0.42037777471391147
threshold = 0.1


  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self.a_ * T + self.b_))
  return 1. / (1. + np.exp(self

Accuracy: 	 	 0.003752629808675236 	 0.0
Hamming: 	 	 0.8205980260864387 	 0.8599076645144024
Precision: 	 	 0.2002984332255658 	 0.2910355339620844
Recall: 	 	 0.9958885900646078 	 0.7660481444332999
F1: 	 	 	 0.3335179539207719 	 0.42181567138419057
