In [1]:
import h5py
from sklearn.svm import SVC
from sklearn.cross_validation import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import hamming_loss
from sklearn.preprocessing import scale
import numpy as np



In [2]:
# Load the feature matrix ('x') and the label matrix ('y') from the HDF5 file.
# The file is opened read-only inside a context manager so the handle is
# released even if a dataset key is missing, and a wrong path fails loudly
# instead of depending on the library's default open mode.
# `[()]` reads the whole dataset into memory; it replaces the `.value`
# accessor, which h5py deprecated and later removed.
with h5py.File("dataset_294.h5", "r") as f:
    x = f['x'][()]
    y = f['y'][()]

In [3]:
# Feature scaling (sklearn's `scale` with its default arguments).
# NOTE(review): the scaling statistics are computed on the full matrix before
# the split, so the held-out rows contribute to them -- confirm this is intended.
x = scale(x)

# Train/test split: 20% of the rows are held out; the fixed seed keeps the
# partition repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=100
)

In [4]:
def get_result(y_true, y_pred):
    """Print exact-match and Hamming-loss results for multi-label predictions.

    Parameters
    ----------
    y_true : 2-D array-like
        Ground-truth label matrix, one row per sample and one column per label.
    y_pred : 2-D array-like
        Predicted label matrix with the same shape as ``y_true``.

    Prints, in order: the number of samples whose labels were all predicted
    correctly, that count as a fraction of the number of samples, and the
    Hamming loss reported by ``hamming_loss``. Returns ``None``.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # A sample counts as fully correct only when every label in its row
    # matches. Using the row width from the data (rather than a fixed label
    # count) keeps this right for any number of labels.
    total_correctly_predicted = int(np.all(y_true == y_pred, axis=1).sum())

    # Normalise by the actual number of samples instead of a fixed test size.
    n_samples = len(y_true)

    print("Fully correct output")
    print(total_correctly_predicted)
    # With no samples the count is 0; report 0.0 rather than dividing by zero.
    print(total_correctly_predicted / float(n_samples) if n_samples else 0.0)
    print("hamming loss")
    print(hamming_loss(y_true, y_pred))

In [5]:
# Multi-label classifier: an RBF-kernel SVC wrapped in a one-vs-rest scheme,
# with n_jobs=-1 passed to the wrapper for parallel fitting.
model = OneVsRestClassifier(
    SVC(kernel='rbf', gamma=0.0020, C=5., probability=True),
    n_jobs=-1
)

In [6]:
# Seed set for active learning: the first 100 training examples and their labels.
batch_x = x_train[:100]
batch_y = y_train[:100]

In [7]:
# Initial fit on the current labelled set (batch_x, batch_y); the fitted
# estimator returned by fit() is this cell's displayed output.
model.fit(batch_x, batch_y)

OneVsRestClassifier(estimator=SVC(C=5.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.002, kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          n_jobs=-1)

In [8]:
# Baseline evaluation: predict on the held-out test split with the model as
# currently fitted, then report the metrics.
y_pred = model.predict(x_test)

print("*" * 100)
print("Trained on dataset:" + str(batch_x.shape))
get_result(y_test, y_pred)

****************************************************************************************************
Trained on dataset:(100, 294)
Fully correct output
139
0.3475
hamming loss
0.198


In [9]:
# Batch-mode active learning: start offsets of the 100-row chunks of the
# training split that follow the first 100 rows. The upper bound is taken from
# the data rather than a hard-coded training-set size, so the schedule stays
# correct if the dataset or the split ratio changes.
batch = range(100, len(x_train), 100)

In [10]:
# Batch-mode active learning loop. For each 100-row chunk of the training
# split: score the chunk with the current model, keep only the examples the
# model is uncertain about, append them to the labelled set, refit, and report
# the metrics on the test split.
for i in batch:
    next_batch_x = x_train[i:i+100]
    next_batch_y = y_train[i:i+100]

    # Magnitude of the decision value for every (example, label) pair.
    scores = np.abs(model.decision_function(next_batch_x))

    # Row positions (within this chunk) of examples that have at least one
    # label whose absolute decision value is below 0.2. Computed with array
    # operations so no comprehension variable shadows the loop variable `i`.
    index = np.flatnonzero((scores < 0.2).any(axis=1))

    print("*"*100)
    print("Found "+str(len(index))+" uncertain examples")

    # Grow the labelled set with the selected examples and their labels.
    batch_x = np.vstack((batch_x, next_batch_x[index]))
    batch_y = np.vstack((batch_y, next_batch_y[index]))
    print("New Dataset shape"+str(batch_x.shape))

    # Refit on the enlarged labelled set and evaluate on the test split.
    model.fit(batch_x, batch_y)
    y_pred = model.predict(x_test)
    get_result(y_test, y_pred)

****************************************************************************************************
Found 49 uncertain examples
New Dataset shape(149, 294)
Fully correct output
158
0.395
hamming loss
0.1885
****************************************************************************************************
Found 54 uncertain examples
New Dataset shape(203, 294)
Fully correct output
164
0.41
hamming loss
0.177
****************************************************************************************************
Found 38 uncertain examples
New Dataset shape(241, 294)
Fully correct output
162
0.405
hamming loss
0.1695
****************************************************************************************************
Found 34 uncertain examples
New Dataset shape(275, 294)
Fully correct output
167
0.4175
hamming loss
0.172
****************************************************************************************************
Found 40 uncertain examples
New Dataset shape(315, 294)
Fully correct