In [7]:
# generate feature subsets
from CustomFeatures import MyFeatures
from NERmodel import NERmodel
import subprocess
import traceback, sys
import pickle

# Prepare several independent MyFeatures instances, one per model to train.
def _derive_feature_set(degree=3):
    """Create a MyFeatures instance and let it derive a new feature set.

    `degree` is forwarded unchanged to `MyFeatures.deriveNewFeatureSet`.
    Debug aid: `printActiveFeatureFunctions()` was previously called on the
    instance at this point to inspect the outcome.
    """
    feature_set = MyFeatures()
    feature_set.deriveNewFeatureSet(degree=degree)
    return feature_set


# number of model permutations
m = 2
fs = [_derive_feature_set() for _ in range(m)]

print("feature instances:", fs, "", sep="\n")

# Wrap every prepared feature set in its own, not yet trained, NER model.
models = [NERmodel(featureset=feature_set) for feature_set in fs]
print("models:", models, "", sep="\n")
    

# Train every model and remember what saveModelPipeline() returned for it.
# The slot of a model stays None when any of its training steps raised.
models_pickles = [None for _ in models]
print(len(models_pickles))
print(models_pickles)

separator = "-" * 60
for i, model in enumerate(models):
    training_dump = "../data/models/multitrain-data" + str(i) + ".json"
    try:
        model.trainFeatureExtraction("../data/LaboCase/Train", limit=None)
        model.saveTrainingFeatures(training_dump)
        model.newModelPipeline()
        models_pickles[i] = model.saveModelPipeline()
    except Exception:
        # Best effort: report the failure and carry on with the next model.
        print("Exception in user code:", i)
        print(separator)
        traceback.print_exc(file=sys.stdout)
        print(separator)
        print("model " + str(i) + " has failed... ")
        models_pickles[i] = None

print("model pickles:", models_pickles, "", sep="\n")

# Test every saved pipeline.
# PROBLEM: a reloaded pipeline needs the very same feature set it was trained
# with, which is why fs[i] is paired with models_pickles[i] here. Pickling the
# whole NERmodel (pipeline together with its feature set) may be the better
# option -- still an open question.
results_files = [None for _ in models]
evaluation_files = [None for _ in models]

separator = "-" * 60
for i, modelfile in enumerate(models_pickles):
    fset = fs[i]
    prediction_file = '../data/models/task9.1_AHLT_' + str(i) + '.txt'
    try:
        model = NERmodel(featureset=fset)
        model.loadModelPipeline(filepath=modelfile)
        model.testFeatureExtraction(
            '../data/LaboCase/Test/Test for DrugNER task/DrugBank',
            limit=None)
        model.predict()
        model.parsePredictionOutput(prediction_file)
        # Only recorded once every previous step of this model succeeded.
        results_files[i] = '../data/models/multitrain-' + str(i) + '-output.csv'
        evaluation_files[i] = model.autoEvaluation()
    except Exception:
        # Best effort: report the failure and carry on with the next model.
        print("Exception in user code:", i)
        print(separator)
        traceback.print_exc(file=sys.stdout)
        print(separator)

print("model predictions:")
print(results_files)
print(evaluation_files)
print()

# Print the stored result file and evaluation file of every model.
#
# An entry of results_files / evaluation_files is still None when the matching
# model raised inside the test loop. Such entries are reported and skipped;
# passing None to open() would raise a TypeError and abort the report for all
# remaining models. Files are read inside a `with` block so every handle is
# closed again right after its content was printed.
for i, report_paths in enumerate(zip(results_files, evaluation_files)):
    print("model "+ str(i))
    for report_path in report_paths:
        if report_path is None:
            print("(no output file recorded for model " + str(i) + ")")
            continue
        with open(report_path, 'r') as report:
            print(report.read())
    print()
    


feature instances:
[<CustomFeatures.MyFeatures object at 0x7f4b3e570080>, <CustomFeatures.MyFeatures object at 0x7f4b3e91a400>, <CustomFeatures.MyFeatures object at 0x7f4b3e91add8>]

models:
[<NERmodel.NERmodel object at 0x7f4b3d6d6320>, <NERmodel.NERmodel object at 0x7f4b3fd164e0>, <NERmodel.NERmodel object at 0x7f4b3fd16780>]

3
[None, None, None]
model pickles:
['../data/models/svm-pipeline_20180510184436.pkl', '../data/models/svm-pipeline_20180510184443.pkl', '../data/models/svm-pipeline_20180510184449.pkl']


AutoEvaluation calling: ../data/models/task9.1_AHLT_0.txt

AutoEvaluation calling: ../data/models/task9.1_AHLT_1.txt

AutoEvaluation calling: ../data/models/task9.1_AHLT_2.txt
model predictions:
['../data/models/multitrain-0-output.csv', '../data/models/multitrain-1-output.csv', '../data/models/multitrain-2-output.csv']
['../data/models/task9.1_AHLT_0.txt_eval', '../data/models/task9.1_AHLT_1.txt_eval', '../data/models/task9.1_AHLT_2.txt_eval']

model 0
multitrain-0-output.cs

In [2]:
# prepare different feature sets 
# get CustomFeatures list of methods
from CustomFeatures import MyFeatures
import random

# APPROACH 1 (discarded)
# Collect the attribute names of a MyFeatures instance and keep only those
# that the instance itself accepts via isFeatureMethod(); the intent is to
# remove '__func__', 'f', 'window_...', 'addFeatures'.
mf = MyFeatures()
feature_list = [name for name in dir(mf) if mf.isFeatureMethod(name)]
print(feature_list)

# APPROACH 2
# Create a permutation of the entries inside each feature list:
# morpho_features, window_1_before, window_1_after,
# window_2_before, window_2_after,
# window_3_before, window_3_after
print(dir(mf), end="\n\n")
print(mf.morpho_features)
print("shuffling")
# The second argument (3) is passed through as-is; its meaning is defined by
# MyFeatures.shuffleFeatureList and is not visible from this cell.
shuffled_morpho = mf.shuffleFeatureList(mf.morpho_features, 3)
mf.morpho_features = shuffled_morpho
print(mf.morpho_features)

# Show the first-window feature lists, each preceded by an empty line.
for window_name in ("window_1_before", "window_1_after"):
    print()
    print(getattr(mf, window_name))

print()
# Open problem: we need to control exactly which features are executed.


['getLemma', 'getPOSTag', 'getWord', 'hasDigits', 'hasStrangeChars', 'isLowerCase', 'isTitleCase', 'isUpperCase', 'lenprefix', 'lensuffix', 'lenword', 'moreThan10chars', 'morpho_features', 'prefix', 'suffix']
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'addFeatures', 'f', 'getLemma', 'getPOSTag', 'getWord', 'hasDigits', 'hasStrangeChars', 'isFeatureMethod', 'isLowerCase', 'isTitleCase', 'isUpperCase', 'lenprefix', 'lensuffix', 'lenword', 'moreThan10chars', 'morpho_features', 'prefix', 'shuffleFeatureList', 'suffix', 'window_1_after', 'window_1_before', 'window_2_after', 'window_2_before', 'window_3_after', 'window_3_before']

[<bound method MyFeatures.isTitleCase of <CustomFeatures.MyFeatur