# Train an NER model on Chia Corpus without Scope Entity

Author: Kathryn Meldrum (kmm4ap@virginia.edu)

This notebook trains a spaCy NER model on the Chia corpus locally.

## Import Modules

In [None]:
import pandas as pd
import os
import scispacy 
import spacy
from spacy.tokens import DocBin
from tqdm import tqdm
from spacy.util import filter_spans
import random
import re

## Define Helper Functions
ann_to_dict: converts txt and ann docs for each trial into a dictionary format representing text and entity spans 

doc_to_bin: converts a trial's doc dict into a spaCy Doc and adds it to a spaCy DocBin object

In [None]:
fail_list = []  # labels found in .ann files that are not in label_list


def ann_to_dict(nct_id):  # took out file path arg
    '''
    Convert one .ann/.txt pair into a dict of text and entity spans.

    nct_id: file name (or path) before .ann or .txt; both files are opened
            relative to the current working directory unless a path is given
    return: {'entities': [(start, end, label), ...], 'text': <str>} with the
            entities sorted by (start, end, label)

    Only lines whose first tab-separated field starts with 'T' are read.
    Labels that are not in the module-level ``label_list`` (which must be
    defined before this function is called) are recorded in ``fail_list``
    and skipped.

    NOTE(review): a span's position is taken from the FIRST occurrence of the
    mention text in the .txt file, not from the offsets stored in the .ann
    line, so repeated mentions all map onto the same span.
    '''
    # read in files (the with-statement closes them; no explicit close needed)
    with open(nct_id + '.ann', encoding='utf-8') as f1:
        ann = f1.read()
    with open(nct_id + '.txt', encoding='utf-8') as f2:
        txt = f2.read()

    # ANN FILE MANIPULATION
    ents = []
    for raw_line in ann.split('\n'):
        fields = raw_line.split('\t')
        if not fields[0].startswith('T'):
            continue

        try:
            mention = fields[2]
            label = fields[1].split(' ')[0]
            start = txt.index(mention)
        except (IndexError, ValueError):
            # malformed line (missing fields) or mention text not found
            # verbatim in the .txt file: skip this annotation
            continue
        end = start + len(mention)

        if label not in label_list:
            fail_list.append(label)
            continue

        # Inclusive interval test: spans that merely touch also count as
        # overlapping, as in the original four-way comparison.
        overlapping = [ent for ent in ents
                       if start <= ent[1] and ent[0] <= end]

        if not overlapping:
            ents.append((start, end, label))
        elif all((end - start) > (ent[1] - ent[0]) for ent in overlapping):
            # The new span is longer than everything it collides with, so it
            # replaces those spans. A new list is built rather than removing
            # items from the list being scanned.
            ents = [ent for ent in ents if ent not in overlapping]
            ents.append((start, end, label))
        # otherwise an equal or longer span is already present: keep it

    content = {'entities': sorted(ents), 'text': txt}

    return content

def doc_to_bin(d, bin_obj):
    '''
    Build a spaCy Doc for one text/ann pair and add it to a DocBin.

    d: <str> name of a text/ann doc pair (passed straight to ann_to_dict)
    bin_obj: <spacy bin object> bin to add doc to

    Uses the module-level ``nlp`` pipeline to create the Doc. Character
    spans for which ``doc.char_span`` returns None are left out, and the
    remaining spans go through ``filter_spans`` before being set as
    ``doc.ents``.
    '''
    record = ann_to_dict(d)
    raw_text = record['text']
    char_spans = record['entities']

    doc = nlp.make_doc(raw_text)

    # alignment_mode="contract" is passed through unchanged; spans that
    # come back as None are not kept
    candidates = (
        doc.char_span(begin, stop, label=tag, alignment_mode="contract")
        for begin, stop, tag in char_spans
    )
    aligned = [sp for sp in candidates if sp is not None]

    doc.ents = filter_spans(aligned)
    bin_obj.add(doc)

## Initialize Spacy Model Objects

In [None]:
# Module-level objects used by the helper functions: doc_to_bin calls
# nlp.make_doc(...) to build each Doc before adding it to a DocBin.
nlp = spacy.load("en_core_sci_sm") # load scispacy small model as base model
doc_bin_train = DocBin()           # DocBin object for train data
doc_bin_dev = DocBin()              # DocBin object for dev data (comment previously said "test" - confirm against the loop that fills it)

## Add trials to DocBins

For splits dictionary generation see 'stratified_sampling.ipynb'

In [None]:
# define dictionary of file names to be train, dev, test
# ann_to_dict opens '<name>.ann' and '<name>.txt' relative to the current
# working directory, so switch to the corpus folder first.
# NOTE(review): this absolute path is machine-specific; adjust it locally.
os.chdir('/Users/meldrumapple/Desktop/Capstone/chia_noscope_corpus')
splits={'train': ['NCT03187379_exc', 'NCT02735577_inc', 'NCT01803438_inc', 'NCT03104816_inc', 'NCT02348918_exc', 'NCT02607319_inc', 'NCT02053246_exc', 'NCT03363295_exc', 'NCT00425789_exc', 'NCT02015494_exc', 'NCT02964715_inc', 'NCT01579604_exc', 'NCT00401245_exc', 'NCT02604459_exc', 'NCT01118871_inc', 'NCT01942915_exc', 'NCT03131050_exc', 'NCT02888704_inc', 'NCT02721017_exc', 'NCT03255044_exc', 'NCT02567214_inc', 'NCT00806936_inc', 'NCT02816762_inc', 'NCT01531257_inc', 'NCT02273791_exc', 'NCT00461136_inc', 'NCT03212352_inc', 'NCT02579733_exc', 'NCT02536976_exc', 'NCT02365870_inc', 'NCT01774019_inc', 'NCT03249311_exc', 'NCT02600000_exc', 'NCT01084993_inc', 'NCT02385045_exc', 'NCT02282319_exc', 'NCT02464813_exc', 'NCT02951754_exc', 'NCT00785213_exc', 'NCT03199560_inc', 'NCT02653131_inc', 'NCT01261832_inc', 'NCT03541980_exc', 'NCT02746900_inc', 'NCT02368743_inc', 'NCT02695992_inc', 'NCT02429583_exc', 'NCT02762851_exc', 'NCT02755701_exc', 'NCT01349413_exc', 'NCT03208465_exc', 'NCT02396732_inc', 'NCT01856491_exc', 'NCT00989261_inc', 'NCT03013790_exc', 'NCT02764476_inc', 'NCT01932996_exc', 'NCT02384850_inc', 'NCT03208244_exc', 'NCT02456532_inc', 'NCT00894712_exc', 'NCT03080493_exc', 'NCT03233880_exc', 'NCT01701219_exc', 'NCT02137369_inc', 'NCT01806558_exc', 'NCT02573168_exc', 'NCT00543712_exc', 'NCT02203019_exc', 'NCT02483715_inc', 'NCT02456129_exc', 'NCT01579604_inc', 'NCT03117608_exc', 'NCT01665417_exc', 'NCT01809041_inc', 'NCT00931983_exc', 'NCT02589977_exc', 'NCT03472495_exc', 'NCT01650792_exc', 'NCT02957305_exc', 'NCT02612181_inc', 'NCT01891383_exc', 'NCT02365870_exc', 'NCT02429583_inc', 'NCT02890719_exc', 'NCT02992938_inc', 'NCT02634541_exc', 'NCT03471117_exc', 'NCT02566863_exc', 'NCT03296488_inc', 'NCT02876484_exc', 'NCT02526823_inc', 'NCT00904202_exc', 'NCT03491059_exc', 'NCT00050349_exc', 'NCT03519568_exc', 'NCT03255044_inc', 'NCT02515773_exc', 'NCT02805504_inc', 'NCT02579200_inc', 'NCT02707809_exc', 'NCT03223909_inc', 'NCT00730301_inc', 'NCT03347513_exc', 
'NCT03124329_exc', 'NCT02986659_inc', 'NCT02312076_inc', 'NCT03317197_inc', 'NCT02687724_exc', 'NCT01996436_exc', 'NCT03241368_exc', 'NCT01793831_inc', 'NCT00625742_inc', 'NCT03480607_inc', 'NCT03339284_exc', 'NCT02445339_inc', 'NCT00502567_exc', 'NCT03216447_inc', 'NCT02979561_inc', 'NCT03434951_exc', 'NCT02926235_exc', 'NCT02441179_inc', 'NCT03493919_exc', 'NCT03355469_inc', 'NCT02553226_inc', 'NCT00676273_inc', 'NCT02257580_exc', 'NCT03176316_inc', 'NCT03497598_exc', 'NCT01664507_exc', 'NCT03228017_inc', 'NCT01803828_exc', 'NCT03351972_exc', 'NCT02833116_exc', 'NCT03046108_exc', 'NCT03345589_inc', 'NCT02940912_inc', 'NCT01735955_exc', 'NCT03154931_exc', 'NCT02437084_inc', 'NCT01794793_inc', 'NCT02766530_exc', 'NCT02785549_inc', 'NCT00461136_exc', 'NCT03164096_inc', 'NCT02652572_inc', 'NCT03560310_inc', 'NCT03083197_exc', 'NCT02959801_inc', 'NCT01980680_exc', 'NCT00962364_inc', 'NCT03168178_exc', 'NCT02519777_exc', 'NCT02627521_inc', 'NCT03424993_exc', 'NCT02243553_exc', 'NCT02034019_inc', 'NCT03099408_exc', 'NCT03355469_exc', 'NCT02787863_inc', 'NCT01794793_exc', 'NCT02747940_exc', 'NCT03026088_inc', 'NCT02579200_exc', 'NCT02650024_inc', 'NCT03034837_exc', 'NCT02874092_exc', 'NCT02295202_inc', 'NCT03199560_exc', 'NCT03464552_inc', 'NCT01604187_inc', 'NCT02396420_inc', 'NCT03623789_exc', 'NCT02731794_exc', 'NCT01943812_inc', 'NCT02478346_inc', 'NCT01742117_inc', 'NCT03059069_exc', 'NCT03194074_inc', 'NCT02406885_exc', 'NCT03040024_inc', 'NCT03073603_exc', 'NCT03120533_inc', 'NCT01118871_exc', 'NCT02686021_exc', 'NCT03044093_exc', 'NCT02656394_inc', 'NCT02986659_exc', 'NCT02822001_inc', 'NCT01701219_inc', 'NCT02939872_inc', 'NCT03366779_inc', 'NCT01116973_exc', 'NCT02145026_exc', 'NCT02478515_exc', 'NCT02312089_exc', 'NCT00183885_exc', 'NCT02337764_inc', 'NCT00235170_inc', 'NCT02573597_inc', 'NCT02563535_inc', 'NCT02798237_inc', 'NCT01994382_exc', 'NCT02553226_exc', 'NCT01531257_exc', 'NCT02462317_inc', 'NCT02982577_exc', 'NCT02968342_inc', 'NCT03034096_inc', 
'NCT02369211_exc', 'NCT02733159_exc', 'NCT03187639_inc', 'NCT02687178_exc', 'NCT02715518_inc', 'NCT02379156_exc', 'NCT02823808_inc', 'NCT03140423_exc', 'NCT02186600_inc', 'NCT02334722_exc', 'NCT03123562_inc', 'NCT02584140_exc', 'NCT02477280_exc', 'NCT02562456_exc', 'NCT02557412_inc', 'NCT03299517_exc', 'NCT01217671_inc', 'NCT01850147_exc', 'NCT01997580_exc', 'NCT02893293_inc', 'NCT02773173_inc', 'NCT03555526_inc', 'NCT02995291_exc', 'NCT01205334_inc', 'NCT03536520_inc', 'NCT02845427_exc', 'NCT02156999_inc', 'NCT02746900_exc', 'NCT02894645_inc', 'NCT00867958_inc', 'NCT02596555_inc', 'NCT02863120_inc', 'NCT02830360_inc', 'NCT03328052_inc', 'NCT00396734_inc', 'NCT00319748_inc', 'NCT02735902_inc', 'NCT03460002_inc', 'NCT02260700_inc', 'NCT03213834_exc', 'NCT02396732_exc', 'NCT03360214_inc', 'NCT01993836_exc', 'NCT02886962_exc', 'NCT02903407_exc', 'NCT02868437_exc', 'NCT02760251_inc', 'NCT03275584_exc', 'NCT03044561_inc', 'NCT03541980_inc', 'NCT03033745_inc', 'NCT02584140_inc', 'NCT02990403_exc', 'NCT03119766_inc', 'NCT00749112_exc', 'NCT03187379_inc', 'NCT02944604_inc', 'NCT02571179_inc', 'NCT01117181_exc', 'NCT00599924_exc', 'NCT01064752_exc', 'NCT01604187_exc', 'NCT01684501_inc', 'NCT02969187_inc', 'NCT02933671_exc', 'NCT03446885_exc', 'NCT02393287_exc', 'NCT02464865_exc', 'NCT00351611_exc', 'NCT02951832_exc', 'NCT01768195_inc', 'NCT03209011_exc', 'NCT03335436_exc', 'NCT02797548_exc', 'NCT03339284_inc', 'NCT02637076_inc', 'NCT00639795_exc', 'NCT01314898_exc', 'NCT02883400_exc', 'NCT03064867_exc', 'NCT01793519_exc', 'NCT02872935_exc', 'NCT02739295_inc', 'NCT02371200_exc', 'NCT02511574_exc', 'NCT02732080_exc', 'NCT02763007_inc', 'NCT00718952_inc', 'NCT03333655_inc', 'NCT02951754_inc', 'NCT02618057_inc', 'NCT03062358_inc', 'NCT01809041_exc', 'NCT02649114_exc', 'NCT02764476_exc', 'NCT01715584_inc', 'NCT02195024_exc', 'NCT03253796_inc', 'NCT02528604_exc', 'NCT03241368_inc', 'NCT03297944_inc', 'NCT02744976_exc', 'NCT01491763_exc', 'NCT02818816_inc', 'NCT03615508_inc', 
'NCT03004209_exc', 'NCT02573168_inc', 'NCT00576173_exc', 'NCT02419378_exc', 'NCT03624881_exc', 'NCT02205931_inc', 'NCT01117181_inc', 'NCT02566226_inc', 'NCT02968602_exc', 'NCT02380118_inc', 'NCT02790593_exc', 'NCT02893228_exc', 'NCT02209545_exc', 'NCT03079141_exc', 'NCT02992028_inc', 'NCT03404479_exc', 'NCT02844907_inc', 'NCT03511521_inc', 'NCT03624881_inc', 'NCT03387059_inc', 'NCT02807857_exc', 'NCT02340169_exc', 'NCT02548013_inc', 'NCT02804646_inc', 'NCT03495557_exc', 'NCT02837783_exc', 'NCT02361905_inc', 'NCT02621489_exc', 'NCT03146390_exc', 'NCT03336801_exc', 'NCT02624908_inc', 'NCT00609531_inc', 'NCT01770340_exc', 'NCT01978028_inc', 'NCT00235170_exc', 'NCT03217409_inc', 'NCT02816164_exc', 'NCT02557386_inc', 'NCT02985710_exc', 'NCT02831166_inc', 'NCT03350815_inc', 'NCT02488057_inc', 'NCT02612181_exc', 'NCT02944292_inc', 'NCT02560389_exc', 'NCT03036462_exc', 'NCT01218737_exc', 'NCT02872090_inc', 'NCT02959580_inc', 'NCT03461679_inc', 'NCT02924870_inc', 'NCT02385448_exc', 'NCT03099863_inc', 'NCT00679341_inc', 'NCT01743755_inc', 'NCT03511521_exc', 'NCT02951832_inc', 'NCT03373318_exc', 'NCT02926235_inc', 'NCT03499639_inc', 'NCT02195024_inc', 'NCT02334631_inc', 'NCT02905890_inc', 'NCT02566863_inc', 'NCT02644629_exc', 'NCT03070847_inc', 'NCT02550769_inc', 'NCT03213834_inc', 'NCT02315287_inc', 'NCT03561753_exc', 'NCT01313676_exc', 'NCT00344318_exc', 'NCT02162433_exc', 'NCT02227992_inc', 'NCT02901106_exc', 'NCT02566226_exc', 'NCT02041299_exc', 'NCT00720031_exc', 'NCT02924090_inc', 'NCT03192020_inc', 'NCT03177811_inc', 'NCT02137369_exc', 'NCT02455921_exc', 'NCT03083197_inc', 'NCT02624908_exc', 'NCT00970866_inc', 'NCT02816164_inc', 'NCT02102243_exc', 'NCT02630628_exc', 'NCT03518034_exc', 'NCT02678377_inc', 'NCT03506750_exc', 'NCT03500211_inc', 'NCT02375295_exc', 'NCT02105090_exc', 'NCT02607748_exc', 'NCT02912182_inc', 'NCT03380429_inc', 'NCT03376763_exc', 'NCT03228654_inc', 'NCT02859480_inc', 'NCT02621541_exc', 'NCT02892968_exc', 'NCT03017053_inc', 'NCT02558504_inc', 
'NCT02562456_inc', 'NCT02632266_inc', 'NCT03228498_exc', 'NCT02277041_exc', 'NCT03320057_inc', 'NCT01078051_inc', 'NCT03249272_inc', 'NCT02254668_inc', 'NCT02092467_exc', 'NCT02600000_inc', 'NCT02570347_inc', 'NCT03190304_inc', 'NCT02573909_exc', 'NCT02849483_exc', 'NCT03256864_inc', 'NCT02357654_inc', 'NCT03366779_exc', 'NCT03280017_inc', 'NCT02416765_inc', 'NCT03221231_exc', 'NCT01491763_inc', 'NCT01997112_exc', 'NCT02745704_exc', 'NCT02607319_exc', 'NCT03063866_inc', 'NCT01967420_exc', 'NCT00356148_inc', 'NCT02691793_exc', 'NCT02733159_inc', 'NCT01175044_exc', 'NCT02595190_inc', 'NCT00312429_exc', 'NCT02462590_exc', 'NCT03147599_inc', 'NCT01665417_inc', 'NCT03513874_exc', 'NCT02083991_inc', 'NCT02715466_exc', 'NCT02607748_inc', 'NCT01711801_inc', 'NCT03140423_inc', 'NCT03026465_inc', 'NCT02399033_inc', 'NCT02260700_exc', 'NCT02678962_inc', 'NCT01116882_exc', 'NCT02627560_exc', 'NCT03336801_inc', 'NCT02535299_inc', 'NCT02196285_inc', 'NCT03046108_inc', 'NCT00609531_exc', 'NCT02287259_exc', 'NCT02749617_inc', 'NCT02386800_inc', 'NCT01000155_inc', 'NCT02077556_exc', 'NCT02613039_exc', 'NCT03249272_exc', 'NCT02390973_inc', 'NCT03356834_exc', 'NCT03344887_inc', 'NCT02888704_exc', 'NCT00319748_exc', 'NCT03363295_inc', 'NCT03156855_inc', 'NCT01959425_exc', 'NCT03216967_exc', 'NCT03134378_exc', 'NCT02330705_exc', 'NCT02416869_inc', 'NCT02671318_inc', 'NCT02542956_inc', 'NCT02282319_inc', 'NCT03445949_exc', 'NCT02056301_exc', 'NCT03089086_exc', 'NCT03212352_exc', 'NCT02072811_exc', 'NCT02982577_inc', 'NCT03639545_exc', 'NCT02970773_exc', 'NCT02245256_exc', 'NCT03047538_inc', 'NCT02226887_inc', 'NCT02509949_exc', 'NCT02531724_exc', 'NCT02118467_inc', 'NCT00279552_inc', 'NCT02689089_exc', 'NCT03250507_exc', 'NCT02787070_inc', 'NCT02734173_inc', 'NCT01642875_exc', 'NCT02406495_inc', 'NCT02509091_exc', 'NCT02990403_inc', 'NCT03354572_inc', 'NCT03465397_inc', 'NCT02298504_inc', 'NCT01602081_inc', 'NCT03472508_inc', 'NCT02558504_exc', 'NCT02689024_inc', 'NCT01000155_exc', 
'NCT01631058_inc', 'NCT02637453_inc', 'NCT02200978_exc', 'NCT02961582_exc', 'NCT01084993_exc', 'NCT03329456_inc', 'NCT02589691_inc', 'NCT01098383_exc', 'NCT02414399_inc', 'NCT02638935_inc', 'NCT02894372_inc', 'NCT03004261_inc', 'NCT03059069_inc', 'NCT02958072_inc', 'NCT02298504_exc', 'NCT02321202_exc', 'NCT02281643_inc', 'NCT02644629_inc', 'NCT02406495_exc', 'NCT02731794_inc', 'NCT02621541_inc', 'NCT02529475_inc', 'NCT00527826_inc', 'NCT02277067_inc', 'NCT03015818_inc', 'NCT02968602_inc', 'NCT01911650_exc', 'NCT02884401_inc', 'NCT02299063_inc', 'NCT01567605_inc', 'NCT01978028_exc', 'NCT02805504_exc', 'NCT00061308_exc', 'NCT03315975_inc', 'NCT02152696_inc', 'NCT02871206_exc', 'NCT02022709_exc', 'NCT03190304_exc', 'NCT00917891_exc', 'NCT03481894_inc', 'NCT00440245_inc', 'NCT01228279_exc', 'NCT00250640_inc', 'NCT03337503_inc', 'NCT01963754_inc', 'NCT02203019_inc', 'NCT01391780_inc', 'NCT03034096_exc', 'NCT00305097_exc', 'NCT00483106_exc', 'NCT02851303_exc', 'NCT03120728_inc', 'NCT02299947_inc', 'NCT02394158_exc', 'NCT00994786_exc', 'NCT03318393_inc', 'NCT02675153_inc', 'NCT02137538_inc', 'NCT02208739_inc', 'NCT01051414_inc', 'NCT03164096_exc', 'NCT02609698_inc', 'NCT03045562_inc', 'NCT03484091_exc', 'NCT02959580_exc', 'NCT03056391_inc', 'NCT02570321_exc', 'NCT03223909_exc', 'NCT01700790_inc', 'NCT02445339_exc', 'NCT02974660_inc', 'NCT02393287_inc', 'NCT03355326_inc', 'NCT02219880_inc', 'NCT02068365_exc', 'NCT02827526_exc', 'NCT03372304_inc', 'NCT00586898_inc', 'NCT02056626_inc', 'NCT02689024_exc', 'NCT02901106_inc', 'NCT03390933_exc', 'NCT00324363_inc', 'NCT01322464_exc', 'NCT03033745_exc', 'NCT03129555_inc', 'NCT02821819_exc', 'NCT01184638_inc', 'NCT02765035_exc', 'NCT02783859_inc', 'NCT02951520_exc', 'NCT01642875_inc', 'NCT01967420_inc', 'NCT02825290_exc', 'NCT02877485_inc', 'NCT02944929_inc', 'NCT02916342_inc', 'NCT02332291_inc', 'NCT02322203_exc', 'NCT03260881_exc', 'NCT02882113_inc', 'NCT02196285_exc', 'NCT02565277_inc', 'NCT03134378_inc', 'NCT02269137_inc', 
'NCT02893228_inc', 'NCT02601157_inc', 'NCT02671318_exc', 'NCT01630954_exc', 'NCT02231892_exc', 'NCT02339844_exc', 'NCT03247738_exc', 'NCT01032109_exc', 'NCT00943865_inc', 'NCT00730301_exc', 'NCT01884337_exc', 'NCT02046395_inc', 'NCT02106598_exc', 'NCT02668016_inc', 'NCT03589105_exc', 'NCT02974660_exc', 'NCT02701881_exc', 'NCT03420638_exc', 'NCT01497639_inc', 'NCT02838810_exc', 'NCT02205931_exc', 'NCT02892968_inc', 'NCT01895946_inc', 'NCT03016741_inc', 'NCT02589691_exc', 'NCT03260881_inc', 'NCT02923700_inc', 'NCT02721017_inc', 'NCT03431831_inc', 'NCT02905734_exc', 'NCT01891513_inc', 'NCT02256956_inc', 'NCT02726009_exc', 'NCT03168178_inc', 'NCT03381755_inc', 'NCT03027115_exc', 'NCT02571881_inc', 'NCT02885909_inc', 'NCT02961764_exc', 'NCT03149887_inc', 'NCT02550028_exc', 'NCT02366819_inc', 'NCT03100513_exc', 'NCT00749112_inc', 'NCT02635893_exc', 'NCT03344887_exc', 'NCT03047538_exc', 'NCT03209011_inc', 'NCT01009359_exc', 'NCT02118467_exc', 'NCT01518946_inc', 'NCT02437045_inc', 'NCT01696617_inc', 'NCT02055053_inc', 'NCT01349413_inc', 'NCT03260790_exc', 'NCT02167022_inc', 'NCT02833116_inc', 'NCT03169127_exc', 'NCT02231892_inc', 'NCT02774317_inc', 'NCT02003339_exc', 'NCT01218737_inc', 'NCT00886158_exc', 'NCT01807897_exc', 'NCT03532620_exc', 'NCT02579928_exc', 'NCT00356148_exc', 'NCT02713087_inc', 'NCT02904785_exc', 'NCT02019628_inc', 'NCT03099408_inc', 'NCT03118232_exc', 'NCT02385448_inc', 'NCT02664558_inc', 'NCT03495557_inc', 'NCT02984228_exc', 'NCT03264911_inc', 'NCT03495609_inc', 'NCT03407625_exc', 'NCT02566928_inc', 'NCT02579733_inc', 'NCT02339974_exc', 'NCT02415257_exc', 'NCT02908919_exc', 'NCT02673359_inc', 'NCT02974686_inc', 'NCT02918409_inc', 'NCT01907230_inc', 'NCT02053246_inc', 'NCT01664507_inc', 'NCT00182520_inc', 'NCT03256864_exc', 'NCT01313676_inc', 'NCT02566928_exc', 'NCT01888965_inc', 'NCT02550080_inc', 'NCT02954029_inc', 'NCT03537924_inc', 'NCT01929434_exc', 'NCT03444142_inc', 'NCT02570347_exc', 'NCT02601157_exc', 'NCT02858804_exc', 'NCT02667730_inc', 
'NCT02560389_inc', 'NCT02985242_inc', 'NCT02924870_exc', 'NCT02106624_inc', 'NCT02225548_exc', 'NCT03138577_inc', 'NCT01410890_inc', 'NCT02886962_inc', 'NCT03247738_inc', 'NCT02652637_inc', 'NCT03506750_inc', 'NCT02678728_exc', 'NCT02871206_inc', 'NCT02477280_inc', 'NCT03360981_exc', 'NCT01214096_exc', 'NCT02555163_inc', 'NCT00970866_exc', 'NCT02055053_exc', 'NCT02973035_exc', 'NCT02164734_exc', 'NCT02380118_exc', 'NCT02935855_exc', 'NCT03345589_exc', 'NCT02420015_exc', 'NCT02467686_exc', 'NCT02560766_inc', 'NCT02734173_exc', 'NCT02543710_inc', 'NCT02701777_inc', 'NCT03225469_inc', 'NCT02068365_inc', 'NCT00198913_exc', 'NCT01757717_exc', 'NCT03115320_exc', 'NCT03388840_exc', 'NCT01483118_inc', 'NCT02464865_inc', 'NCT00965900_exc', 'NCT02299947_exc', 'NCT02425774_inc', 'NCT03176316_exc'], 'dev': ['NCT02257580_inc', 'NCT03259243_exc', 'NCT02765217_inc', 'NCT02552459_exc', 'NCT02330705_inc', 'NCT02934269_inc', 'NCT02102243_inc', 'NCT02443844_inc', 'NCT03252249_exc', 'NCT02077556_inc', 'NCT01650792_inc', 'NCT03120728_exc', 'NCT02156999_exc', 'NCT02893293_exc', 'NCT02542956_exc', 'NCT02330757_inc', 'NCT01912651_inc', 'NCT02502734_inc', 'NCT02788045_inc', 'NCT02837783_inc', 'NCT02370069_exc', 'NCT01009359_inc', 'NCT02528604_inc', 'NCT00050349_inc', 'NCT01720394_exc', 'NCT03631355_inc', 'NCT01116973_inc', 'NCT02872935_inc', 'NCT02396420_exc', 'NCT02476461_exc', 'NCT02951520_inc', 'NCT02209545_inc', 'NCT02939209_inc', 'NCT03097068_inc', 'NCT02862314_inc', 'NCT03464552_exc', 'NCT00527826_exc', 'NCT03173092_exc', 'NCT02779374_inc', 'NCT03228654_exc', 'NCT03036462_inc', 'NCT00317148_exc', 'NCT02186782_inc', 'NCT02015923_inc', 'NCT03369379_inc', 'NCT02969187_exc', 'NCT03424993_inc', 'NCT02101554_exc', 'NCT03221231_inc', 'NCT02961764_inc', 'NCT02267616_exc', 'NCT02375295_inc', 'NCT03477851_exc', 'NCT02779374_exc', 'NCT02958072_exc', 'NCT02842424_inc', 'NCT02995291_inc', 'NCT02430740_exc', 'NCT02481518_exc', 'NCT02344888_inc', 'NCT02456129_inc', 'NCT00752310_inc', 
'NCT02992938_exc', 'NCT03228238_inc', 'NCT02668978_exc', 'NCT02650388_inc', 'NCT02804646_exc', 'NCT02944929_exc', 'NCT03335904_inc', 'NCT02432404_inc', 'NCT01322464_inc', 'NCT03187639_exc', 'NCT02946918_exc', 'NCT02957877_exc', 'NCT03561753_inc', 'NCT02510404_exc', 'NCT03012984_exc', 'NCT02680054_inc', 'NCT03115151_inc', 'NCT03473132_inc', 'NCT02531724_inc', 'NCT01424020_inc', 'NCT03103204_inc', 'NCT02868437_inc', 'NCT02627521_exc', 'NCT03624517_exc', 'NCT02535299_exc', 'NCT03372265_exc', 'NCT03350659_inc', 'NCT02509091_inc', 'NCT02072811_inc', 'NCT02780427_exc', 'NCT02698969_inc', 'NCT02802644_exc', 'NCT02742233_inc', 'NCT03355326_exc', 'NCT02200978_inc', 'NCT02145026_inc', 'NCT01346436_exc', 'NCT01891513_exc', 'NCT02609425_inc', 'NCT02593409_exc', 'NCT02643381_inc', 'NCT03431831_exc', 'NCT02590653_inc', 'NCT02884115_exc', 'NCT03387059_exc', 'NCT02592980_inc', 'NCT02580630_exc', 'NCT03208465_inc', 'NCT02570321_inc', 'NCT02498483_exc', 'NCT02537899_exc', 'NCT02003339_inc', 'NCT03034837_inc', 'NCT01997580_inc', 'NCT01890759_exc', 'NCT00483106_inc', 'NCT03639519_inc', 'NCT02726009_inc', 'NCT02316886_exc', 'NCT02691793_inc', 'NCT03236246_exc', 'NCT03185130_inc', 'NCT03338855_exc', 'NCT01824537_exc', 'NCT00502567_inc', 'NCT00324363_exc', 'NCT02607163_inc', 'NCT02101554_inc', 'NCT02627560_inc', 'NCT02954029_exc', 'NCT02787070_exc', 'NCT02164734_inc', 'NCT02804126_inc', 'NCT02713087_exc', 'NCT02618057_exc', 'NCT02838810_inc', 'NCT02920177_inc', 'NCT00480129_inc', 'NCT03080493_inc', 'NCT03390933_inc', 'NCT02022709_inc', 'NCT02788045_exc', 'NCT00445029_exc', 'NCT00965900_inc', 'NCT02862314_exc', 'NCT02426944_inc', 'NCT02425774_exc', 'NCT02528136_inc', 'NCT03434951_inc', 'NCT02961582_inc', 'NCT02781610_exc', 'NCT03352869_inc', 'NCT03058835_inc', 'NCT02413970_inc', 'NCT02613039_inc', 'NCT03056287_inc', 'NCT03027115_inc', 'NCT02952963_inc', 'NCT01815580_exc', 'NCT01680081_exc', 'NCT02958566_inc', 'NCT01261832_exc', 'NCT02408120_inc', 'NCT02920177_exc', 'NCT02334631_exc', 
'NCT03513757_inc', 'NCT02897856_inc', 'NCT02509949_inc', 'NCT03539718_exc', 'NCT02985242_exc', 'NCT02946918_inc', 'NCT03335436_inc', 'NCT02205502_exc', 'NCT03337581_exc', 'NCT02441179_exc', 'NCT02260206_inc', 'NCT03639545_inc', 'NCT02647788_exc', 'NCT03475589_inc', 'NCT02968342_exc', 'NCT00894712_inc', 'NCT03373669_inc', 'NCT01943812_exc', 'NCT02227992_exc', 'NCT03297944_exc', 'NCT03146390_inc', 'NCT02692651_exc', 'NCT02478515_inc', 'NCT03044093_inc', 'NCT02957305_inc', 'NCT02656394_exc', 'NCT02862912_inc', 'NCT02953873_inc', 'NCT02141061_exc', 'NCT02884115_inc', 'NCT03354572_exc', 'NCT03026465_exc', 'NCT02965027_inc', 'NCT02473809_exc', 'NCT01993836_inc', 'NCT02984475_exc', 'NCT02208739_exc', 'NCT02974686_exc', 'NCT01314898_inc', 'NCT02226887_exc', 'NCT02432404_exc', 'NCT02201316_inc', 'NCT03388840_inc', 'NCT03228498_inc', 'NCT03043495_exc', 'NCT03262038_exc', 'NCT01669369_exc', 'NCT03317197_exc', 'NCT01942109_inc', 'NCT02632266_exc', 'NCT03234816_inc', 'NCT02789111_exc', 'NCT02897856_exc', 'NCT03181984_exc', 'NCT02754583_exc', 'NCT01184638_exc', 'NCT03444142_exc', 'NCT02527512_exc', 'NCT02983214_inc', 'NCT02964416_inc', 'NCT02245256_inc', 'NCT03472846_inc', 'NCT03404804_exc', 'NCT00317148_inc', 'NCT02678663_inc', 'NCT02782702_exc', 'NCT02152696_exc', 'NCT03282006_inc', 'NCT03335904_exc', 'NCT02918851_inc', 'NCT02318446_inc', 'NCT03164304_inc', 'NCT03506477_inc', 'NCT02950558_exc', 'NCT03278548_inc', 'NCT02766530_inc', 'NCT02457442_inc', 'NCT03369379_exc', 'NCT01816997_inc', 'NCT03400735_exc', 'NCT02337764_exc', 'NCT02973035_inc', 'NCT01346436_inc', 'NCT02361892_exc', 'NCT02707874_inc', 'NCT00122070_inc', 'NCT02301962_inc', 'NCT03126214_inc', 'NCT02466113_exc', 'NCT03185130_exc', 'NCT00236340_inc', 'NCT02905734_inc', 'NCT03364036_inc', 'NCT03113253_exc', 'NCT02386800_exc', 'NCT01793519_inc', 'NCT02859480_exc', 'NCT02481518_inc', 'NCT02573597_exc', 'NCT01602081_exc', 'NCT03194074_exc', 'NCT03637946_inc', 'NCT02121145_exc', 'NCT03639519_exc', 'NCT03217409_exc', 
'NCT01866800_inc', 'NCT02590822_inc', 'NCT02563535_exc', 'NCT03011476_inc', 'NCT02455921_inc', 'NCT02589353_exc', 'NCT01440296_inc', 'NCT02796378_exc', 'NCT02902120_exc', 'NCT03018171_exc', 'NCT02332291_exc', 'NCT02833623_exc', 'NCT03004209_inc', 'NCT02923700_exc', 'NCT02360631_exc', 'NCT02243553_inc', 'NCT02350439_inc', 'NCT03236246_inc', 'NCT02283905_inc', 'NCT03446885_inc', 'NCT03159507_inc', 'NCT02531971_exc', 'NCT02984475_inc', 'NCT01373684_exc', 'NCT01696617_exc', 'NCT02431559_exc', 'NCT00867958_exc', 'NCT01980680_inc', 'NCT03264911_exc', 'NCT00806936_exc', 'NCT01497639_exc', 'NCT03333655_exc', 'NCT02340169_inc', 'NCT03077204_inc', 'NCT03460002_exc', 'NCT03211741_inc', 'NCT02015494_inc', 'NCT00679341_exc', 'NCT02361905_exc', 'NCT00445029_inc', 'NCT02371200_inc', 'NCT01909934_inc', 'NCT02242188_inc', 'NCT02384850_exc', 'NCT02035800_exc', 'NCT01501201_inc', 'NCT02366819_exc', 'NCT01994382_inc', 'NCT02933671_inc', 'NCT01700790_exc', 'NCT02654912_exc', 'NCT02443623_inc', 'NCT03472495_inc', 'NCT00720031_inc', 'NCT02983214_exc', 'NCT03066440_exc', 'NCT03513874_inc', 'NCT02571179_exc', 'NCT03016741_exc', 'NCT02654912_inc', 'NCT00440245_exc', 'NCT01032109_inc', 'NCT02678728_inc', 'NCT02957877_inc', 'NCT01715584_exc', 'NCT00061308_inc', 'NCT01774019_exc', 'NCT02790593_inc', 'NCT00862446_inc', 'NCT01177891_exc', 'NCT00344318_inc', 'NCT03648021_inc', 'NCT00586898_exc', 'NCT02312089_inc', 'NCT02797548_inc', 'NCT03340740_inc', 'NCT02964715_exc', 'NCT00391690_exc', 'NCT02546856_inc', 'NCT03296488_exc', 'NCT00391690_inc', 'NCT03208127_inc', 'NCT01446094_inc', 'NCT03297021_exc', 'NCT02348918_inc', 'NCT02254668_exc', 'NCT02762851_inc', 'NCT03088280_exc', 'NCT02844907_exc', 'NCT02802644_inc', 'NCT01728194_exc', 'NCT03103204_exc', 'NCT02019160_exc', 'NCT02606565_inc', 'NCT02287259_inc', 'NCT03029078_exc', 'NCT02940912_exc', 'NCT03623789_inc', 'NCT02344888_exc', 'NCT03084588_inc', 'NCT02550028_inc', 'NCT02890719_inc', 'NCT02851303_inc', 'NCT02908919_inc', 'NCT02780427_inc', 
'NCT02225548_inc', 'NCT02056301_inc', 'NCT01816997_exc', 'NCT00198913_inc', 'NCT02667730_exc', 'NCT02202369_inc', 'NCT00279552_exc', 'NCT03476850_exc', 'NCT02689817_inc', 'NCT02632318_inc', 'NCT03250507_inc', 'NCT02774317_exc', 'NCT02609048_exc', 'NCT00846703_exc', 'NCT02765217_exc', 'NCT03318393_exc', 'NCT03506009_exc', 'NCT02885909_exc', 'NCT01991743_exc', 'NCT03247413_inc', 'NCT03404479_inc', 'NCT02664558_exc', 'NCT02490839_inc', 'NCT02167022_exc', 'NCT01857167_inc', 'NCT02900443_inc', 'NCT01997112_inc', 'NCT02773173_exc', 'NCT03340740_exc', 'NCT02704754_inc', 'NCT03040024_exc', 'NCT03034733_inc', 'NCT02415257_inc', 'NCT01491295_inc', 'NCT03480607_exc', 'NCT03589105_inc', 'NCT02858804_inc', 'NCT02303171_inc', 'NCT02595190_exc', 'NCT02635893_inc', 'NCT02426944_exc', 'NCT02489045_exc', 'NCT02882113_exc', 'NCT03138577_exc', 'NCT02851888_inc', 'NCT03305666_exc', 'NCT02109081_inc', 'NCT03532620_inc', 'NCT02689817_exc', 'NCT03196843_exc', 'NCT02499185_inc', 'NCT02894372_exc', 'NCT00812344_exc', 'NCT02557386_exc', 'NCT03019562_inc', 'NCT03019562_exc', 'NCT03182114_inc', 'NCT03424733_inc', 'NCT03467750_inc', 'NCT02939872_exc', 'NCT03315975_exc', 'NCT02632760_exc', 'NCT03288428_exc', 'NCT03472508_exc', 'NCT02314559_exc', 'NCT02590315_exc', 'NCT02969876_inc', 'NCT02573909_inc', 'NCT00904202_inc', 'NCT01803438_exc', 'NCT00787254_inc', 'NCT03360981_inc', 'NCT02175186_exc', 'NCT02632760_inc', 'NCT02416869_exc', 'NCT02476461_inc', 'NCT02467686_inc', 'NCT00094861_exc', 'NCT03067740_inc', 'NCT03413891_inc', 'NCT02620904_inc', 'NCT03233880_inc', 'NCT02874092_inc', 'NCT02284737_inc', 'NCT02634541_inc', 'NCT03208244_inc', 'NCT02997215_inc', 'NCT03323047_inc', 'NCT01890759_inc', 'NCT00425789_inc', 'NCT03305575_inc', 'NCT03430284_inc', 'NCT01491295_exc', 'NCT03360214_exc', 'NCT02555163_exc', 'NCT02593409_inc', 'NCT02905890_exc', 'NCT00812344_inc', 'NCT02137538_exc', 'NCT02570230_exc', 'NCT02996916_inc', 'NCT03068897_exc', 'NCT03091881_exc', 'NCT02571881_exc', 'NCT02205502_inc', 
'NCT02668978_inc', 'NCT02804126_exc', 'NCT01912677_inc', 'NCT02650024_exc', 'NCT02035904_exc', 'NCT01803828_inc', 'NCT03070847_exc', 'NCT03064568_exc', 'NCT02704234_inc', 'NCT00236340_exc', 'NCT02821819_inc', 'NCT03100513_inc', 'NCT02251249_inc', 'NCT02315287_exc', 'NCT00397215_inc', 'NCT01801072_exc', 'NCT02970773_inc', 'NCT02996916_exc', 'NCT02312076_exc', 'NCT02604459_inc', 'NCT02490839_exc', 'NCT02370069_inc', 'NCT02894268_exc', 'NCT03249311_inc', 'NCT02952963_exc', 'NCT02321839_exc', 'NCT01391780_exc', 'NCT03225469_exc', 'NCT02339844_inc', 'NCT01846507_exc', 'NCT02680054_exc', 'NCT02715466_inc', 'NCT02570230_inc', 'NCT01312012_exc', 'NCT02277041_inc', 'NCT00576173_inc', 'NCT02334722_inc', 'NCT03560310_exc', 'NCT01352598_inc', 'NCT00787254_exc', 'NCT03518034_inc', 'NCT03115320_inc', 'NCT00886158_inc', 'NCT02759861_inc', 'NCT02735902_exc', 'NCT02413970_exc', 'NCT02686021_inc', 'NCT01963754_exc', 'NCT02519777_inc', 'NCT03073603_inc', 'NCT02330757_exc', 'NCT03389061_inc', 'NCT00404495_exc', 'NCT02833623_inc', 'NCT02950558_inc', 'NCT00676273_exc', 'NCT02924090_exc', 'NCT02760459_exc', 'NCT02106598_inc', 'NCT02283905_exc', 'NCT02609698_exc', 'NCT00806273_inc', 'NCT02567214_exc', 'NCT03463564_inc', 'NCT02742233_exc', 'NCT03211741_exc', 'NCT02416765_exc', 'NCT02883400_inc', 'NCT03337581_inc', 'NCT03140488_exc', 'NCT02579928_inc', 'NCT03228017_exc', 'NCT02744976_inc', 'NCT03413891_exc', 'NCT01614041_exc', 'NCT02992028_exc', 'NCT03216967_inc', 'NCT02536976_inc', 'NCT02498483_inc', 'NCT02437045_exc', 'NCT03495609_exc', 'NCT03350659_exc', 'NCT03364036_exc', 'NCT00480129_exc', 'NCT01943409_exc', 'NCT03356834_inc', 'NCT03104816_exc', 'NCT02590822_exc', 'NCT03067740_exc', 'NCT01908465_inc', 'NCT02035800_inc', 'NCT02798237_exc', 'NCT02872090_exc', 'NCT03351608_exc', 'NCT02845427_inc', 'NCT03260790_inc', 'NCT02701777_exc', 'NCT01483118_exc', 'NCT02630628_inc', 'NCT02894268_inc'], 'test': ['NCT03117608_inc', 'NCT02550080_exc', 'NCT03252249_inc', 'NCT03193684_inc', 
'NCT03056391_exc', 'NCT03382106_inc', 'NCT01715714_inc', 'NCT01907230_exc', 'NCT02283996_exc', 'NCT02273791_inc', 'NCT03013790_inc', 'NCT03338296_exc', 'NCT02469610_inc', 'NCT03234816_exc', 'NCT02557412_exc', 'NCT02062489_inc', 'NCT02426034_exc', 'NCT02560766_exc', 'NCT02269137_exc', 'NCT03445949_inc', 'NCT03164304_exc', 'NCT03344042_inc', 'NCT01996436_inc', 'NCT02810704_exc', 'NCT03304496_exc', 'NCT03123562_exc', 'NCT02631512_exc', 'NCT02904785_inc', 'NCT01639664_inc', 'NCT03262038_inc', 'NCT02678962_exc', 'NCT00728156_exc', 'NCT01891383_inc', 'NCT02592980_exc', 'NCT02502734_exc', 'NCT00728156_inc', 'NCT02942303_inc', 'NCT00650312_exc', 'NCT02473809_inc', 'NCT03231982_inc', 'NCT01639664_exc', 'NCT01888965_exc', 'NCT01793831_exc', 'NCT01799681_exc', 'NCT02695992_exc', 'NCT02406885_inc', 'NCT02637453_exc', 'NCT02754583_inc', 'NCT02527512_inc', 'NCT02531971_inc', 'NCT02997215_exc', 'NCT03159507_exc', 'NCT02965443_exc', 'NCT02966236_exc', 'NCT02046395_exc', 'NCT00917891_inc', 'NCT03491059_inc', 'NCT02390973_exc', 'NCT02965443_inc', 'NCT02652572_exc', 'NCT02323399_inc', 'NCT01401335_exc', 'NCT03663387_exc', 'NCT02916342_exc', 'NCT01424020_exc', 'NCT02912182_exc', 'NCT02247128_exc', 'NCT00351611_inc', 'NCT03008005_exc', 'NCT02318446_exc', 'NCT02295202_exc', 'NCT02589977_inc', 'NCT02952378_inc', 'NCT02858180_exc', 'NCT02827487_inc', 'NCT02117986_exc', 'NCT03089086_inc', 'NCT02202369_exc', 'NCT01228279_inc', 'NCT03119766_exc', 'NCT02698969_exc', 'NCT01943409_inc', 'NCT03329456_exc', 'NCT01236417_exc', 'NCT00343668_inc', 'NCT02707874_exc', 'NCT02150590_exc', 'NCT01857167_exc', 'NCT02645474_inc', 'NCT02863120_exc', 'NCT03624517_inc', 'NCT01824537_inc', 'NCT03171987_exc', 'NCT02443844_exc', 'NCT03530124_inc', 'NCT02019628_exc', 'NCT03149887_exc', 'NCT02251249_exc', 'NCT02299063_exc', 'NCT03467750_exc', 'NCT02499185_exc', 'NCT02316886_inc', 'NCT03129555_exc', 'NCT02984228_inc', 'NCT02321202_inc', 'NCT00312429_inc', 'NCT00846703_inc', 'NCT03177811_exc', 'NCT02789111_inc', 
'NCT03376763_inc', 'NCT03043495_inc', 'NCT00989261_exc', 'NCT03091881_inc', 'NCT03404804_inc', 'NCT02704754_exc', 'NCT02379156_inc', 'NCT01735955_inc', 'NCT03177837_inc', 'NCT02242188_exc', 'NCT00639795_inc', 'NCT03663387_inc', 'NCT03280017_exc', 'NCT03471117_inc', 'NCT02782702_inc', 'NCT02112734_inc', 'NCT03619707_exc', 'NCT03064568_inc', 'NCT01217671_exc', 'NCT02256943_inc', 'NCT00343668_exc', 'NCT01728194_inc', 'NCT02958566_exc', 'NCT03465397_exc', 'NCT01680081_inc', 'NCT03226080_exc', 'NCT03325023_inc', 'NCT02707809_inc', 'NCT03247413_exc', 'NCT02515773_inc', 'NCT02894645_exc', 'NCT03015818_exc', 'NCT03063866_exc', 'NCT03025620_inc', 'NCT02745704_inc', 'NCT02842424_exc', 'NCT02650388_exc', 'NCT01799681_inc', 'NCT03620526_inc', 'NCT02360631_inc', 'NCT01846507_inc', 'NCT03216447_exc', 'NCT03147599_exc', 'NCT02267616_inc', 'NCT03282006_exc', 'NCT02760459_inc', 'NCT02743598_exc', 'NCT03079141_inc', 'NCT01765231_inc', 'NCT01051414_exc', 'NCT03318874_exc', 'NCT02034019_exc', 'NCT03297125_inc', 'NCT03500211_exc', 'NCT03259243_inc', 'NCT02637076_exc', 'NCT00650312_inc', 'NCT02150590_inc', 'NCT03068897_inc', 'NCT03424733_exc', 'NCT03082573_inc', 'NCT02777424_inc', 'NCT02823808_exc', 'NCT00806273_exc', 'NCT02541955_exc', 'NCT03169127_inc', 'NCT00401245_inc', 'NCT02903407_inc', 'NCT02141061_inc', 'NCT00959569_inc', 'NCT02652637_exc', 'NCT02427295_inc', 'NCT03305575_exc', 'NCT02443623_exc', 'NCT03018171_inc', 'NCT03555526_exc', 'NCT02321839_inc', 'NCT02810704_inc', 'NCT00862446_exc', 'NCT03231982_exc', 'NCT02546856_exc', 'NCT02818816_exc', 'NCT02675153_exc', 'NCT00785213_inc', 'NCT02186782_exc', 'NCT02175186_inc', 'NCT02109081_exc', 'NCT01912677_exc', 'NCT03253796_exc', 'NCT01631058_exc', 'NCT02056288_inc', 'NCT02638935_exc', 'NCT02743598_inc', 'NCT02162433_inc', 'NCT03416413_exc', 'NCT02759861_exc', 'NCT02222272_inc', 'NCT01098383_inc', 'NCT01959061_exc', 'NCT02877485_exc', 'NCT02121145_inc', 'NCT00959569_exc', 'NCT03506009_inc', 'NCT03193684_exc', 'NCT02564471_inc', 
'NCT02361892_inc', 'NCT03513757_exc', 'NCT03122119_inc', 'NCT03477851_inc', 'NCT01866800_exc', 'NCT03537924_exc', 'NCT01064752_inc', 'NCT03430284_exc', 'NCT01177891_inc', 'NCT02015923_exc', 'NCT03062358_exc', 'NCT02528136_exc', 'NCT02822001_exc', 'NCT02777424_exc', 'NCT03181984_inc', 'NCT02283996_inc', 'NCT01909934_exc', 'NCT03113253_inc', 'NCT00752310_exc', 'NCT02606565_exc', 'NCT02529475_exc', 'NCT02942303_exc', 'NCT02787863_exc', 'NCT01815580_inc', 'NCT02056288_exc', 'NCT02781610_inc', 'NCT02609048_inc', 'NCT03299517_inc', 'NCT00404495_inc', 'NCT03209687_exc', 'NCT02687724_inc', 'NCT03088280_inc', 'NCT03171987_inc', 'NCT03124329_inc', 'NCT00379366_inc', 'NCT03125057_exc', 'NCT02900443_exc', 'NCT02511574_inc', 'NCT02350439_exc', 'NCT02876484_inc', 'NCT02939209_exc', 'NCT02934269_exc', 'NCT03479502_inc', 'NCT02117986_inc', 'NCT02765035_inc', 'NCT02488057_exc', 'NCT01801072_inc', 'NCT01082549_exc', 'NCT03463564_exc', 'NCT02678663_exc', 'NCT01929434_inc', 'NCT03381755_exc', 'NCT02739295_exc', 'NCT02035904_inc', 'NCT02106624_exc', 'NCT02430740_inc', 'NCT03012984_inc', 'NCT01567605_exc', 'NCT02952378_exc', 'NCT02431559_inc', 'NCT02935855_inc', 'NCT03011476_exc', 'NCT02548013_exc', 'NCT02369211_inc', 'NCT03177837_exc', 'NCT03131050_inc', 'NCT02368743_exc', 'NCT02777580_inc', 'NCT02201316_exc', 'NCT02041299_inc', 'NCT02112734_exc', 'NCT03305666_inc', 'NCT01717911_inc', 'NCT00094861_inc', 'NCT01711801_exc', 'NCT03347513_inc', 'NCT02796378_inc', 'NCT03320057_exc', 'NCT03402945_exc', 'NCT02735577_exc', 'NCT00718952_exc', 'NCT00543712_inc', 'NCT02510404_inc', 'NCT01214096_inc', 'NCT02437084_exc', 'NCT01884337_inc', 'NCT02314559_inc', 'NCT00599924_inc', 'NCT03192020_exc', 'NCT02525991_inc', 'NCT03481894_exc', 'NCT02748330_inc', 'NCT03115151_exc', 'NCT01320579_exc', 'NCT01856491_inc', 'NCT02462317_exc', 'NCT02580630_inc', 'NCT00931983_inc', 'NCT03648021_exc', 'NCT02301039_inc', 'NCT01895946_exc', 'NCT02867618_inc', 'NCT01911650_inc', 'NCT03026088_exc', 'NCT03397914_exc', 
'NCT03096613_inc', 'NCT03004261_exc', 'NCT02704234_exc', 'NCT02303171_exc', 'NCT01991743_inc', 'NCT02456532_exc', 'NCT01581749_inc', 'NCT02543710_exc', 'NCT01717911_exc', 'NCT02364648_inc', 'NCT00379366_exc', 'NCT02541955_inc', 'NCT01684501_exc', 'NCT03402945_inc', 'NCT02732080_inc', 'NCT03056287_exc', 'NCT01518946_exc', 'NCT02042287_inc', 'NCT02831166_exc', 'NCT02062489_exc', 'NCT02884401_exc', 'NCT02281643_exc', 'NCT01709981_inc', 'NCT02858180_inc', 'NCT01175044_inc', 'NCT03337503_exc', 'NCT02760251_exc', 'NCT00122070_exc', 'NCT03400735_inc', 'NCT01501201_exc', 'NCT00250640_exc', 'NCT01822262_exc', 'NCT02631512_inc', 'NCT03473132_exc', 'NCT02260206_exc', 'NCT03208998_exc', 'NCT03066440_inc', 'NCT03209687_inc', 'NCT02937779_inc', 'NCT03182114_exc', 'NCT03297125_exc', 'NCT02526823_exc', 'NCT03397914_inc', 'NCT02825290_inc', 'NCT01236417_inc', 'NCT02364648_exc', 'NCT02565277_exc', 'NCT02645474_exc', 'NCT02056626_exc', 'NCT02747940_inc', 'NCT03344042_exc', 'NCT03351608_inc', 'NCT03338855_inc', 'NCT02322203_inc', 'NCT02653131_exc', 'NCT01715714_exc', 'NCT01630954_inc', 'NCT02466113_inc', 'NCT02714725_exc', 'NCT02092467_inc', 'NCT02224040_exc', 'NCT03226080_inc', 'NCT03151603_exc', 'NCT02979561_exc', 'NCT02589353_inc', 'NCT02918851_exc', 'NCT00625742_exc', 'NCT02705222_exc', 'NCT03328052_exc', 'NCT03122119_exc', 'NCT02830360_exc', 'NCT02692651_inc', 'NCT02763007_exc', 'NCT03118232_inc', 'NCT03637946_exc', 'NCT01743755_exc', 'NCT00994786_inc', 'NCT02705222_inc', 'NCT02464813_inc', 'NCT03472846_exc', 'NCT00954850_exc', 'NCT03126214_exc', 'NCT01768195_exc', 'NCT03519568_inc', 'NCT02590315_inc', 'NCT01320579_inc', 'NCT02426034_inc', 'NCT03096613_exc', 'NCT02301039_exc', 'NCT03208998_inc', 'NCT03323047_exc', 'NCT03140488_inc', 'NCT01908465_exc', 'NCT00426751_inc', 'NCT03168555_exc', 'NCT02714725_inc', 'NCT03476850_inc', 'NCT03017053_exc', 'NCT02827487_exc', 'NCT03373318_inc', 'NCT01373684_inc', 'NCT03288428_inc', 'NCT01614041_inc', 'NCT03631355_exc', 'NCT02849483_inc', 
'NCT01490034_inc', 'NCT02429765_inc', 'NCT03151603_inc', 'NCT02959801_exc', 'NCT02643381_exc', 'NCT02678377_exc', 'NCT00397215_exc', 'NCT02609425_exc', 'NCT03372265_inc', 'NCT03382106_exc', 'NCT02952365_inc', 'NCT02462590_inc', 'NCT02270970_inc', 'NCT03619707_inc', 'NCT03208127_exc', 'NCT03173092_inc', 'NCT02687178_inc', 'NCT03325023_exc', 'NCT03499639_exc', 'NCT03275584_inc', 'NCT01446094_exc', 'NCT03195153_exc', 'NCT02552459_inc', 'NCT03380429_exc', 'NCT02414399_exc', 'NCT03029078_inc', 'NCT03064867_inc', 'NCT00455663_inc', 'NCT03352869_exc', 'NCT03506477_exc', 'NCT02489045_inc', 'NCT02867618_exc', 'NCT02862912_exc', 'NCT01757717_inc', 'NCT02649114_inc', 'NCT03099863_exc', 'NCT02777580_exc', 'NCT02827526_inc', 'NCT03373669_exc', 'NCT02394158_inc', 'NCT02224040_inc', 'NCT02946892_exc', 'NCT02783859_exc', 'NCT01116882_inc', 'NCT02431442_exc', 'NCT03351972_inc', 'NCT01959061_inc', 'NCT01765231_exc', 'NCT02965027_exc', 'NCT02647788_inc', 'NCT03195153_inc', 'NCT01669369_inc', 'NCT02083991_exc', 'NCT00954850_inc', 'NCT02816762_exc', 'NCT03318874_inc', 'NCT02247128_inc', 'NCT03493919_inc', 'NCT02701881_inc', 'NCT00396734_exc', 'NCT02277067_exc', 'NCT01912651_exc', 'NCT00426751_exc', 'NCT02427295_exc', 'NCT02807857_inc', 'NCT03536520_exc', 'NCT01822262_inc', 'NCT01312012_inc', 'NCT01082549_inc', 'NCT02851888_exc', 'NCT01770340_inc', 'NCT02944292_exc', 'NCT00183885_inc', 'NCT02525991_exc', 'NCT03125057_inc', 'NCT02926989_exc', 'NCT03338296_inc', 'NCT02632318_exc', 'NCT02749617_exc', 'NCT03497598_inc', 'NCT02946892_inc', 'NCT03058835_exc', 'NCT02483715_exc', 'NCT02926989_inc', 'NCT02385045_inc', 'NCT03484091_inc', 'NCT03034733_exc', 'NCT01850147_inc', 'NCT02550769_exc', 'NCT01720394_inc', 'NCT02966236_inc', 'NCT02537899_inc', 'NCT02985710_inc', 'NCT03132259_exc', 'NCT01205334_exc', 'NCT00500500_exc', 'NCT00926523_inc', 'NCT03154931_inc', 'NCT03011177_inc', 'NCT03120533_exc', 'NCT03407625_inc', 'NCT03084588_exc', 'NCT02673359_exc', 'NCT02339974_inc', 'NCT03097068_exc', 
'NCT02907554_inc', 'NCT03044561_exc', 'NCT02755701_inc', 'NCT00926523_exc', 'NCT03420638_inc', 'NCT01932996_inc', 'NCT02186600_exc', 'NCT03198910_inc', 'NCT02689089_inc', 'NCT01709981_exc', 'NCT02937779_exc', 'NCT03416413_inc', 'NCT01959425_inc', 'NCT02469610_exc', 'NCT03132259_inc', 'NCT01581749_exc', 'NCT01944800_inc', 'NCT00182520_exc', 'NCT02668016_exc', 'NCT01942109_exc', 'NCT03461679_exc', 'NCT03304496_inc', 'NCT02284737_exc', 'NCT00305097_inc', 'NCT00455663_exc', 'NCT01806558_inc', 'NCT02429765_exc', 'NCT02596555_exc', 'NCT03297021_inc', 'NCT02019160_inc', 'NCT03539718_inc', 'NCT03355157_inc', 'NCT02952365_exc', 'NCT02105090_inc', 'NCT03196843_inc', 'NCT00943865_exc', 'NCT03530124_exc', 'NCT02431442_inc', 'NCT02907554_exc', 'NCT01942915_inc', 'NCT00500500_inc', 'NCT02564471_exc', 'NCT03479502_exc', 'NCT02621489_inc', 'NCT02902120_inc', 'NCT02301962_exc', 'NCT03475589_exc', 'NCT03008005_inc', 'NCT02419378_inc', 'NCT03168555_inc', 'NCT02420015_inc', 'NCT02964416_exc', 'NCT01807897_inc']}

In [None]:
# Entity labels the NER model is trained on. ann_to_dict keeps only
# annotations whose label appears here and records every other label it
# encounters in fail_list.
label_list = [
    'Person',
    'Condition',
    'Drug',
    'Observation',
    'Measurement',
    'Procedure',
    'Device',
    'Temporal',
    'Value',
    'Mood',
    'Pregnancy_considerations',
]

In [None]:
# Fill each DocBin from its split: every 'train' entry goes into
# doc_bin_train first, then every 'dev' entry goes into doc_bin_dev.
for split_name, target_bin in (('train', doc_bin_train), ('dev', doc_bin_dev)):
    for d in splits[split_name]:
        doc_to_bin(d, target_bin)

# Print the held-out test split so it is easy to copy for evaluation.
print('test:', splits['test'], sep='\n')

## Write docbin objects to disk

In [None]:
# Folder that receives the serialized DocBins (and, later, the trained model).
mod_folder='/Users/meldrumapple/Desktop/Capstone/mod_chia/' # folder for model to end up in

# Make sure the output folder exists before writing; to_disk is not relied on
# to create missing parent directories.
os.makedirs(mod_folder, exist_ok=True)

# Build the file paths with os.path.join rather than string concatenation so
# the result stays correct whether or not mod_folder ends with a separator.
doc_bin_train.to_disk(os.path.join(mod_folder, 'training_data.spacy'))
doc_bin_dev.to_disk(os.path.join(mod_folder, 'dev_data.spacy'))

## Make Config File

Use the spaCy base config file from https://spacy.io/usage/training
(same as below).

Replace the train and dev paths with the paths to the files where the DocBins were saved.

Copy this into a text file and save it as 'base_config.cfg'.

Then open a terminal in JupyterLab and change directory to `mod_folder`:

    cd /Users/meldrumapple/Desktop/Capstone/mod_chia/

Use spaCy's `init fill-config` command to fill a complete config file from the base_config file:

    python -m spacy init fill-config base_config.cfg config.cfg

Start model training:

    python -m spacy train config.cfg --output ./output

In [None]:
# Display the entries assigned to the training split for reference.
train_split = splits['train']
print(train_split)