In [None]:
import json
import os
from datetime import date
from medcat.cat import CAT
from medcat.components.addons.meta_cat import MetaCAT, MetaCATAddon
from medcat.config.config_meta_cat import ConfigMetaCAT
from medcat.components.addons.meta_cat.mctokenizers.bert_tokenizer import TokenizerWrapperBERT

In [None]:
from medcat_den.injection import inject_into_medcat
from medcat_den.den import get_default_den
from medcat_den.utils import summarise_den
# Route MedCAT's model loading (and, because inject_save=True, saving)
# through the centralised den storage.
inject_into_medcat(inject_save=True)

# List the models currently available in the default den.
default_den = get_default_den()
print(summarise_den(default_den))

### Load the model pack with MetaCATs


In [None]:
# CHANGE THIS: hash of the model pack to load
model_hash = 'a645742030cae7be'

# Folder holding the MedCATtrainer export; make sure the export is in this folder
ann_dir = "../data/mct_exports/"
# CHANGE THIS: file name of the MedCATtrainer export inside `ann_dir`
mctrainer_export_name = "example_mct_export.json"

# Join with os.path.join (instead of string concatenation) so the path stays
# valid even if `ann_dir` is edited to a value without a trailing separator.
mctrainer_export_path = os.path.join(ann_dir, mctrainer_export_name)

In [None]:
# Load the model pack identified by `model_hash` into `cat`.
# NOTE(review): passing a hash instead of a file path presumably relies on the
# medcat_den injection done earlier in this notebook — confirm.
cat = CAT.load_model_pack(model_hash)

In [None]:
# If running with the default model, we need to actually add a MetaCAT model
# because it doesn't ship with one.
# (ConfigMetaCAT is already imported in the first cell, so it is not re-imported here.)
cnf = ConfigMetaCAT()
cnf.general.category_name = 'Status'
cnf.general.category_value2id = {'Other': 1, 'Confirmed': 0}
# Derive the class count from the label mapping so the two cannot drift apart
# when the mapping is edited.
cnf.model.nclasses = len(cnf.general.category_value2id)

# Text file passed to the tokenizer's `train` call in the preprocessor below
data_path = os.path.join("example_data", "tok_data.txt")

# create MetaCAT
mc = MetaCATAddon.create_new(
    cnf, cat.pipe.tokenizer,
    tknzer_preprocessor=lambda tknzer: tknzer.hf_tokenizers.train(data_path))

# add MetaCAT
if cat.config.components.addons and cnf in cat.config.components.addons:
    # avoid adding multiple on multiple runs
    print("Already had an addon, so not adding a 2nd (probably identical) one")
else:
    cat.add_addon(mc)
print("Addon configs:", cat.config.components.addons)
print("Addons", cat.get_addons())


In [None]:
# Gather every MetaCAT addon attached to the loaded model pack and report
# how many were found.
meta_cats = cat.get_addons_of_type(MetaCATAddon)
print('There are: {} meta cat models in this model pack.'.format(len(meta_cats)))

There are: 3 meta cat models in this model pack.


In [None]:
# Print the first MetaCAT addon of the pack (raises IndexError if `meta_cats` is empty).
print(meta_cats[0])

{
  "Category Name": "Temporality",
  "Description": "No description",
  "Classes": {
    "Past": 0,
    "Recent": 1,
    "Future": 2
  },
  "Model": "bert"
}


<b> NOTE: </b> 
 The name for the classification task can vary. E.g. the Category Name for 'Experiencer' can be 'Subject', because it was configured and annotated that way in MedCATTrainer, while the model expects 'Experiencer'.
 
 To accommodate this, we have a list that stores the alternative names for a category. This attribute can be found under `mc.config.general.alternative_category_names`

E.g. for Experiencer, it will be pre-loaded as alternative_category_names = ['Experiencer','Subject']

Set this list to ensure during training / fine-tuning the model is aware of alternative names for classes.

In [None]:
# Show the alternative category (task) names configured on the first MetaCAT addon.
print(meta_cats[0].config.general.alternative_category_names)

💡 In case you are using older model packs, the above field will be empty. In that case, please run the following code:

In [None]:
# Only run in case the above output is an empty list
# Each inner list groups the names that refer to the same classification task.
category_name_mapping = [["Presence", "Status"], ["Temporality", "Time"], ["Experiencer", "Subject"]]
# Map every known name to the full group of names it belongs to.
lookup = {item: group for group in category_name_mapping for item in group}

# Iterate over the addons directly; a dedicated loop variable avoids overwriting
# the notebook-level `mc` created in an earlier cell.
for meta_cat_addon in meta_cats:
    gen_cnf = meta_cat_addon.config.general
    alternative_names = lookup.get(gen_cnf.category_name)
    if alternative_names is None:
        # Unknown task name: keep the existing value instead of replacing the
        # list-typed config field with None.
        print(f"No alternative category names known for '{gen_cnf.category_name}'; leaving config unchanged")
        continue
    # Copy so that different configs never share the same list object.
    gen_cnf.alternative_category_names = list(alternative_names)

<b> NOTE: </b> 
 The name for the classes can vary too. Some sites may have trained a MetaCAT model for the same task, but called a class value a slightly different name.
 
 E.g: For the Presence task, the class name can be 'Not present (False)' or 'False'
 
 To accommodate this, we have a mapping that stores the alternative names for each class. This attribute can be found under `mc.config.general.alternative_class_names`

 E.g. for Presence, it will be pre-loaded as alternative_class_names = [["Hypothetical (N/A)","Hypothetical"],["Not present (False)","False"],["Present (True)","True"]]

In [None]:
# Show the alternative class names configured on the first MetaCAT addon.
print(meta_cats[0].config.general.alternative_class_names)

💡 In case you are using older model packs, the above field will be empty. In that case, please run the following code:

In [None]:
# Only run in case the above output is an empty list
# For each task name: groups of class labels that should be treated as the same class.
class_name_mapping = {
    "Temporality": [["Past"], ["Recent", "Present"], ["Future"]],
    "Time": [["Past"], ["Recent", "Present"], ["Future"]],
    "Experiencer": [["Family"], ["Other"], ["Patient"]],
    "Subject": [["Family"], ["Other"], ["Patient"]],
    # "Presence" and "Status" name the same task, so both need an entry;
    # without "Presence" a model using that category name raised KeyError.
    "Presence": [["Hypothetical (N/A)", "Hypothetical"], ["Not present (False)", "False"], ["Present (True)", "True"]],
    "Status": [["Hypothetical (N/A)", "Hypothetical"], ["Not present (False)", "False"], ["Present (True)", "True"]],
}

for meta_cat_addon in meta_cats:
    gen_cnf = meta_cat_addon.config.general
    alternative_classes = class_name_mapping.get(gen_cnf.category_name)
    if alternative_classes is None:
        # Unknown task name: report it and carry on instead of aborting the
        # loop with a KeyError and leaving later addons unconfigured.
        print(f"No alternative class names known for '{gen_cnf.category_name}'; leaving config unchanged")
        continue
    gen_cnf.alternative_class_names = alternative_classes

# For LSTM and BERT model

In [None]:
# Train the first meta cat model - 'Temporality' Task.
meta_cat: MetaCATAddon = meta_cats[0]

# to overwrite the existing model, resave the fine-tuned model with the same model pack dir
meta_cat_task = meta_cat.config.general.category_name
save_dir_path = os.path.join("temp", "meta_" + meta_cat_task)

# to save the new model elsewhere, uncomment the below line
#save_dir_path= "test_meta_"+meta_cat_task # Where to save the meta_model and results. 

# train the meta_model
results = meta_cat.mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)

# Save results
# Make sure the target folder exists before writing the report into it.
os.makedirs(save_dir_path, exist_ok=True)
results_path = os.path.join(save_dir_path, 'meta_' + meta_cat_task + '_results.json')
# Use a context manager so the file is flushed and closed even if json.dump fails.
with open(results_path, 'w') as results_file:
    json.dump(results['report'], results_file)

## If you don't have the model packs and are training from scratch
<b>⚠️ This is very rare; it is recommended to always use the model packs and then fine-tune them.</b>

In [None]:
# Reference snippet, intentionally left commented out: trains a brand-new MetaCAT
# model from a fresh config instead of fine-tuning one taken from a model pack.
# Uncomment the lines below to use it.
# NOTE(review): `meta_cat_task` is only assigned in the fine-tuning cell above;
# define it explicitly here if that cell is not run.

# config = ConfigMetaCAT()
# # make sure to change the following parameters:
# # config.model.nclasses
# # config.general.category_name

# # change model name if training BERT for the first time
# config.model.model_name = 'bert'

# tokenizer = TokenizerWrapperBERT.load("", config.model.model_variant)

# save_dir_path= "test_meta_" + meta_cat_task # Where to save the meta_model and results. 

# # Initialise and train meta_model
# mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)
# results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)

# # Save results
# json.dump(results['report'], open(os.path.join(save_dir_path,'meta_' + meta_cat_task+'_results.json'), 'w'))