Example usage showing how to load a model and predict a set of MedDRA terms for an input text. You need to download and unpack the MedDRA package using your subscription login.

In [9]:
from transformers import BertTokenizer, BertForSequenceClassification
from os import path
import pandas as pd
import numpy as np
import torch
import json

In [10]:
# Directory holding the unpacked MedDRA ASCII files; `pt.asc` is read from it further down.
# It is left empty here, so set it to your local MedDRA folder before running the notebook.
MEDDRA_DIR_EN = ''

In [11]:
# Model name or local directory; passed to `from_pretrained` for both the tokenizer and the model.
MODEL_NAME_OR_DIR = 'olastor/mcn-en-smm4h' # adjust model here
# Dataset folder belonging to the chosen model; `labels.json` is loaded from this directory.
DATA_DIR = './data/smm4h/smm4h/' # set to corresponding dataset folder with labels.json

In [12]:
# Load the MedDRA `pt.asc` table so predicted label codes can be displayed with their names.
# Column names for the file, kept as a single comma-separated string.
cols_pt = 'pt_code,pt_name,null_field,pt_soc_code,pt_whoart_code,pt_harts_code,pt_costart_sym,pt_icd9_code,pt_icd9cm_code,pt_icd10_code,pt_jart_code'

# The file is '$'-separated; because `names` is given, every row is read as data.
# `index_col=False` stops pandas from promoting the first column to the index.
df_pt_de = pd.read_csv(
    path.join(MEDDRA_DIR_EN, 'pt.asc'),
    names=cols_pt.split(','),
    index_col=False,
    sep='$',
    encoding='latin-1',
)

In [13]:
# Tokenizer and classification model are both loaded from the same name/directory.
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME_OR_DIR)
model = BertForSequenceClassification.from_pretrained(MODEL_NAME_OR_DIR)

# `labels` is later indexed by the model's output position to obtain a label code.
with open(path.join(DATA_DIR, 'labels.json')) as f:
    labels = json.load(f)

In [14]:
# Lookup table from pt_code to pt_name, built from the two matching columns of the pt table.
pt_code_to_name = dict(zip(df_pt_de['pt_code'], df_pt_de['pt_name']))

def predict_meddra(input_sequence: str, top_n: int = 10):
    """Print the `top_n` highest-scoring labels the model predicts for a text.

    The text is encoded with the notebook-level `tokenizer`, scored by the
    notebook-level `model`, and the best-scoring output positions are mapped
    to label codes via `labels` and to display names via `pt_code_to_name`.

    Args:
        input_sequence: The text to classify.
        top_n: How many of the highest-scoring labels to print.

    Returns:
        None. One line per rank is printed: ``'<rank>. <name> (<code>)'`` on
        success, or ``'<rank> ERROR'`` when the label cannot be resolved
        (output position not in `labels`, non-numeric label, or code missing
        from `pt_code_to_name`).
    """
    # encode sentence
    inputs = tokenizer.encode_plus(input_sequence, add_special_tokens=True, return_tensors='pt')

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        # The first element of the model output holds the per-label scores.
        scores = model(inputs['input_ids'], token_type_ids=inputs['token_type_ids'])[0]

    # Positions of the first (only) sequence, best score first, as plain Python ints.
    ranked_positions = scores.sort(descending=True).indices[0][:top_n].tolist()

    for rank, position in enumerate(ranked_positions, start=1):
        try:
            label = labels[position]
            name = pt_code_to_name[int(label)]
        except (KeyError, IndexError, ValueError, TypeError):
            # Best-effort display: an unresolvable label is reported and the loop goes on.
            # Only lookup/conversion failures are caught, so KeyboardInterrupt and
            # unrelated bugs still surface instead of being printed as "ERROR".
            print('%i ERROR' % rank)
        else:
            print('%i. %s (%s)' % (rank, name, label))

In [15]:
# Example: print the three best-scoring terms for a short sentence.
predict_meddra('Last night I had a bad dream!', top_n=3)

1. Nightmare (10029412)
2. Abnormal dreams (10000125)
3. Crying (10011469)
