# Feed-forward neural network baseline

We build two feed-forward neural networks, one for each diagnosis (pneu and inf), which take both symptoms and text embeddings as an input, and learn to output a prediction for the diagnosis probability. 

![title](figures/models_FF.png)

We turn all the NaN values into a third category, "unk". This way, dysp, cough and nasal can each take on three values: "no", "yes" and "unk". 

In [1]:
import pickle

# Read the pickled training dataframe from disk.
# NOTE(review): pickle can execute arbitrary code on load -- only open trusted files.
with open("data/train_4000_final.p", "rb") as train_file:
    train_df = pickle.load(train_file)

In [2]:
# Replace every NaN in the training dataframe with the explicit category "unk".
train_df = train_df.fillna("unk")

In [3]:
import pickle

# Read the pickled test dataframe from disk.
# NOTE(review): pickle can execute arbitrary code on load -- only open trusted files.
with open("data/test_1000_final.p", "rb") as test_file:
    test_df = pickle.load(test_file)

In [4]:
# Integer code of every categorical value, keyed by variable name.
# Only the symptoms (dysp, cough, nasal) have the extra "unk" class.
class_map = {
    "season": {"warm": 0, "cold": 1},
    "pneu": {"no": 0, "yes": 1},
    "inf": {"no": 0, "yes": 1},
    "dysp": {"no": 0, "yes": 1, "unk": 2},
    "cough": {"no": 0, "yes": 1, "unk": 2},
    "nasal": {"no": 0, "yes": 1, "unk": 2},
}

We create a PyTorch Dataset class that turns the background and symptom portion of all samples into a feature vector, while also storing the BioLORD embeddings as the text embedding.

In [5]:
from torch.utils.data import Dataset
import torch
import numpy as np 
import pandas as pd
import itertools 

class TextSymptomDataset(Dataset):
    """
    Feature vector dataset for use in Pytorch models

    All samples are encoded once, at construction time, into a dictionary of tensors
    (see preprocess); __getitem__ only indexes into those tensors.
    """
    def __init__(self, dataframe, keeplen, class_map, emb_type, feature_names, interactions, device):
        """
        dataframe: dataset containing samples made up of values for the background, disease and symptom variables, as well as text embeddings
        keeplen: discard all samples past this index, except when building up the interaction features (-1 keeps every sample)
        class_map: dictionary mapping class names for each variable to index {"var_name": {"class_name": int}}
        emb_type: name of embedding to use, will be "BioLORD emb" in our models
        feature_names: names of categorical input features
        interactions: whether to include extra features at the input capturing the inter-variable interactions between feature_names
        device: device to load tensors to
        """

        # zero-argument super() starts the lookup at Dataset itself;
        # super(Dataset, self) started *after* Dataset in the MRO and skipped it
        super().__init__()

        self.dataframe = dataframe
        self.keeplen = keeplen
        self.class_map = class_map
        self.emb_type = emb_type
        self.device = device
        self.feature_names = feature_names
        self.interactions = interactions

        self.preprocessed_data = self.preprocess()

    def __len__(self):
        """ 
        returns number of samples in dataset (after the keeplen cut-off)
        """

        return len(self.preprocessed_data["index"])

    @staticmethod
    def _encode_interaction(df, columns):
        """
        one-hot encode the joint value of several columns
        df: dataframe holding the columns
        columns: tuple of column names whose values are joined with "_" into a single categorical feature

        returns: dataframe with one 0/1 column per joint value that occurs in df,
                 named "<col1>_<col2>[_...]_<val1>_<val2>[_...]"
        """

        interaction_name = '_'.join(str(col) for col in columns)
        joint_value = df[columns[0]].astype(str)
        for col in columns[1:]:
            joint_value = joint_value + '_' + df[col].astype(str)
        return pd.get_dummies(joint_value, prefix=interaction_name)

    def generate_one_hot(self, df):
        """
        generate one-hot encodings for all samples in the dataframe
        df: dataframe containing values for all variables at input 

        returns: dataframe with full background+symptom encoding for every sample 
                 number of columns = 2 + 3 + 3 + 3 + #pairwise interactions + #threeway interactions + #fourway interactions
                 (only values and value combinations that actually occur in df get a column, so the width depends on the data)
                 each column contains either 0 or 1
        """

        # Feature-wise one-hot encodings
        season_encoded = pd.get_dummies(df['season'], prefix='season') # dim 2
        dysp_encoded = pd.get_dummies(df['dysp'], prefix='dysp') # dim 3
        cough_encoded = pd.get_dummies(df['cough'], prefix='cough') # dim 3
        nasal_encoded = pd.get_dummies(df['nasal'], prefix='nasal') # dim 3

        # Concatenate feature-wise one-hot encodings
        df_single = pd.concat([season_encoded, dysp_encoded, cough_encoded, nasal_encoded], axis=1) # dim 11

        # If no interaction features requested, we output concatenated one-hot encodings
        if not self.interactions:
            return df_single

        # If interaction features requested, we create new features for all pairwise, three-way and four-way combinations of input values
        # The encoded pieces are collected in lists and concatenated once at the end,
        # instead of growing a dataframe column block by column block inside the loops
        feature_cols = list(df.columns)

        # Pairwise interactions -> one categorical feature per pair of input columns
        pairwise_encoded = [self._encode_interaction(df, combo)
                            for combo in itertools.combinations(feature_cols, 2)]

        # Three-way interactions -> built from the first four input columns only
        threeway_encoded = [self._encode_interaction(df, combo)
                            for combo in itertools.combinations(feature_cols[:4], 3)]

        # Total configuration -> four-way interactions; treat all possible combinations of background and symptom values as a feature
        total_configurations = list(itertools.product(df['season'].unique(), df['dysp'].unique(), df['cough'].unique(), df['nasal'].unique()))

        # One boolean indicator column per configuration, created in a single frame
        # (.to_numpy() avoids any index alignment, the index may contain duplicate labels)
        config_flags = pd.DataFrame(
            {
                f'season={season}_dysp={dysp}_cough={cough}_nasal={nasal}': (
                    (df['season'] == season) & (df['dysp'] == dysp) & (df['cough'] == cough) & (df['nasal'] == nasal)
                ).to_numpy()
                for season, dysp, cough, nasal in total_configurations
            },
            index=df.index,
        )

        # One-hot encode the total configuration: idxmax picks the name of the matching indicator column
        total_config_encoded = pd.get_dummies(config_flags.idxmax(axis=1), prefix='total_config')

        # Concatenate all one-hot encodings, in the order single / pairwise / three-way / four-way
        return pd.concat([df_single, *pairwise_encoded, *threeway_encoded, total_config_encoded], axis=1)

    def preprocess(self):
        """
        turn dataframe of length n into a dictionary of tensors 
        use class_map to map variable classes (e.g. "no"/"yes") to float (e.g. 0.0/1.0). use np.nan for missing values
        dict contains {"season": tensor(dim=n), "pneu": tensor(dim=n), "inf": tensor(dim=n), "dysp": tensor(dim=n), "cough":tensor(dim=n), "nasal":tensor(dim=n),
                       "sympt_features":tensor(dim=(n,m)), "text":tensor(dim=(n,emb_dim)), "index":tensor(dim=n)} 
        with m = #features in symptom encoding (output of generate_one_hot)
        the encoding is computed on the full dataframe; only afterwards are all tensors cut to the first keeplen samples
        """

        preprocessed_data = {}

        for var in self.class_map: 
            values = self.dataframe[var].apply(lambda x: self.class_map[var][x] if not pd.isna(x) else np.nan).values
            preprocessed_data[var] = torch.tensor(values, dtype=torch.float, device=self.device)

        # stack the per-row embeddings into one array first: building a tensor
        # straight from a list of arrays is very slow and makes torch emit a warning
        embeddings = np.asarray(self.dataframe[self.emb_type].tolist())
        preprocessed_data["text"] = torch.tensor(embeddings, dtype=torch.float, device=self.device)
        preprocessed_data["index"] = torch.tensor(self.dataframe.index.tolist(), device=self.device)

        df = self.dataframe.replace(self.class_map)
        df_encoded = self.generate_one_hot(df[self.feature_names])
        preprocessed_data["sympt_features"] = torch.tensor(df_encoded.to_numpy(dtype=np.float32), dtype=torch.float, device=self.device)

        if self.keeplen != -1: 
            preprocessed_data = {key: val[:self.keeplen] for key, val in preprocessed_data.items()}

        return preprocessed_data

    def len_sympt_obs(self): 
        """
        get the number of rows of the stored dataframe that contain no NaN in any column
        (computed on the raw dataframe, so the keeplen cut-off is not applied)
        """

        df_sympt_obs = self.dataframe.dropna(axis=0, how="any") # drop all records with a missing value
        return len(df_sympt_obs)

    def __getitem__(self, index):
        """
        return sample {"season": tensor(dim=1), "pneu": tensor(dim=1), "inf": tensor(dim=1), "dysp": tensor(dim=1), "cough":tensor(dim=1), "nasal":tensor(dim=1),
                       "sympt_features":tensor(dim=m), "text":tensor(dim=emb_dim), "index": tensor(dim=1)} 
        with m = #features in symptom encoding (output of generate_one_hot)
        """

        return {key: tensor[index] for key, tensor in self.preprocessed_data.items()}

Below, we encode the train set with feature interactions turned on. Note that the "sympt_features" input vector has 101 dimensions.  

In [6]:
# Encoding configuration for the training set.
emb_type = "BioLORD emb"  # dataframe column that holds the text embeddings
feature_names = ["season", "dysp", "cough", "nasal"]
interactions = True
keeplen = -1  # -1 keeps every sample

# Train and test tensors are placed on the GPU when one is available.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

train_set = TextSymptomDataset(
    dataframe=train_df,
    keeplen=keeplen,
    class_map=class_map,
    emb_type=emb_type,
    feature_names=feature_names,
    interactions=interactions,
    device=device,
)

  preprocessed_data["text"] = torch.tensor(self.dataframe[self.emb_type].tolist(), dtype=torch.float, device=self.device)


In [19]:
# Inspect the first encoded training sample.
train_set[0]

{'season': tensor(1.),
 'pneu': tensor(0.),
 'inf': tensor(1.),
 'dysp': tensor(0.),
 'cough': tensor(1.),
 'nasal': tensor(1.),
 'text': tensor([ 3.5453e-01,  3.6790e-01, -1.1390e-02,  2.3277e-01, -7.3408e-02,
          3.7717e-01,  5.5223e-02,  3.7554e-02, -3.3992e-01, -1.9111e-01,
         -1.1521e-01,  7.4201e-02,  1.6542e-01,  3.5197e-01, -1.3381e-02,
         -3.1276e-01,  2.1234e-02, -2.5756e-01, -1.4279e-01, -1.1621e-01,
         -9.7627e-02, -5.7466e-02, -2.5728e-01, -9.0918e-02,  1.2057e-02,
         -1.3028e-01,  1.0703e-01,  1.3443e-02, -1.0396e-01,  6.8164e-02,
          1.4590e-01,  2.4242e-01,  2.0206e-01, -4.7323e-01, -2.2851e-01,
         -4.0334e-02, -1.6823e-01,  8.0301e-03, -2.6753e-01, -6.9912e-02,
         -3.0163e-03, -1.0121e-01, -2.9026e-02,  7.2016e-02,  5.0150e-02,
          7.1253e-02,  1.2259e-01,  1.0259e-01, -9.9757e-02,  3.0487e-02,
         -1.9827e-01, -2.2960e-01, -3.2041e-01, -8.0531e-02,  2.2730e-01,
         -1.0927e-01,  1.7954e-01, -1.7577e-01,  

In [21]:
# Width of the symptom/background feature vector of the first training sample.
len(train_set[0]["sympt_features"])

101

We need to ensure that the test set is encoded with the same interaction features as the train set. However, the symptoms are never unobserved in the test set, so if we passed it to the "TextSymptomDataset" class as is, some combinations would be missing and the feature representations of the train and test sets would not match up. For this reason, we append all possible combinations of the background and symptom variables to the test set, so that these are also used to build up the interaction features, and then use the keeplen argument to ensure that only real test samples are retained in the final dataset. 

In [22]:
# Embedding of the empty text "", used as a placeholder for the synthetic rows below.
empty_text_rows = train_df[train_df["text"] == ""]
empty_text_emb = empty_text_rows.iloc[0][emb_type] # empty text embedding for evaluation purposes

# Two rows with all symptoms unknown, one per season.
unk_entries = pd.DataFrame({
    "season": ["warm", "cold"],
    "dysp": 2 * ["unk"],
    "cough": 2 * ["unk"],
    "nasal": 2 * ["unk"],
    "BioLORD emb": 2 * [empty_text_emb],
})

# Every season x dysp x cough x nasal combination of observed values (16 rows).
combo_rows = list(itertools.product(["warm", "cold"], ["yes", "no"], ["yes", "no"], ["yes", "no"]))
combo_season, combo_dysp, combo_cough, combo_nasal = (list(column) for column in zip(*combo_rows))
all_combos = pd.DataFrame({
    "season": combo_season,
    "dysp": combo_dysp,
    "cough": combo_cough,
    "nasal": combo_nasal,
    "BioLORD emb": len(combo_rows) * [empty_text_emb],
})

# ensure correct encoding (test set does not normally contain nans) -> the extra rows are removed again in TextSymptomDataset
test_df_ext = pd.concat([test_df, unk_entries, all_combos], axis=0)
test_set = TextSymptomDataset(
    test_df_ext,
    1000,
    class_map,
    emb_type,
    ["season", "dysp", "cough", "nasal"],
    interactions=interactions,
    device=device,
)

In [23]:
# Width of the symptom/background feature vector of the first test sample.
len(test_set[0]["sympt_features"])

101

In [7]:
from torch.utils.data import DataLoader
from torch.optim import Adam
import torch
from utils.models import TextEmbClassifier

def train_diag_classifier(train, test, diag_name, n_emb, hidden_dim, dropout, bs_train=100, epochs=100, seed=2023, lr=0.0001, weight_decay=1e-5):
    """
    training loop for diagnosis classifier
    
    train: set of training samples
    test: set of test samples
    diag_name: name of output variable (pneu or inf) to learn to classify
    n_emb: size of the model input (text embedding concatenated with the symptom features)
    hidden_dim: list of hidden dimensions to use in TextEmbClassifier
    dropout: dropout setting passed on to TextEmbClassifier
    
    bs_train: batch size to use for training
    epochs: number of epochs to train for
    seed: seed to use for initialization
    lr: learning rate 
    weight_decay: L2 penalty parameter

    the model is moved to the module-level `device`, which must be defined before calling

    returns
        train_loss: list of train losses across epochs (summed BCE divided by len(train))
        test_loss: list of test losses across epochs (summed BCE divided by len(test))
        model: trained classifier for diag_name
    """
    
    torch.manual_seed(seed)
    
    train_loader = DataLoader(train, batch_size=bs_train, shuffle=True)
    test_loader = DataLoader(test, batch_size=len(test), shuffle=False) # whole test set in a single batch

    # put model on the device
    model = TextEmbClassifier(n_emb, hidden_dim, dropout, seed)
    model.to(device)

    adam = Adam(params=model.parameters(), lr=lr, weight_decay=weight_decay)
    loss = torch.nn.BCELoss(reduction="none")

    train_loss = []
    test_loss = []

    for epoch in range(epochs):

        epoch_loss = 0

        model.train() # put model in train mode (once per epoch; eval mode is only set after the batch loop)

        for x in train_loader: 

            adam.zero_grad()

            features = torch.cat((x["text"], x["sympt_features"]), dim=1) # concatenate text and symptom features at input

            # flatten to one prediction per sample; unlike squeeze(), reshape(-1) keeps
            # a 1-d tensor for a final batch of size 1, so it still matches the target shape
            pred = model(features).reshape(-1)
            batch_loss = loss(pred, x[diag_name]).sum()
            batch_loss.backward()

            epoch_loss += batch_loss.item()
            
            torch.nn.utils.clip_grad_value_(model.parameters(), 5) # clip every gradient entry to [-5, 5] before the update
            adam.step()
        
        train_loss.append(epoch_loss/len(train))

        model.eval() # put model in eval mode
        with torch.no_grad():
            for x_test in test_loader: 

                features = torch.cat((x_test["text"], x_test["sympt_features"]), dim=1)

                pred = model(features).reshape(-1)
                batch_loss = loss(pred, x_test[diag_name]).sum()
                test_loss.append(batch_loss.item()/len(test))

    return train_loss, test_loss, model

In [8]:
import pandas as pd

def predict_diagnoses(model, diag, test_set, unk_features, bs, excl_text=False, excl_sympt=False, empty_text_emb=None): 

    """ 
    build prediction dataframe for diagnosis (pneu/inf) based on predictions made by trained model for set of test samples

    model: trained diagnosis classifier
    diag: diagnosis to predict 
    test_set: dataset of test cases; every sample is a dict of tensors with at least the keys
              "season", "text" and "sympt_features", and the dataset exposes an `interactions` attribute
    unk_features: feature representation of sample with unknown symptoms, to use at input when excl_sympt=True
                  dimension (2, 101) (contains representation for both season=0 and season=1)
    bs: batch size to use when looping over test cases
    excl_text: if True, use empty text embedding in input vector, calculate P(diag=yes|background,symptoms)
    excl_sympt: if True, use unknown symptom representation in input vector, calculate P(diag=yes|background,text)
                if excl_text and excl_sympt are both False, we calculate P(diag=yes|background,symptoms,text)
    empty_text_emb: embedding of empty text "" to use at input when excl_text=True (tensor or array)

    returns: dataframe with one row per test sample, in dataset order, containing every sample field except
             "text" and "sympt_features", plus the prediction for the diagnosis (pred_pneu/pred_inf)
             and "emb", the text embedding that was actually fed to the model
    raises: ValueError if excl_text=True but no empty_text_emb is given
    """

    if excl_text and empty_text_emb is None:
        raise ValueError("empty_text_emb must be provided when excl_text=True")

    # no shuffling: predictions are returned in the same order as the samples in test_set
    test_loader = DataLoader(test_set, batch_size=bs, shuffle=False)

    if excl_sympt and not test_set.interactions:
        unk_features = unk_features[:, :11] # if no interactions, only select individual features

    model.eval() # set model in eval mode

    batch_frames = []

    for x in test_loader: 

        with torch.no_grad():
            if excl_text: # P(Diag=yes|background,symptoms)
                # match dtype/device of the batch so the concatenation below also works off-CPU
                empty_emb = torch.as_tensor(empty_text_emb, dtype=x["text"].dtype, device=x["text"].device)
                x["text"] = empty_emb.unsqueeze(0).expand(x["text"].shape[0], -1) # set all embeddings to empty 
            if excl_sympt:  # P(Diag=yes|background,text)
                            # set to encoding corresponding to unknown features, while background remains known
                unk = torch.as_tensor(unk_features, dtype=x["sympt_features"].dtype, device=x["sympt_features"].device)
                # row 0 / row 1 of unk is picked per sample from its season value -> shape (bs, 101) or (bs, 11)
                x["sympt_features"] = unk[x["season"].long()]

            features = torch.cat((x["text"], x["sympt_features"]), dim=1)
            res_diag = model(features) # P(Diag=yes|background,symptoms,text)

        batch_df = pd.DataFrame({key:val.cpu().numpy() for key, val in x.items() if key != "text" and key != "sympt_features"})
        batch_df[f"pred_{diag}"] = res_diag.reshape(-1).cpu().numpy() # one prediction per sample as a 1-d column
        batch_df["emb"] = list(x["text"].cpu())
        batch_frames.append(batch_df)

    if not batch_frames: # empty test set
        return pd.DataFrame()

    # concatenate once instead of growing the result frame batch by batch
    return pd.concat(batch_frames, ignore_index=True)

We execute the train loop and calculate the average precision of the following predictions on the test set:
- P(Diag=yes|background,symptoms,text)
- P(Diag=yes|background,symptoms) (input empty text embedding)
- P(Diag=yes|background,text) (input feature representation of unknown symptoms)

All predictions are calculated via the function above. 

We optimize the following parameters by maximizing average precision over P(Diag=yes|background,symptoms,text) on a validation split:
- dropout
- bs_train
- epochs
- lr
- weight_decay
- hidden_dim (separately for "pneu" and "inf")
- interactions (separately for "pneu" and "inf")

We used the following validation split to optimize these parameters: 

In [None]:
from sklearn.model_selection import train_test_split
# Fixed seed so the train/validation split is reproducible.
seed = 2024
# 80% of train_df goes to subtrain_df, the remainder to val_df.
subtrain_df, val_df = train_test_split(train_df, train_size=0.8, random_state=seed) # use val_df in code below to optimize hyperparameters

The two code cells below show the final hyperparameters chosen for the "inf" and "pneu" classifiers respectively. We train the models on the full train set and evaluate on the test set. The final results reported in the paper were obtained by running this code over 5 initialization seeds [422, 957, 267, 956, 781].

In [14]:
from utils.evaluation import performance_metrics

seeds = [422, 957, 267, 956, 781]

# hyperparameter settings
interactions = True
diag_name = "inf"
if interactions:
    n_emb = 768+101
else: 
    n_emb = 768+11
hidden_dim = [1]
dropout = 0.7
bs_train = 256
epochs = 200
lr = 0.001
weight_decay = 1e-3

# data settings, defined before their first use so this cell does not depend on values left over from earlier cells
emb_type = "BioLORD emb"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # put train and test data on the device
feature_names = ["season", "dysp", "cough", "nasal"]

# extend test set with all possible symptom combinations
empty_text_emb = train_df[train_df["text"] == ""].iloc[0][emb_type] # empty text embedding for evaluation purposes
unk_entries = pd.DataFrame({"season": ["warm", "cold"], "dysp": ["unk", "unk"], "cough": ["unk", "unk"], "nasal": ["unk", "unk"], emb_type: 2*[empty_text_emb]})
# all 16 season x dysp x cough x nasal combinations of observed values
combo_rows = list(itertools.product(["warm", "cold"], ["yes", "no"], ["yes", "no"], ["yes", "no"]))
combo_season, combo_dysp, combo_cough, combo_nasal = (list(column) for column in zip(*combo_rows))
all_combos = pd.DataFrame({"season": combo_season, "dysp": combo_dysp, "cough": combo_cough, "nasal": combo_nasal, emb_type: len(combo_rows)*[empty_text_emb]})
test_df_ext = pd.concat([test_df, unk_entries, all_combos], axis=0) # ensure correct encoding (test set does not normally contain nans) -> are removed again in TextSymptomDataset

# create feature representations for train and test set
train_set = TextSymptomDataset(train_df, -1, class_map, emb_type, feature_names, interactions=interactions, device=device)
test_set = TextSymptomDataset(test_df_ext, 1000, class_map, emb_type, feature_names, interactions=interactions, device=device) # keeplen=1000 drops the appended synthetic rows again

# evaluation inputs do not depend on the seed, so they are built once and placed on the same device as the data
unk_features = torch.tensor([[1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.], # encoding of unk features when season = warm
        [0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], device=device) # encoding of unk features when season = cold
empty_text_emb_tensor = torch.tensor(empty_text_emb, dtype=torch.float, device=device)

results = {}

for seed in seeds: 
    results[seed] = {}

    # train diagnosis classifier
    train_loss, test_loss, model = train_diag_classifier(train_set, test_set, diag_name, n_emb, hidden_dim, dropout, bs_train=bs_train, epochs=epochs, seed=seed, lr=lr, weight_decay=weight_decay)

    # predict P(diag|evidence) for test set, with various selections of evidence
    evaluations = [
        # P(diag=yes|background,symptoms,text)
        (f"P({diag_name}|season,symptoms,text) test PR", {}),
        # P(diag=yes|background,symptoms)
        (f"P({diag_name}|season,symptoms) test PR", {"excl_text": True, "empty_text_emb": empty_text_emb_tensor}),
        # P(diag=yes|background,text)
        (f"P({diag_name}|season,text) test PR", {"excl_sympt": True}),
    ]
    for result_key, evidence_kwargs in evaluations:
        pred_df = predict_diagnoses(model, diag_name, test_set, unk_features, 100, **evidence_kwargs)
        _, ap = performance_metrics(pred_df, diag_name, model_type="ff", plot=False) # calculate average precision for these predictions, comparing with ground truth diagnosis values
        results[seed][result_key] = ap

In [15]:
# Display the per-seed metrics collected in `results` by the loop above.
results

{422: {'P(inf|season,symptoms,text) test PR': 0.9064023507732217,
  'P(inf|season,symptoms) test PR': 0.8811287849271705,
  'P(inf|season,text) test PR': 0.884546608253735},
 957: {'P(inf|season,symptoms,text) test PR': 0.9059643432904488,
  'P(inf|season,symptoms) test PR': 0.8808977461927195,
  'P(inf|season,text) test PR': 0.8822361432254402},
 267: {'P(inf|season,symptoms,text) test PR': 0.9017053513420118,
  'P(inf|season,symptoms) test PR': 0.881056510415434,
  'P(inf|season,text) test PR': 0.8803060505952348},
 956: {'P(inf|season,symptoms,text) test PR': 0.9046857932215641,
  'P(inf|season,symptoms) test PR': 0.881056510415434,
  'P(inf|season,text) test PR': 0.8816843166468447},
 781: {'P(inf|season,symptoms,text) test PR': 0.9026494900326892,
  'P(inf|season,symptoms) test PR': 0.8811140819639608,
  'P(inf|season,text) test PR': 0.8816974152271206}}

In [18]:
from utils.evaluation import performance_metrics

seeds = [422, 957, 267, 956, 781]

# hyperparameter settings
interactions = False
diag_name = "pneu"
if interactions:
    n_emb = 768+101
else: 
    n_emb = 768+11
hidden_dim = [256, 1]
dropout = 0.7
bs_train = 256
epochs = 200
lr = 0.001
weight_decay = 1e-3

# data settings, defined before their first use so this cell does not depend on values left over from earlier cells
emb_type = "BioLORD emb"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # put train and test data on the device
feature_names = ["season", "dysp", "cough", "nasal"]

# extend test set with all possible symptom combinations
empty_text_emb = train_df[train_df["text"] == ""].iloc[0][emb_type] # empty text embedding for evaluation purposes
unk_entries = pd.DataFrame({"season": ["warm", "cold"], "dysp": ["unk", "unk"], "cough": ["unk", "unk"], "nasal": ["unk", "unk"], emb_type: 2*[empty_text_emb]})
# all 16 season x dysp x cough x nasal combinations of observed values
combo_rows = list(itertools.product(["warm", "cold"], ["yes", "no"], ["yes", "no"], ["yes", "no"]))
combo_season, combo_dysp, combo_cough, combo_nasal = (list(column) for column in zip(*combo_rows))
all_combos = pd.DataFrame({"season": combo_season, "dysp": combo_dysp, "cough": combo_cough, "nasal": combo_nasal, emb_type: len(combo_rows)*[empty_text_emb]})
test_df_ext = pd.concat([test_df, unk_entries, all_combos], axis=0) # ensure correct encoding (test set does not normally contain nans) -> are removed again in TextSymptomDataset

# create feature representations for train and test set
train_set = TextSymptomDataset(train_df, -1, class_map, emb_type, feature_names, interactions=interactions, device=device)
test_set = TextSymptomDataset(test_df_ext, 1000, class_map, emb_type, feature_names, interactions=interactions, device=device) # keeplen=1000 drops the appended synthetic rows again

# evaluation inputs do not depend on the seed, so they are built once and placed on the same device as the data
# (with interactions=False, predict_diagnoses only uses the first 11 entries of each row)
unk_features = torch.tensor([[1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.], # encoding of unk features when season = warm
        [0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], device=device) # encoding of unk features when season = cold
empty_text_emb_tensor = torch.tensor(empty_text_emb, dtype=torch.float, device=device)

results = {}

for seed in seeds: 
    results[seed] = {}

    # train diagnosis classifier
    train_loss, test_loss, model = train_diag_classifier(train_set, test_set, diag_name, n_emb, hidden_dim, dropout, bs_train=bs_train, epochs=epochs, seed=seed, lr=lr, weight_decay=weight_decay)

    # predict P(diag|evidence) for test set, with various selections of evidence
    evaluations = [
        # P(diag=yes|background,symptoms,text)
        (f"P({diag_name}|season,symptoms,text) test PR", {}),
        # P(diag=yes|background,symptoms)
        (f"P({diag_name}|season,symptoms) test PR", {"excl_text": True, "empty_text_emb": empty_text_emb_tensor}),
        # P(diag=yes|background,text)
        (f"P({diag_name}|season,text) test PR", {"excl_sympt": True}),
    ]
    for result_key, evidence_kwargs in evaluations:
        pred_df = predict_diagnoses(model, diag_name, test_set, unk_features, 100, **evidence_kwargs)
        _, ap = performance_metrics(pred_df, diag_name, model_type="ff", plot=False) # calculate average precision for these predictions, comparing with ground truth diagnosis values
        results[seed][result_key] = ap

In [19]:
# Display the per-seed metrics collected in `results` by the loop above.
results

{422: {'P(pneu|season,symptoms,text) test PR': 0.6594485107196193,
  'P(pneu|season,symptoms) test PR': 0.10604823419110285,
  'P(pneu|season,text) test PR': 0.6361288417163994},
 957: {'P(pneu|season,symptoms,text) test PR': 0.642421513640098,
  'P(pneu|season,symptoms) test PR': 0.10849483975931845,
  'P(pneu|season,text) test PR': 0.6124411621296232},
 267: {'P(pneu|season,symptoms,text) test PR': 0.6741234250542154,
  'P(pneu|season,symptoms) test PR': 0.10729524870502026,
  'P(pneu|season,text) test PR': 0.6420120238327062},
 956: {'P(pneu|season,symptoms,text) test PR': 0.6639545456470064,
  'P(pneu|season,symptoms) test PR': 0.10631460627670511,
  'P(pneu|season,text) test PR': 0.6137191741098247},
 781: {'P(pneu|season,symptoms,text) test PR': 0.646584904387248,
  'P(pneu|season,symptoms) test PR': 0.10823646873918542,
  'P(pneu|season,text) test PR': 0.6063633541805298}}