# Labeling Questions and Answers using deberta-v3-large-zeroshot-v2.0

Model: https://huggingface.co/MoritzLaurer/deberta-v3-large-zeroshot-v2.0

This notebook is written to be run on Kaggle with a GPU T4 accelerator. Before running, upload a dataset called "mts_dialogue_qa_dataset" containing the files "mts_dialog_questions.csv" and "mts_dialog_answers.csv", created using the "make_mts_dialog_dataset.ipynb" notebook, and add this dataset to the Kaggle notebook's input.

In [None]:
from transformers import pipeline

model_name = "MoritzLaurer/deberta-v3-large-zeroshot-v2.0"

# Name the task explicitly instead of letting `pipeline` infer it from the
# model's Hub metadata: the cells below call the result with candidate labels,
# a hypothesis template and `multi_label`, i.e. as a zero-shot classifier.
# NOTE(review): no `device` is passed here; depending on the installed
# transformers version the model may run on CPU even with a GPU attached.
# Confirm, and pass e.g. `device=0` if it does.
zs_text_classifier = pipeline("zero-shot-classification", model=model_name)

## Label doctor questions with emote code

In [None]:
import pandas as pd

# Load the doctor-question table; label_emotes() reads its "doctor_q" column
# and writes an "emote" column back into this same global `df`.
# NOTE(review): the dataset folder here ("mts_dialogue_qa_dataset") differs
# from the one used for the answers file further down ("mts-dialog-qa-dataset").
# Confirm the actual mount path under /kaggle/input.
df = pd.read_csv("/kaggle/input/mts_dialogue_qa_dataset/mts_dialog_questions.csv")
df

In [None]:
# Emote codes offered to the zero-shot classifier for doctor questions.
# label_emotes() looks up `candidate_labels` and `hypothesis_template` as
# globals when it is called, and the answers section further down rebinds both
# names, so re-run this cell before calling label_emotes() again.
candidate_labels = ["Affirmative", "Empathetic", "Apologetic", "Neutral"]

# The classifier fills "{}" with each candidate label in turn.
hypothesis_template = "The emotion of this text is {}"

def label_emotes(start_idx, stop_idx):
    """Label doctor questions in the global ``df`` with an emote code.

    Classifies ``df["doctor_q"]`` for every row whose index label lies in
    ``start_idx..stop_idx`` and stores the chosen label in ``df["emote"]``.
    The range is sliced with ``DataFrame.loc``, so both ends are inclusive.

    The notebook globals ``df``, ``zs_text_classifier``, ``candidate_labels``
    and ``hypothesis_template`` are read at call time.

    Label choice per row:
      * top label is "Neutral" and the runner-up scores above 0.85
        -> the runner-up label
      * otherwise, top label scores above 0.5 -> the top label
      * otherwise, or if the classifier raises -> "Neutral"

    Args:
        start_idx: First index label to process (inclusive).
        stop_idx: Last index label to process (inclusive).

    Returns:
        None. ``df`` is modified in place.
    """
    default_label = "Neutral"

    for idx, question in df.loc[start_idx:stop_idx, "doctor_q"].items():
        label = default_label
        try:
            classified = zs_text_classifier(
                question,
                candidate_labels,
                hypothesis_template=hypothesis_template,
                multi_label=True,
            )
        except Exception as err:
            # Best-effort labeling: a row the classifier cannot handle keeps
            # the default label. Previously execution fell through and reused
            # the previous row's result (or hit a NameError on the first row).
            print(f"ignore row {idx}: {err!r}")
        else:
            # The thresholds below rely on the pipeline returning labels and
            # scores ordered from highest to lowest score.
            labels, scores = classified["labels"], classified["scores"]
            if labels[0] == "Neutral" and scores[1] > .85:
                # A strong runner-up emotion wins over a "Neutral" top label.
                label = labels[1]
            elif scores[0] > .5:
                label = labels[0]

        df.loc[idx, "emote"] = label
    

In [None]:
# Inclusive index-label range of the rows to label (label_emotes slices the
# frame with df.loc, so the row at stop_idx is processed too).
# NOTE(review): 5808 is hard-coded; confirm it reaches the last row of df,
# otherwise the remaining rows are left without an "emote" value.
start_idx = 0
stop_idx = 5808

label_emotes(start_idx, stop_idx)

# Save the labeled questions to the notebook's working directory, without the
# index column, then display the frame.
df.to_csv("mts_dialog_questions_w_emotes.csv", index=False)
df

## Label patient responses as affirmative

In [None]:
import pandas as pd

# Rebind the global `df` to the patient-answer table; label_affirmative()
# reads its "patient_a" column and writes an "affirmative" column back.
# NOTE(review): the dataset folder here ("mts-dialog-qa-dataset") differs from
# the one used for the questions file earlier ("mts_dialogue_qa_dataset").
# Confirm the actual mount path under /kaggle/input.
df = pd.read_csv("/kaggle/input/mts-dialog-qa-dataset/mts_dialog_answers.csv")
df

In [None]:
# Truth values offered to the zero-shot classifier for patient answers.
# These two assignments rebind the same global names that the question
# labeling section used for its emote labels and template.
candidate_labels = ["True", "False", "Unidentifiable"]

# The classifier fills "{}" with each candidate label in turn.
hypothesis_template = "As an answer to a question, it is {} that this response is affirmative"

def label_affirmative(start_idx, stop_idx):
    """Label patient answers in the global ``df`` as affirmative or not.

    Classifies ``df["patient_a"]`` for every row whose index label lies in
    ``start_idx..stop_idx`` and stores the result in ``df["affirmative"]``.
    The range is sliced with ``DataFrame.loc``, so both ends are inclusive.

    The notebook globals ``df``, ``zs_text_classifier``, ``candidate_labels``
    and ``hypothesis_template`` are read at call time.

    Label choice per row: the top-scoring candidate label when its score is
    above 0.5; otherwise, or if the classifier raises, "Unknown".

    Args:
        start_idx: First index label to process (inclusive).
        stop_idx: Last index label to process (inclusive).

    Returns:
        None. ``df`` is modified in place.
    """
    default_label = "Unknown"

    for idx, answer in df.loc[start_idx:stop_idx, "patient_a"].items():
        label = default_label
        try:
            classified = zs_text_classifier(
                answer,
                candidate_labels,
                hypothesis_template=hypothesis_template,
                multi_label=True,
            )
        except Exception as err:
            # Best-effort labeling: a row the classifier cannot handle keeps
            # the default label. Previously execution fell through and reused
            # the previous row's result (or hit a NameError on the first row).
            print(f"ignore row {idx}: {err!r}")
        else:
            # Relies on the pipeline returning labels and scores ordered from
            # highest to lowest score, so position 0 is the best candidate.
            if classified["scores"][0] > .5:
                label = classified["labels"][0]

        df.loc[idx, "affirmative"] = label
    

In [None]:
# Inclusive index-label range of the rows to label (label_affirmative slices
# the frame with df.loc, so the row at stop_idx is processed too).
# NOTE(review): 5478 is hard-coded; confirm it reaches the last row of df,
# otherwise the remaining rows are left without an "affirmative" value.
start_idx = 0
stop_idx = 5478

label_affirmative(start_idx, stop_idx)

# Save the labeled answers to the notebook's working directory, without the
# index column, then display the frame.
df.to_csv("mts_dialog_answers_with_yn.csv", index=False)
df