In [None]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
from tqdm.auto import tqdm
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset

In [None]:
# https://huggingface.co/docs/transformers/v4.26.1/en/main_classes/pipelines#transformers.ZeroShotClassificationPipeline
# Name the task explicitly instead of letting `pipeline` infer it from the
# model, so the cell's intent is visible and does not depend on model metadata.
pipe = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
    framework="pt",
    device=0,  # GPU ordinal 0; the next cell asserts the pipeline is on CUDA
)

In [None]:
# Fail fast, with a readable message, if the pipeline is not on a CUDA device.
assert pipe.device.type == "cuda", (
    f"expected the pipeline on a CUDA device, got {pipe.device!r}"
)

In [None]:
# Smoke test: score one toy sentence against a handful of candidate labels.
# multi_label=True is passed so every label receives its own score.
pipe(
    sequences="I have a problem with my iphone that needs to be resolved asap!",
    candidate_labels=[
        "urgent",
        "not urgent",
        "phone",
        "tablet",
        "computer",
    ],
    multi_label=True,
)

In [None]:
# Load the CheXbert labeler output (one row per report).
# NOTE(review): absolute, machine-specific path; consider a configurable data dir.
chexbert_results = pd.read_csv(
    "/root/Project-CS224N-ED-Disposition/CheXbert-Labeler/labeled_reports_output.csv"
)
chexbert_results.head()

In [None]:
# Peek at the free-text column that is fed to the zero-shot classifier.
chexbert_results.loc[:, "Report Impression"].head()

In [None]:
# Candidate labels handed to the zero-shot classifier.
labels = [
    "Fracture",
    "Edema",
    "Cardiomegaly",
    "Pneumonia",
    "Atelectasis",
    "Pneumothorax",
    "Pleural Effusion",
]

In [None]:
# Same preview as a plain Python list, the shape the pipeline accepts.
chexbert_results.loc[:, "Report Impression"].head().to_list()

In [None]:
# Dry run on the first five impressions before classifying the whole table.
pred5 = pipe(
    chexbert_results["Report Impression"].iloc[:5].to_list(),
    multi_label=True,
    candidate_labels=labels,
)
len(pred5)

In [None]:
# Wrap only the text column in a `datasets.Dataset` for the pipeline call below.
dataset = Dataset.from_pandas(chexbert_results.loc[:, ["Report Impression"]])

In [None]:
# Classify every report. KeyDataset exposes just the "Report Impression" field
# of each dataset row to the pipeline.
# NOTE(review): the next cell iterates `pred_all`, so this presumably returns a
# lazy iterator rather than a materialised list — confirm.
pred_all = pipe(
    KeyDataset(dataset, "Report Impression"),
    multi_label=True,
    candidate_labels=labels,
    batch_size=1,
)
len(pred_all)

In [None]:
# Drain the pipeline output into a list, with a progress bar.
results = list(tqdm(pred_all))

In [None]:
# Persist the collected pipeline outputs to disk. The context manager closes
# the file handle even if pickling fails part-way through.
with open('hug_results.pkl', 'wb') as results_file:
    pickle.dump(results, results_file)

In [None]:
# NOTE(review): scratch arithmetic unrelated to the analysis; candidate for deletion.
print(3)

In [None]:
# Show the first collected result; to_pandas() below reads its
# "sequence", "labels" and "scores" entries.
results[0]

In [None]:
# Compare against CheXbert, assuming CheXbert is the ground truth.

# NOTE(review): `threshold` is not read by any cell visible here; it is kept so
# that code relying on the module-level name keeps working.
threshold = 0.75


def to_pandas(results_list, labels=None):
    """Tabulate zero-shot classification results, one row per input text.

    Parameters
    ----------
    results_list : iterable of dict
        Each dict must provide ``"sequence"`` (the classified text) plus
        ``"labels"`` and ``"scores"`` as parallel sequences.
    labels : sequence of str, optional
        Score columns that are always present and come first. Defaults to the
        seven findings used throughout this notebook.

    Returns
    -------
    pandas.DataFrame
        One ``float64`` column per entry of ``labels`` (NaN where a result did
        not score that label), then an ``"Impression"`` column holding the
        text, then any additional labels that appeared in the results.
    """
    if labels is None:
        labels = ["Fracture", "Edema", "Cardiomegaly", "Pneumonia", "Atelectasis", "Pneumothorax", "Pleural Effusion"]
    labels = list(labels)

    # Build every row first and create the frame once: appending row by row is
    # quadratic, and DataFrame.append no longer exists in pandas >= 2.0.
    records = []
    for result in results_list:
        record = dict(zip(result["labels"], result["scores"]))
        record["Impression"] = result["sequence"]
        records.append(record)

    rdf = pd.DataFrame(records)

    # Fixed column order: known labels, the text, then unexpected labels.
    leading = labels + ["Impression"]
    extras = [col for col in rdf.columns if col not in leading]
    rdf = rdf.reindex(columns=leading + extras)

    # Guarantee the score dtype even for empty input or all-missing columns.
    return rdf.astype({label: "float64" for label in labels})

# One row per report: a score column per label plus the report text.
results_df = to_pandas(results_list=results)

In [None]:
def naive_bayes(y_pred, y_true, col_name):
    """Fit a one-feature Gaussian naive Bayes model for a single label.

    Parameters
    ----------
    y_pred : pandas.DataFrame
        Frame holding the predicted scores; only column ``col_name`` is used,
        as the sole feature.
    y_true : pandas.DataFrame
        Frame holding the reference labels; only column ``col_name`` is used.
    col_name : str
        Label to model; must be a column of both frames.

    Returns
    -------
    sklearn.naive_bayes.GaussianNB
        The fitted classifier.
    """
    # Double brackets keep X two-dimensional, as scikit-learn expects.
    X = y_pred[[col_name]]

    # Binarise the target: exactly 1 stays positive, every other value
    # (including missing entries, since NaN != 1) becomes 0.
    truth = y_true[col_name]
    y = truth.mask(truth != 1, 0)

    nb = GaussianNB()
    nb.fit(X, y)
    return nb

def evaluate(pred_df, true_df, col_name):
    """Fit and score a per-label classifier against the CheXbert labels.

    A Gaussian naive Bayes model is fitted on the predicted score column
    (via ``naive_bayes``) and evaluated on the same rows it was fitted on, so
    the printed metrics are in-sample.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Predicted scores; column ``col_name`` is the single feature.
    true_df : pandas.DataFrame
        Reference labels; in column ``col_name`` the value 1 is positive and
        everything else is treated as negative.
    col_name : str
        Label to evaluate.

    Returns
    -------
    sklearn.naive_bayes.GaussianNB
        The fitted classifier.
    """
    nb = naive_bayes(pred_df, true_df, col_name)

    # Rebuild the feature matrix and binary target that naive_bayes() fits on;
    # the original body read them from undefined / leaked globals.
    X = pred_df[[col_name]]
    truth = true_df[col_name]
    y = truth.mask(truth != 1, 0)

    y_pred = nb.predict(X)
    print(col_name)
    print(metrics.classification_report(y, y_pred))

    # ROC/AUC needs a continuous score: feeding hard 0/1 predictions collapses
    # the curve to a single operating point.
    classes = list(nb.classes_)
    if 1 in classes:
        y_score = nb.predict_proba(X)[:, classes.index(1)]
        fpr, tpr, _ = metrics.roc_curve(y, y_score, pos_label=1)
        auc = metrics.auc(fpr, tpr)
    else:
        # No positive example for this label: the ROC curve is undefined.
        auc = float("nan")
    print("AUC", auc)
    return nb
    
def compare_hist(y_pred, y_true, scale="", title=""):
    """Overlay histograms of predicted scores, split by reference class.

    Parameters
    ----------
    y_pred : array-like with ``.shape``
        Predicted scores.
    y_true : array-like with ``.shape``
        Reference labels of the same shape; entries equal to 1 form the
        positive group, every other entry (including missing) the negative one.
    scale : str, optional
        Pass ``"log"`` for a logarithmic y axis; any other value keeps it linear.
    title : str, optional
        Figure title.

    Raises
    ------
    AssertionError
        If ``y_pred`` and ``y_true`` differ in shape.
    """
    assert y_pred.shape == y_true.shape
    paired = pd.DataFrame({'y_pred': y_pred, 'y_true': y_true})

    is_positive = paired["y_true"] == 1
    true_scores = paired.loc[is_positive, "y_pred"]
    false_scores = paired.loc[~is_positive, "y_pred"]

    # Shared bin edges so the two histograms are directly comparable.
    bins = np.linspace(-1, 1, 100)

    plt.hist(true_scores, bins, alpha=0.5, label='true_scores')
    plt.hist(false_scores, bins, alpha=0.5, label='false_scores')
    plt.legend(loc='upper right')
    plt.xlabel("predicted score")
    plt.ylabel("count")
    if scale == 'log':
        plt.yscale('log')
    plt.title(title)
    plt.show()

In [None]:
# NOTE(review): this cell originally read `chex_df`, which no visible cell
# defines. `chexbert_results` (the CheXbert label table loaded above) is assumed
# to be the intended frame, with a "Pneumonia" column — confirm.
compare_hist(
    results_df["Pneumonia"],
    chexbert_results["Pneumonia"],
    title="Pneumonia",
)

In [None]:
# NOTE(review): this cell originally read `chex_df`, which no visible cell
# defines. `chexbert_results` (the CheXbert label table loaded above) is assumed
# to be the intended frame, with one column per label — confirm.
for label in labels:
    evaluate(results_df, chexbert_results, label)