In [None]:
import _base_path
import pickle
import json
import spacy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from spacy import displacy
from resources.data_io import load_mappings

In [None]:
# Experiment configuration: dataset, model and label column inspected by this notebook.
DATA  = 'incidents'
MODEL = 'tfidf-lr'
LABEL = 'hazard-category'

# The task name is the part of the label that precedes the first hyphen.
TASK = LABEL.partition("-")[0]

# Indices of the splits whose results are loaded further down.
CV_SPLITS = list(range(5))

# Load Class-Mappings:

In [None]:
# Load the class mapping for LABEL from the dataset's splits directory and show it.
# NOTE(review): load_mappings is a project helper; the structure of its return
# value is not visible from this notebook -- confirm before relying on it.
class_map = load_mappings(f"../data/{DATA}/splits/", LABEL)
class_map

In [None]:
# Read the dataset's support zones and unpack the pair stored under LABEL.
with open(f'../data/{DATA}/support_zones.json', 'r') as file:
    support_zones = json.load(file)

high_support, low_support = support_zones[LABEL]

In [None]:
# Show the first of the two entries stored for LABEL in support_zones.json.
# (presumably the classes with many samples -- confirm against the file)
high_support

In [None]:
# Show the second of the two entries stored for LABEL in support_zones.json.
# (presumably the classes with few samples -- confirm against the file)
low_support

In [None]:
# Attach to every class its position in the mapping and its number of samples
# in the final dataset.
counts = pd.read_csv(f'../data/{DATA}/{DATA}_final.csv')[LABEL].value_counts()

# Re-read the raw mapping instead of zipping the current `class_map`: the name is
# overwritten below, so deriving the rows from it would make this cell depend on
# how often it has been executed (a second run would zip already-zipped tuples).
class_names = load_mappings(f"../data/{DATA}/splits/", LABEL)

# Each row is (class, position, count); classes that do not occur in the dataset
# get a count of 0. The rows are built in position order, so no sort is needed.
class_map = [
    (c, position, counts[c] if c in counts else 0)
    for position, c in enumerate(class_names)
]
class_map

# Load Results:

In [None]:
# Collect the test rows of every split together with the model's predictions and
# separate them into correctly and incorrectly classified samples.
#
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only run this cell on split / prediction files from a trusted source.
correct_parts   = []
incorrect_parts = []

for i in CV_SPLITS:
    # Only the two file reads are guarded: a split without data or predictions
    # is skipped (and reported) instead of aborting the whole cell.
    try:
        with open(f'../data/{DATA}/splits/split_{TASK}_{i:d}.pickle', 'rb') as f:
            texts = pickle.load(f)['test'][[LABEL, TASK+'-title', 'title']]

        with open(f'{MODEL}/{MODEL}-{LABEL}-{i:d}.pickle', 'rb') as f:
            predictions = pickle.load(f)

    except FileNotFoundError as error:
        print(f'Skipping split {i:d}: {error}')
        continue

    # The stored predictions must belong to exactly the rows of this test split.
    assert all(texts[LABEL] == predictions['labels']), \
        f'Labels of split {i:d} do not match the stored predictions.'

    # rename/assign return a new frame, so nothing is written into a slice of the
    # pickled test set (avoids pandas' SettingWithCopyWarning).
    texts = (
        texts
        .rename(columns={LABEL:'y_true', TASK+'-title':'spans'})
        .assign(y_pred=predictions['predictions'])
    )

    # Element-wise comparison; np.asarray keeps this working if the pickle stores
    # plain Python lists instead of arrays.
    is_correct = np.asarray(predictions['labels']) == np.asarray(predictions['predictions'])

    correct_parts.append(texts[is_correct])
    incorrect_parts.append(texts[~is_correct])

# Both lists grow in lockstep, so checking one of them is sufficient.
if not correct_parts:
    raise FileNotFoundError(
        f'No split/prediction files found for model {MODEL!r} and label {LABEL!r} '
        f'(splits tried: {CV_SPLITS}).'
    )

correct   = pd.concat(correct_parts)
incorrect = pd.concat(incorrect_parts)

# Display samples:

In [None]:
# Load spaCy's "en_core_web_sm" pipeline; the cells below run it on a title and
# render the resulting Doc with displaCy.
nlp = spacy.load("en_core_web_sm")

## Correctly classified:

In [None]:
# Render one randomly drawn, correctly classified title together with its spans.
#
# The row is drawn by position rather than by index label: `correct` is a
# concatenation of several splits, and a label-based lookup would return several
# rows instead of one if the splits share index labels (not verifiable from here).
pos = np.random.randint(len(correct))
i   = correct.index[pos]    # index label of the sampled row
row = correct.iloc[pos]

txt = nlp(row['title'])

sup = row['spans']          # iterated below; each item has .start/.stop and also indexes the title
ent = LABEL[:3].upper()     # span label: first three letters of LABEL, upper-cased
print('\n', ent, '\n y_true ->', class_map[row['y_true']], '\n y_pred ->', class_map[row['y_pred']])

# Doc.char_span returns None when the character offsets do not line up with token
# boundaries; such spans are printed below but cannot be highlighted.
spans = [txt.char_span(l.start, l.stop, ent) for l in sup]
for l in sup:
    print('[...]', row['title'][l], '[...]')

# displaCy's 'span' style reads the span group stored under the key "sc".
# (To render entities instead: txt.set_ents(...) with style='ent'.)
txt.spans["sc"] = [span for span in spans if span is not None]

displacy.render(txt, style='span')

## Incorrectly classified:

In [None]:
# Render one randomly drawn, incorrectly classified title together with its spans.
#
# The row is drawn by position rather than by index label: `incorrect` is a
# concatenation of several splits, and a label-based lookup would return several
# rows instead of one if the splits share index labels (not verifiable from here).
pos = np.random.randint(len(incorrect))
i   = incorrect.index[pos]  # index label of the sampled row
row = incorrect.iloc[pos]

txt = nlp(row['title'])

sup = row['spans']          # iterated below; each item has .start/.stop and also indexes the title
ent = LABEL[:3].upper()     # span label: first three letters of LABEL, upper-cased
print('\n', ent, '\n y_true ->', class_map[row['y_true']], '\n y_pred ->', class_map[row['y_pred']])

# Doc.char_span returns None when the character offsets do not line up with token
# boundaries; such spans are printed below but cannot be highlighted.
spans = [txt.char_span(l.start, l.stop, ent) for l in sup]
for l in sup:
    print('[...]', row['title'][l], '[...]')

# displaCy's 'span' style reads the span group stored under the key "sc".
# (To render entities instead: txt.set_ents(...) with style='ent'.)
txt.spans["sc"] = [span for span in spans if span is not None]

displacy.render(txt, style='span')