kernel `conda_pytorch_latest_p36`

In [1]:
# !pip install icecream
# !pip install tqdm
# !pip install torchmetrics
# !pip install pytorch_lightning
# !pip install transformers

In [2]:
import os
import sys
import logging
import argparse
from pathlib import Path
from ast import literal_eval
from collections import Counter
from typing import Any, Dict, Optional

from icecream import ic
from tqdm.auto import tqdm

import torchmetrics
from torchmetrics.functional import accuracy, f1, auroc

import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.core.decorators import auto_move_data
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler

import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, multilabel_confusion_matrix


import matplotlib.pyplot as plt
from pylab import rcParams
from matplotlib import rc

import transformers
from transformers import (
    AdamW,
    AutoConfig,
    AutoModel,
    AutoModelForSequenceClassification,
    T5ForConditionalGeneration,
    T5Tokenizer,
    AutoTokenizer,
)
from transformers.optimization import (
    Adafactor,
    get_linear_schedule_with_warmup,
)

In [3]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

In [4]:
# Global plot styling: seaborn theme first, then a custom colour cycle and a
# larger default figure size.
HAPPY_COLORS_PALETTE = [
    "#01BEFE",
    "#FFDD00",
    "#FF7D00",
    "#FF006D",
    "#ADFF02",
    "#8F00FF",
]
sns.set(style="whitegrid", palette="muted", font_scale=1.2)
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
rcParams["figure.figsize"] = (12, 8)

In [5]:
# Single seed for the whole notebook; ``seed_everything`` returns the seed it
# applied, which is what this cell displays.
RANDOM_SEED = 2021
pl.seed_everything(RANDOM_SEED)

Global seed set to 2021


2021

In [6]:
# Send icecream debug output through ``sys.stdout.write`` and enable its
# ``includeContext`` option.
ic.configureOutput(outputFunction=sys.stdout.write, includeContext=True)

In [7]:
# Logger named after this module's ``__name__``.
logger = logging.getLogger(__name__)

In [8]:
# --- Tokenisation / batching / optimisation settings -----------------------
MAX_LEN = 200              # max token length; excerpts are truncated/padded to this
TRAIN_BATCH_SIZE = 32
VALID_BATCH_SIZE = 64
EPOCHS = 3
LEARNING_RATE = 1e-05
MODEL_NAME = "sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking"

# --- Data locations ----------------------------------------------------------
# The default is the original SageMaker path; set the DATA_ROOT_DIR environment
# variable to run the notebook against a copy of the data stored elsewhere.
DATA_ROOT_DIR = os.environ.get(
    "DATA_ROOT_DIR",
    "/home/ec2-user/SageMaker/deep-experiments/data/frameworks_data/data_v0.4.3",
)
TRAIN_PATH = os.path.join(DATA_ROOT_DIR, "data_v0.4.3_train.csv")
VAL_PATH = os.path.join(DATA_ROOT_DIR, "data_v0.4.3_val.csv")

# Tokenizer matching the pretrained encoder named above.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [9]:
# Read both splits from disk.
train_dataset = pd.read_csv(TRAIN_PATH)
val_dataset = pd.read_csv(VAL_PATH)

# The tag columns are parsed from their string form with ``literal_eval`` and
# then concatenated row-wise into one combined tag list per excerpt.
for split_frame in (train_dataset, val_dataset):
    for tag_column in ("sectors", "pillars"):
        split_frame[tag_column] = split_frame[tag_column].apply(literal_eval)
    split_frame["tags_2d_mat"] = split_frame["sectors"] + split_frame["pillars"]

# The label vocabulary comes from the training split only; ids follow the
# alphabetical order of the tag names.
tag_set = {tag for row_tags in train_dataset["tags_2d_mat"] for tag in row_tags}
tagname_to_tagid = {tag: tag_id for tag_id, tag in enumerate(sorted(tag_set))}
tagname_to_tagid

{'Agriculture': 0,
 'Capacities & Response': 1,
 'Cross': 2,
 'Education': 3,
 'Food Security': 4,
 'Health': 5,
 'Humanitarian Conditions': 6,
 'Impact': 7,
 'Livelihoods': 8,
 'Logistics': 9,
 'Nutrition': 10,
 'People At Risk': 11,
 'Priority Interventions': 12,
 'Priority Needs': 13,
 'Protection': 14,
 'Shelter': 15,
 'WASH': 16}

In [10]:
class CustomDataset(Dataset):
    """Tokenised excerpts with multi-hot tag targets.

    The dataset can also be built with ``dataframe=None``; it then holds no
    examples and only serves as an encoder (``encode_example``) and as the
    holder of the tag-name/tag-id mappings at inference time.

    Args:
        dataframe: frame with an ``"excerpt"`` column (text) and a
            ``"tags_2d_mat"`` column (list of tag names per row), or ``None``.
        tagname_to_tagid: mapping from tag name to column index in the target
            vector.
        tokenizer: callable tokenizer returning ``input_ids``,
            ``attention_mask`` and ``token_type_ids``.
        max_len: length every example is truncated/padded to.
    """

    def __init__(self, dataframe, tagname_to_tagid, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        has_data = dataframe is not None
        self.excerpt_text = dataframe["excerpt"].tolist() if has_data else None
        self.targets = dataframe["tags_2d_mat"].tolist() if has_data else None
        self.tagname_to_tagid = tagname_to_tagid
        # Order the names by their id rather than by dict insertion order, so
        # that ``tagid_to_tagname[i]`` is the name mapped to id ``i``.
        self.tagid_to_tagname = [
            name
            for name, _ in sorted(tagname_to_tagid.items(), key=lambda item: item[1])
        ]
        self.max_len = max_len

    def encode_example(self,
                       excerpt_text: str,
                       index=None,
                       as_batch: bool = False):
        """Tokenise ``excerpt_text`` and, when possible, attach its targets.

        Args:
            excerpt_text: a single string, or a list of strings (the tokenizer
                is called on it directly, so a list yields 2-D tensors).
            index: row of the example in the underlying dataframe. Targets are
                only built when an index is given and the dataset has targets.
            as_batch: when True, return only ``ids``/``mask``/``token_type_ids``
                with a leading batch dimension added (for a single string).

        Returns:
            Dict with ``ids``, ``mask``, ``token_type_ids`` (long tensors) and,
            unless ``as_batch`` is set, ``targets`` (float32 multi-hot tensor
            or ``None``).
        """
        inputs = self.tokenizer(excerpt_text,
                                None,
                                truncation=True,
                                add_special_tokens=True,
                                max_length=self.max_len,
                                padding="max_length",
                                return_token_type_ids=True)
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]

        targets = None
        # Without an index there is no row to read labels from (inference).
        if self.targets and index is not None:
            # Tags unknown to the vocabulary are ignored.
            target_indices = [
                self.tagname_to_tagid[target]
                for target in self.targets[index]
                if target in self.tagname_to_tagid
            ]
            # np.int64 instead of the ``np.int`` alias removed in NumPy 1.24.
            targets = np.zeros(len(self.tagname_to_tagid), dtype=np.int64)
            targets[target_indices] = 1

        encoded = {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': (torch.tensor(targets, dtype=torch.float32)
                        if targets is not None else None),
        }
        if as_batch:
            return {
                "ids": encoded["ids"].unsqueeze(0),
                "mask": encoded["mask"].unsqueeze(0),
                # Previously this returned the input ids under this key.
                "token_type_ids": encoded["token_type_ids"].unsqueeze(0),
            }
        return encoded

    def __len__(self):
        """Number of excerpts; 0 for a dataset built without a dataframe."""
        return len(self.excerpt_text) if self.excerpt_text is not None else 0

    def __getitem__(self, index):
        """Return the encoded example (with targets) stored at ``index``."""
        excerpt_text = str(self.excerpt_text[index])
        return self.encode_example(excerpt_text, index)

In [11]:
# Full training and validation datasets.
training_set = CustomDataset(
    dataframe=train_dataset,
    tagname_to_tagid=tagname_to_tagid,
    tokenizer=tokenizer,
    max_len=MAX_LEN,
)
val_set = CustomDataset(
    dataframe=val_dataset,
    tagname_to_tagid=tagname_to_tagid,
    tokenizer=tokenizer,
    max_len=MAX_LEN,
)

# A 1% random sample of the validation split.
val_set_frac = CustomDataset(
    dataframe=val_dataset.sample(frac=0.01),
    tagname_to_tagid=tagname_to_tagid,
    tokenizer=tokenizer,
    max_len=MAX_LEN,
)

In [12]:
# DataLoader settings: only the training loader shuffles.
train_params = dict(batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=4)
val_params = dict(batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=4)

training_loader = DataLoader(training_set, **train_params)
# Both validation loaders share the same settings.
val_loader = DataLoader(val_set, **val_params)
val_loader_frac = DataLoader(val_set_frac, **val_params)

In [13]:
class Model(nn.Module):
    """Pretrained encoder followed by dropout and a linear tag classifier.

    The attribute names ``l1``/``l2``/``l3`` are kept as they are because they
    are part of the state-dict keys stored in existing checkpoints.

    Args:
        model_name_or_path: identifier passed to ``AutoModel.from_pretrained``.
        num_labels: number of output logits.
    """

    def __init__(self, model_name_or_path: str, num_labels: int):
        super().__init__()
        self.l1 = AutoModel.from_pretrained(model_name_or_path)
        self.l2 = torch.nn.Dropout(0.3)
        # Size the classifier from the encoder's config instead of assuming
        # 768; fall back to 768 if the config does not expose ``hidden_size``.
        hidden_size = getattr(self.l1.config, "hidden_size", 768)
        self.l3 = torch.nn.Linear(hidden_size, num_labels)

    def forward(self, inputs):
        """Return one logit per label for each sequence in the batch.

        Args:
            inputs: dict with ``"ids"`` (token ids) and ``"mask"`` (attention
                mask); other keys are ignored.

        Returns:
            Tensor of shape ``(batch, num_labels)`` computed from the hidden
            state of the first token of each sequence.
        """
        output = self.l1(inputs["ids"], attention_mask=inputs["mask"])
        # Select the first-token state before dropout/linear so the classifier
        # is not evaluated on positions that are discarded afterwards.
        first_token_state = output.last_hidden_state[:, 0, :]
        return self.l3(self.l2(first_token_state))

In [14]:
class Transformer(pl.LightningModule):
    """LightningModule that fine-tunes ``Model`` for multi-label tag prediction.

    Training and validation use ``binary_cross_entropy_with_logits`` on the
    raw model outputs. Predictions are obtained by thresholding the sigmoid of
    the logits at ``pred_threshold`` (``>=``, consistently in every method).

    Args:
        model_name_or_path: forwarded to ``Model``.
        num_labels: number of output logits (one per tag).
        empty_dataset: ``CustomDataset`` used for ``encode_example`` and for
            its ``tagname_to_tagid`` / ``tagid_to_tagname`` mappings.
        pred_threshold: probability at or above which a tag is predicted.
        learning_rate, adam_epsilon, warmup_steps, weight_decay: read back
            from ``self.hparams`` in ``configure_optimizers``.
        train_batch_size: read back from ``self.hparams`` in ``total_steps``.
        eval_batch_size, eval_splits: only recorded in ``self.hparams``.
        **kwargs: additional hyper-parameters recorded by
            ``save_hyperparameters()``; ``total_steps`` looks for ``gpus``,
            ``accumulate_grad_batches`` and ``max_epochs`` among them.
    """

    def __init__(self,
                 model_name_or_path: str,
                 num_labels: int,
                 empty_dataset: CustomDataset,
                 pred_threshold: float = .5,
                 learning_rate: float = 2e-5,
                 adam_epsilon: float = 1e-8,
                 warmup_steps: int = 0,
                 weight_decay: float = 0.0,
                 train_batch_size: int = 32,
                 eval_batch_size: int = 32,
                 eval_splits: Optional[list] = None,
                 **kwargs):
        super().__init__()

        self.save_hyperparameters()

        self.model = Model(model_name_or_path, num_labels)
        self.empty_dataset = empty_dataset
        self.pred_threshold = pred_threshold

        # Separate metric objects so train and validation state never mix.
        self.f1_score_train = self._build_f1_metric()
        self.f1_score_val = self._build_f1_metric()

    @staticmethod
    def _build_f1_metric():
        """Create the F1 metric shared by the training and validation loops."""
        return torchmetrics.F1(
            num_classes=2,
            threshold=0.5,
            average='macro',
            mdmc_average="samplewise",
            ignore_index=None,
            top_k=None,
            multiclass=True,
            compute_on_step=True,
            dist_sync_on_step=False,
            process_group=None,
            dist_sync_fn=None,
        )

    def _logits_to_tag_names(self, logits):
        """Turn a ``(batch, num_labels)`` logit tensor into lists of tag names."""
        preds = torch.sigmoid(logits) >= self.pred_threshold
        tag_names = self.empty_dataset.tagid_to_tagname
        return [[tag_names[i] for i, p in enumerate(row) if p] for row in preds]

    @auto_move_data
    def forward(self, inputs):
        """Run the wrapped ``Model`` on a dict of input tensors; returns logits."""
        output = self.model(inputs)
        return output

    def training_step(self, batch, batch_idx):
        """Compute the BCE-with-logits loss and log the training F1."""
        outputs = self(batch)
        loss = F.binary_cross_entropy_with_logits(outputs, batch["targets"])

        self.f1_score_train(torch.sigmoid(outputs),
                            batch["targets"].to(dtype=torch.long))
        self.log("train_f1", self.f1_score_train, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx, dataloader_idx=0):
        """Compute the validation loss and log ``val_f1`` / ``val_loss``."""
        outputs = self(batch)
        val_loss = F.binary_cross_entropy_with_logits(outputs,
                                                      batch["targets"])

        self.f1_score_val(torch.sigmoid(outputs),
                          batch["targets"].to(dtype=torch.long))
        self.log("val_f1",
                 self.f1_score_val,
                 on_step=True,
                 on_epoch=True,
                 prog_bar=True,
                 logger=False)

        self.log("val_loss",
                 val_loss,
                 on_step=True,
                 on_epoch=True,
                 prog_bar=True,
                 logger=False)
        return {'val_loss': val_loss}

    def test_step(self, batch, batch_nb):
        """Return thresholded predictions together with the batch targets."""
        logits = self(batch)
        preds = torch.sigmoid(logits) >= self.pred_threshold
        return {"preds": preds, "targets_i": batch["targets"]}

    def on_test_epoch_end(self, outputs):
        """Log per-tag ROC-AUC and F1 computed over all test batches."""
        # ``metrics`` was never imported in this notebook; import the sklearn
        # scorer locally so the method is self-contained.
        from sklearn.metrics import f1_score

        preds = torch.cat([output["preds"] for output in outputs]).cpu()
        targets = torch.cat([output["targets_i"] for output in outputs]).cpu()
        tag_names = self.empty_dataset.tagid_to_tagname
        for i in range(targets.shape[1]):
            # The metric functions need integer targets; predictions are cast
            # from bool to float for ``auroc`` and to int for sklearn.
            class_targets = targets[:, i].long()
            class_preds = preds[:, i]
            class_roc_auc = auroc(class_preds.float(), class_targets)
            self.log(f"{tag_names[i]}_roc_auc/Train", class_roc_auc)
            class_f1 = f1_score(class_targets.numpy(),
                                class_preds.long().numpy())
            self.log(f"{tag_names[i]}_f1/Train", class_f1)

    def predict_step(self, batch, batch_idx, dataloader_idx=None):
        """Return the raw logits for one prediction batch."""
        output = self(batch)
        return {"logits": output}

    def on_predict_epoch_end(self, outputs):
        """Decode the logits of the first dataloader into tag names.

        ``self.log`` takes a name and a numeric value, so the decoded lists
        of tag names are stored on ``self.predicted_classes`` instead.
        """
        logits = torch.cat([output["logits"] for output in outputs[0]])
        self.predicted_classes = self._logits_to_tag_names(logits)

    def custom_predict(self, inputs):
        """Predict tag names for a string or a list of strings.

        Returns:
            A list with one list of tag names per input text.
        """
        self.eval()
        self.freeze()
        # A single string must be given a batch dimension by the encoder.
        as_batch = isinstance(inputs, str)
        # ``as_batch`` is passed by keyword: the second positional parameter
        # of ``encode_example`` is ``index``.
        inputs = self.empty_dataset.encode_example(inputs, as_batch=as_batch)

        with torch.no_grad():
            logits = self(inputs)
        return self._logits_to_tag_names(logits)

    def total_steps(self) -> int:
        """The number of total training steps that will be run. Used for lr scheduler purposes."""
        import math

        self.dataset_size = len(self.train_dataloader().dataset)

        # ``gpus`` / ``accumulate_grad_batches`` / ``max_epochs`` are optional
        # extra hyper-parameters; fall back to sensible values when absent.
        gpus = self.hparams.get("gpus")
        if isinstance(gpus, (list, tuple)):
            num_devices = max(1, len(gpus))
        elif isinstance(gpus, int):
            num_devices = max(1, gpus)  # TODO: consider num_tpu_cores
        else:
            num_devices = 1
        accumulate_grad_batches = self.hparams.get("accumulate_grad_batches") or 1

        max_epochs = self.hparams.get("max_epochs")
        if max_epochs is None:
            try:
                trainer = self.trainer
            except (AttributeError, RuntimeError):
                trainer = None
            max_epochs = getattr(trainer, "max_epochs", None) or 1

        effective_batch_size = (self.hparams.train_batch_size *
                                accumulate_grad_batches * num_devices)
        # Round up so the linear decay does not reach zero before the last step.
        return math.ceil(
            (self.dataset_size / effective_batch_size) * max_epochs)

    def configure_optimizers(self):
        "Prepare optimizer and schedule (linear warmup and decay)"
        model = self.model
        # Biases and LayerNorm weights are excluded from weight decay.
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                self.hparams.weight_decay,
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                0.0,
            },
        ]
        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=self.hparams.learning_rate,
                          eps=self.hparams.adam_epsilon)

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.hparams.warmup_steps,
            num_training_steps=self.total_steps())
        # The scheduler is stepped after every optimizer step.
        scheduler = {
            'scheduler': scheduler,
            'interval': 'step',
            'frequency': 1
        }
        return [optimizer], [scheduler]

    def train_dataloader(self):
        """Return the notebook-level ``training_loader``."""
        return training_loader

    def val_dataloader(self):
        """Return the notebook-level ``val_loader``."""
        return val_loader

    def custom_eval(self, eval_dataloader):
        """Evaluate on ``eval_dataloader`` and return per-tag metrics.

        Returns:
            DataFrame with one row per tag and the columns ``Sector``,
            ``Precision``, ``Recall``, ``F1 Score`` (macro averages of the
            binary report for that tag), ``Accuracy`` and ``Support`` (number
            of positive examples).
        """
        # Use the GPU when one is available, otherwise stay on the CPU.
        if self.device.type == "cpu" and torch.cuda.is_available():
            self.to("cuda")
        device = self.device
        self.eval()
        self.freeze()
        preds_val_all = []
        y_true = []

        with torch.no_grad():
            # len(dataloader) also counts a final, partially filled batch.
            for batch in tqdm(eval_dataloader, total=len(eval_dataloader)):
                logits = self({
                    "ids": batch["ids"].to(device),
                    "mask": batch["mask"].to(device),
                    "token_type_ids": batch["token_type_ids"].to(device),
                })
                preds_batch = (torch.sigmoid(logits) >=
                               self.pred_threshold).cpu().numpy().astype(np.int64)
                preds_val_all.append(preds_batch)
                y_true.append(batch["targets"].numpy().astype(np.int64))

        columns = ["Sector", "Precision", "Recall", "F1 Score", "Accuracy", "Support"]
        if not preds_val_all:
            # Nothing was evaluated: return an empty, correctly shaped frame.
            return pd.DataFrame(columns=columns)

        preds_val_all = np.concatenate(preds_val_all)
        y_true = np.concatenate(y_true)

        f1_scores = []
        recalls = []
        precisions = []
        accuracies = []
        supports = []
        tagname_to_tagid = self.empty_dataset.tagname_to_tagid
        for tag_name, tag_id in tagname_to_tagid.items():
            cls_rprt = classification_report(y_true[:, tag_id], preds_val_all[:, tag_id], output_dict=True)
            precisions.append(cls_rprt["macro avg"]["precision"])
            recalls.append(cls_rprt["macro avg"]["recall"])
            f1_scores.append(cls_rprt["macro avg"]["f1-score"])
            # The "1" entry is missing when a tag has neither positive labels
            # nor positive predictions in this evaluation set.
            supports.append(cls_rprt.get("1", {}).get("support", 0))
            accuracies.append(cls_rprt["accuracy"])

        metrics_df = pd.DataFrame({
            "Sector": list(tagname_to_tagid.keys()),
            "Precision": precisions,
            "Recall": recalls,
            "F1 Score": f1_scores,
            "Accuracy": accuracies,
            "Support": supports,
        })
        return metrics_df

In [15]:
# Restore the fine-tuned LightningModule (weights and saved hyper-parameters)
# from the checkpoint file.
checkpoint_path = "../checkpoints-sentence-transformers-distilbert-multilingual-nli-stsb-quora-ranking/epoch=1-step=5665.ckpt"
model = Transformer.load_from_checkpoint(checkpoint_path=checkpoint_path)

In [23]:
# Load the leads to tag. ``parsed_text`` is parsed from its string form with
# ``literal_eval`` (the later cells iterate over it as a list of sentences).
# The original call used the misspelled name ``literal_evaleral_eval``, which
# raises NameError.
leads = pd.read_csv("leads.csv")
leads["parsed_text"] = leads["parsed_text"].apply(literal_eval)

In [30]:
def custom_predict(model, inputs):
    """Predict tag names for one text or a list of texts.

    Args:
        model: object exposing ``eval()``, ``freeze()``, ``pred_threshold``,
            an ``empty_dataset`` with ``encode_example`` and
            ``tagid_to_tagname``, and that is callable on the encoded inputs.
        inputs: a single string, or a sequence of strings.

    Returns:
        A list with one list of tag names per input text; ``[]`` for an empty
        sequence of texts.
    """
    model.eval()
    model.freeze()

    single_text = isinstance(inputs, str)
    if not single_text and len(inputs) == 0:
        # Nothing to encode; avoid calling the tokenizer on an empty batch.
        return []

    # ``as_batch`` must be passed by keyword: the second positional parameter
    # of ``encode_example`` is ``index``, so a positional call never added the
    # batch dimension for a single string.
    encoded = model.empty_dataset.encode_example(inputs, as_batch=single_text)

    with torch.no_grad():
        logits = model(encoded)
    preds = torch.sigmoid(logits) >= model.pred_threshold

    tag_names = model.empty_dataset.tagid_to_tagname
    return [[tag_names[i] for i, p in enumerate(row) if p] for row in preds]

In [32]:
# Tag every sentence of every lead, feeding the model ``bsz`` sentences at a
# time. ``preds_all`` ends up with one list of per-sentence predictions per
# lead, in the same order as ``leads["parsed_text"]``.
bsz = 32
preds_all = []
for doc_sentences in leads["parsed_text"]:
    preds_i = []
    for start in range(0, len(doc_sentences), bsz):
        chunk_preds = custom_predict(model, doc_sentences[start:start + bsz])
        print(chunk_preds)
        preds_i += chunk_preds
    preds_all.append(preds_i)

[[], [], [], ['Cross'], ['Cross'], ['Cross'], [], ['Shelter'], ['Education'], ['Education'], ['Impact'], [], ['Education'], ['Cross'], ['Cross'], ['Cross'], ['Cross'], ['Shelter'], ['Cross'], ['Cross'], ['Shelter']]
[['Impact', 'Protection'], ['Health'], ['Health', 'Humanitarian Conditions'], [], ['Cross'], ['Cross'], ['Cross'], ['Cross'], ['Health'], ['Food Security'], ['Cross'], ['Health'], ['Health'], ['Cross'], ['Cross'], ['Health'], ['Health'], ['Health'], [], ['Health', 'Impact'], ['Health'], ['Cross', 'Impact', 'Logistics'], ['Cross'], [], [], [], ['Health', 'Humanitarian Conditions'], [], [], ['Health'], ['Health'], ['Health', 'Humanitarian Conditions']]
[['Health'], [], ['Health']]
[['Education', 'Impact'], ['Education'], ['Education'], [], ['Health'], ['Cross'], ['Education'], [], ['Education', 'Impact'], ['Education'], ['Education', 'Impact'], ['Health'], ['Education', 'Impact']]
[['Health'], ['Health', 'Impact'], ['Capacities & Response', 'Health'], ['Health'], [], ['Health

[['Protection'], ['Protection'], ['Cross', 'Humanitarian Conditions'], ['Protection'], [], ['Protection'], ['Protection'], ['Cross'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], [], ['Impact', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Cross', 'Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection']]
[['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Education', 'Protection'], ['Cross', 'Impact', 'Protection

[['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Cross'], ['Cross'], ['Protection'], ['Cross'], ['Protection'], [], ['Protection'], ['Protection'], ['Cross', 'Humanitarian Conditions'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection']]
[['Protection'], ['Protection'], ['Protection'], ['Impact', 'Protection'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Cross', 'Humanitarian Conditions', 'Impact', 'Protection', 'Shelter'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], 

[['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], [], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Impact', 'Shelter'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Cross', 'Impact'], ['Protection'], ['Protection'], ['Cross'], ['Cross'], ['Cross'], ['Cross'], ['Agriculture', 'Humanitarian Conditions', 'Protection'], ['Livelihoods'], ['Cross'], [], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Cross']]
[['Cross'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Impact', 'Shelter'], ['Shelter'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Shelter'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Impact', 'Protection

[['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Cross'], ['Impact', 'Protection'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Cross'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection']]
[['Humanitarian Conditions', 'Protection'], ['Cross', 'Humanitarian Conditions', '

[['Protection'], ['Protection'], ['Protection'], [], [], ['Protection'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Impact', 'Protection'], ['Cross', 'Protection'], ['Protection'], ['Protection'], ['Protection'], ['Cross'], ['Cross'], ['Cross'], ['Cross'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], [], ['Protection'], ['Humanitarian Conditions', 'Impact', 'Protection'], ['Protection']]
[['Impact', 'Protection'], ['Protection'], ['Cross'], ['Cross'], ['Cross'], ['Cross'], ['Cross'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions'], ['Humanitarian Conditions'], [], ['Cross'], ['Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Protection'], ['Protection'], ['Pr

[['Protection'], ['Cross'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Impact', 'Protection'], ['Impact', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Humanitarian Conditions', 'Protection'], ['Impact', 'Protection'], ['Impact', 'Protection'], ['Impact', 'Protection'], ['Impact', 'Protection'], ['Impact', 'Protection'], ['Impact', 'Protection']]
[['Impact', 'Protection'], ['Impact', 'Protection'], ['Impact', 'Protection'], ['Impact', 'Protection'], ['Cross'], ['Protection'], ['Protection'], ['Protection'], ['Protection'], ['Protection']]
[['Cross'], [], ['Shelter']

In [35]:
# Print each sentence next to its predicted tags, with a dashed rule after
# every sentence and a starred rule after every lead.
for parsed_text, preds in zip(leads["parsed_text"], preds_all):
    for sentence, classes in zip(parsed_text, preds):
        print(f"{sentence}\t{classes}", "-" * 50, sep="\n")
    print("*" * 70)

The science building of Raja GC School developed cracks due to the earthquake A mild tremor of 3.8 magnitude on the Richter Scale was felt in Sylhet city on Monday evening, causing panic among residents as they have experienced several earthquakes in the last 10 days.	[]
--------------------------------------------------
Seismic In-Charge of Bangladesh Meteorological Department (BMD) Md Muminul Islam confirmed the matter of the tremor to Dhaka Tribune.	[]
--------------------------------------------------
	[]
--------------------------------------------------
The epicenter of the earthquake was near the West Khasi Hills in the Indian state of Meghalaya.	['Cross']
--------------------------------------------------
It occurred at a depth of 10km, according to the earthquake monitoring website VolcanoDiscovery.	['Cross']
--------------------------------------------------
People came out on the streets in panic after the latest earthquake, said lawyer Kamal Hossain of Sagardighipar area.	[

In a 2019 article, citing 2017 data, Zamfara was ranked first of Nigerian states with highest rates of kidnapping (before Rivers, Lagos and Jigawa).	['Humanitarian Conditions', 'Protection']
--------------------------------------------------
915 More recent information on road security in Zamfara state was not found.	['Protection']
--------------------------------------------------
2.7.3.5 Infrastructure damage and explosive remnants of war As of April 2019, in Zamfara state more than 10 000 houses, shops and silos were reportedly destroyed, due to violent attacks.	['Cross', 'Impact', 'Shelter']
--------------------------------------------------
916 No information was found on explosive remnants of war in Zamfara state.	['Humanitarian Conditions', 'Protection']
--------------------------------------------------
906 New Humanitarian (The), The longshot bid to end rampant banditry in Nigerias northwest, 19 January 2021, url 907 New Humanitarian (The), The longshot bid to end rampant band

--------------------------------------------------
The table below shows the number of violent incidents and fatalities, according to type of incident for Bayelsa state in 2020, as recorded by ACLED.	['Protection']
--------------------------------------------------
Bayelsa state Battles Explosions/remote violence Violence against civilians Riots Total Table 38 Number of violent incidents and fatalities by type of incident in 20202029 Nr of incidents 11 0 10 4 25 Nr of fatalities 25 0 10 3 38 2026 ACLED dataset, filtered on Nigeria, 1-1-2020 to 31-12-2020, url 2027 ACLED dataset, filtered on Nigeria, 1-1-2020 to 31-12-2020, url 2028 EASO analysis PowerBI based on ACLED dataset, filtered on Nigeria, 1-1-2020 to 31-12-2020, url 2029 ACLED dataset, filtered on Nigeria, 1-1-2020 to 31-12-2020, url 239	['Protection']
--------------------------------------------------
Bayelsa state Battles Explosions/remote violence Violence against civilians Riots Total Table 38 Number of violent incidents a