# **Zero Shot - Train Set Enrichment Cycle**

## Pipe:
DistilBERT-based hate speech classifier.<br>
Zero-Shot for hate speech classification.<br>
Zero-Shot on <a href=https://github.com/jagol/nli-for-hate-speech-detection/blob/main/configs/hypotheses_en.json>target group</a><br><br>
(Inspired by this <a href=https://aclanthology.org/2022.trac-1.10.pdf>article</a>)<br>

## Instructions:
1. Use the Presets cell to configure data and model.<br>
2. Place the input file, sampled from the <a href=https://zenodo.org/record/4442460>Parler Open Dataset</a>, in the csv_files folder under DATA_FOLDER_PATH.<br>
3. Run all cells.<br>
4. Collect the result csv from the csv_files folder under DATA_FOLDER_PATH.<br>

# Imports

In [None]:
# !pip install transformers

In [None]:
import json
import pickle
from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
import torch
import transformers
from tensorflow.keras import models, layers, metrics, losses, optimizers
from transformers import BartForSequenceClassification, BartTokenizer

# Code

## Presets

In [2]:
ITERATION                     = 0 # Use 0,1, or 2

USE_GOOGLE_DRIVE_FOR_FILES    = False
DATA_FOLDER_PATH              = "./Data/"
#MODEL_FOLDER_PATH             = "./Data/models/"
hate_classifier_threshold     = 0.5
hate_target_hypothesis        = 'That contains hate '
hate_zeroshot_threshold       = 0.7
hate_target_threshold         = 0.7
bert_model                    = f'BERT_{ITERATION}'
posts_file                    = f'parler_data000000000000.sampled50000.{ITERATION}'
posts_group                   = 1

## Data

In [None]:
def load_data(big_dataset_file_name):
  """Load a newline-delimited JSON file from the csv_files folder.

  Args:
    big_dataset_file_name: File name without the ``.ndjson`` extension,
      resolved relative to ``{DATA_FOLDER_PATH}csv_files/``.

  Returns:
    A pandas DataFrame with one row per JSON line.
  """
  ndjson_path = f'{DATA_FOLDER_PATH}csv_files/{big_dataset_file_name}.ndjson'
  # An .ndjson file holds one JSON object per line; pandas only parses that
  # layout when lines=True is given.
  return pd.read_json(ndjson_path, lines=True)

def load_csv(filename):
  """Read ``{DATA_FOLDER_PATH}csv_files/{filename}.csv`` into a DataFrame.

  Args:
    filename: File name without the ``.csv`` extension.

  Returns:
    The pandas DataFrame produced by ``pd.read_csv`` with default options.
  """
  csv_path = f'{DATA_FOLDER_PATH}csv_files/{filename}.csv'
  return pd.read_csv(csv_path)


## Classifier

In [None]:
# Module-level handles used by classify(). Both start empty:
# load_classifier_model() assigns the tokenizer as a side effect, and the model
# is bound by whoever stores load_classifier_model()'s return value.
classifier_tokenizer, classifier_model = None, None

def load_classifier_model():
  """Load the classifier tokenizer and the saved Keras classifier.

  Side effect:
    Stores a ``distilbert-base-uncased`` tokenizer in the module-level
    ``classifier_tokenizer``.

  Returns:
    The Keras model read from ``{DATA_FOLDER_PATH}models/{bert_model}.h5``.
  """
  global classifier_tokenizer

  checkpoint = 'distilbert-base-uncased'
  classifier_tokenizer = transformers.DistilBertTokenizer.from_pretrained(checkpoint)

  # A DistilBERT backbone is built and handed to Keras under the name
  # 'TFDistilBertModel' - presumably because the saved .h5 model refers to a
  # layer of that name.
  # Original author's note (Nitzan): the config here is irrelevant.
  backbone_config = transformers.DistilBertConfig(dropout=0.2, attention_dropout=0.2)
  backbone = transformers.TFDistilBertModel.from_pretrained(checkpoint, config=backbone_config, trainable=False)

  saved_model_path = f'{DATA_FOLDER_PATH}models/{bert_model}.h5'
  return models.load_model(saved_model_path, custom_objects={'TFDistilBertModel': backbone})

def classify(posts):
  """Predict a 0/1 hate label for each post with the loaded classifier.

  Args:
    posts: Object exposing ``to_list()`` (the notebook passes the 'body'
      column of the posts DataFrame).

  Returns:
    A flat integer NumPy array: 1 where the predicted probability is above
    ``hate_classifier_threshold``, else 0.
  """
  sequence_length = 190
  # Every post is padded or truncated to exactly `sequence_length` tokens.
  encoded = classifier_tokenizer(
    posts.to_list(),
    padding='max_length',
    max_length=sequence_length,
    truncation=True,
    return_attention_mask=True,
  )
  model_inputs = {
    'input_ids': np.array(encoded['input_ids']),
    'input_attention': np.array(encoded['attention_mask']),
  }
  probabilities = classifier_model.predict(model_inputs)
  return np.asarray(probabilities > hate_classifier_threshold, dtype=int).flatten()


## Zero Shot

In [None]:
def load_zeroshot_model():
  """Load the pretrained ``facebook/bart-large-mnli`` model and tokenizer.

  Returns:
    A ``(model, tokenizer)`` tuple, in that order.
  """
  checkpoint = 'facebook/bart-large-mnli'
  nli_tokenizer = BartTokenizer.from_pretrained(checkpoint)
  nli_model = BartForSequenceClassification.from_pretrained(checkpoint)
  return nli_model, nli_tokenizer

def zeroshot_hate_classify(premise):
  """Return the zero-shot probability that `premise` contains hate speech.

  The premise is paired with the fixed hypothesis 'That contains hate speech'
  and scored by the module-level ``zeroshot_model`` / ``zeroshot_tokenizer``.

  Args:
    premise: Text of the post to score.

  Returns:
    A Python float: the softmax weight of logit index 2 against logit index 0
    (index 1 is dropped). With facebook/bart-large-mnli's label order that is
    entailment vs. contradiction, ignoring neutral.
  """
  hypothesis = 'That contains hate speech'
  # Truncate only the premise, so an over-long post cannot exceed the model's
  # maximum input length while the hypothesis always stays intact.
  input_ids = zeroshot_tokenizer.encode(
    premise, hypothesis, return_tensors='pt', truncation='only_first')
  # Pure inference: no need to record an autograd graph.
  with torch.no_grad():
    logits = zeroshot_model(input_ids)[0]
  entail_contradiction_logits = logits[:, [0, 2]]
  probabilities = entail_contradiction_logits.softmax(dim=1)
  return probabilities[:, 1].item()

def zeroshot_hate_target_classify(premise, target):
  """Return the zero-shot probability that `premise` expresses hate at `target`.

  The hypothesis is ``hate_target_hypothesis + target`` and is scored by the
  module-level ``zeroshot_model`` / ``zeroshot_tokenizer``.

  Args:
    premise: Text of the post to score.
    target: Target group name appended to the hypothesis prefix.

  Returns:
    A Python float: the softmax weight of logit index 2 against logit index 0
    (index 1 is dropped). With facebook/bart-large-mnli's label order that is
    entailment vs. contradiction, ignoring neutral.
  """
  hypothesis = hate_target_hypothesis + target
  # Truncate only the premise, so an over-long post cannot exceed the model's
  # maximum input length while the hypothesis always stays intact.
  input_ids = zeroshot_tokenizer.encode(
    premise, hypothesis, return_tensors='pt', truncation='only_first')
  # Pure inference: no need to record an autograd graph.
  with torch.no_grad():
    logits = zeroshot_model(input_ids)[0]
  entail_contradiction_logits = logits[:, [0, 2]]
  probabilities = entail_contradiction_logits.softmax(dim=1)
  return probabilities[:, 1].item()

def zeroshot_identify_hate_target(premise):
  """Pick the target group the zero-shot model scores highest for `premise`.

  Each candidate group is scored with zeroshot_hate_target_classify(); on a
  tie the earlier group in the list wins.

  Args:
    premise: Text of the post to analyse.

  Returns:
    The best-scoring group name if its probability is above
    ``hate_target_threshold``, otherwise None.
  """
  targets = [
    'women', 'trans people', 'queer people', 'gay', 'black people',
    'asian people', 'people of color', 'indigenous people',
    'muslim people', 'immigrants', 'jewish people', 'christian people',
    'hindu people',
  ]
  best_target = targets[0]
  best_prob = 0
  for candidate in targets:
    prob = zeroshot_hate_target_classify(premise, candidate)
    # Strict comparison keeps the first of several equally scored groups.
    if prob > best_prob:
      best_target, best_prob = candidate, prob

  return best_target if best_prob > hate_target_threshold else None


# Tests / Demo

## Init models

In [None]:
# Load both models once up front. classify() reads `classifier_model` (and the
# tokenizer set inside load_classifier_model()); the zeroshot_* functions read
# `zeroshot_model` and `zeroshot_tokenizer`.
classifier_model = load_classifier_model()
zeroshot_model, zeroshot_tokenizer = load_zeroshot_model()

## Load data

In [None]:
# Read the posts CSV selected by the `posts_file` preset.
posts = load_csv(posts_file)

In [None]:
# Store the classifier's 0/1 prediction for every post body in a 'bert' column.
posts['bert'] = classify(posts['body'])
# Notebook display only: the rows the classifier labelled 1.
posts[posts['bert'] == 1]

## Train

In [None]:
def train_cycle(df):
  """Annotate every post with a zero-shot hate flag and a hate target.

  Progress is printed for every row.

  Args:
    df: DataFrame with a 'body' column (post text) and a 'bert' column
      (the classifier's 0/1 prediction). It is modified in place.

  Returns:
    The same DataFrame with two columns added or overwritten:
      'target': -1 when the zero-shot model does not flag the post, or flags
                it while 'bert' is 0; 0 when both flag it but no target group
                clears the threshold; otherwise the target group name.
      'hate':   True when zeroshot_hate_classify(body) is above
                ``hate_zeroshot_threshold``.
  """
  # Results are collected positionally and assigned as whole columns. This
  # keeps rows aligned for any index (not only the default 0..n-1 one) and
  # avoids writing strings into an integer-typed column cell by cell.
  hate_flags = []
  targets = []
  for i, (body, bert) in enumerate(zip(df['body'], df['bert'])):
    print(i, end = ", ")
    print (f'bert hate:{bert}', end = ", ")
    hate = zeroshot_hate_classify(body) > hate_zeroshot_threshold
    hate_flags.append(hate)

    target_value = -1
    if hate:
      print ('zero hate')
      # A target is only looked up when the classifier did not predict 0.
      if bert != 0:
        target = zeroshot_identify_hate_target(body)
        if target is not None:
          print(target)
          target_value = target
        else:
          target_value = 0
    targets.append(target_value)

  df['target'] = targets
  df['hate'] = hate_flags
  return df



In [None]:
# Run the zero-shot pass over all posts, then save the annotated frame for this
# iteration under csv_files/.
# NOTE: to_csv is called with pandas defaults, so the DataFrame index is
# written as the first, unnamed CSV column.
res = train_cycle(posts)
res.to_csv(f'{DATA_FOLDER_PATH}csv_files/parler_unannotated_predictions_{ITERATION}.csv')