In [6]:
import torch
from tqdm import tqdm
import numpy as np
import pandas as pd
from torch.nn.functional import softmax
from sklearn.metrics import classification_report
# Run on the GPU when torch can see one; otherwise stay on the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

In [1]:
# Load the tokenizer and the fine-tuned sequence-classification checkpoint by name.
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# NOTE(review): the tokenizer is taken from the base "distilroberta-base" repo rather than
# from the fine-tuned checkpoint's repo — presumably both share one vocabulary; confirm.
tokenizer = AutoTokenizer.from_pretrained("distilroberta-base")
model = AutoModelForSequenceClassification.from_pretrained("silvglad/distilroberta-olid-lr5e5-wd001")
# Move the weights to the selected device and switch the model to evaluation mode.
# `device` is defined in the imports cell, so that cell has to run before this one.
model.to(device)
model.eval()

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/981 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/329M [00:00<?, ?B/s]

In [7]:
# NOTE(review): repeats the last two statements of the model-loading cell. The execution
# counts (load cell In [1], imports In [6], this cell In [7]) suggest it was added because the
# load cell originally ran before `device` existed; on a top-to-bottom run it is redundant.
model.to(device)
model.eval()

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-5): 6 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
           

In [8]:
def classify_tweets(tweets, batch_size=32):
    """Predict a class id for every text with the module-level ``tokenizer`` and ``model``.

    Args:
        tweets: Texts to classify. May be a pandas Series, a NumPy array, a list
            or any other iterable of strings; a single string is treated as one text.
        batch_size: Number of texts tokenized and scored per forward pass.
            Must be a positive integer.

    Returns:
        list: One predicted class index (argmax over the model's logits) per input
        text, in input order. Empty input yields an empty list.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    # A bare string would otherwise be iterated character by character.
    if isinstance(tweets, str):
        tweets = [tweets]
    # Materialise once into a plain list: Series/ndarray keep their original
    # ``.tolist()`` path, while lists and other iterables are accepted as well.
    texts = tweets.tolist() if hasattr(tweets, "tolist") else list(tweets)

    all_preds = []
    for start in tqdm(range(0, len(texts), batch_size)):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(batch, padding=True, truncation=True, return_tensors="pt").to(device)
        with torch.no_grad():
            logits = model(**inputs).logits
        # softmax is monotonic, so the argmax of the raw logits already is the
        # predicted class; no need to compute probabilities first.
        all_preds.extend(logits.argmax(dim=1).cpu().numpy())
    return all_preds

### OffensiveLang

In [11]:
# Read the OffensiveLang CSV from a path relative to the working directory and preview it.
offlang = pd.read_csv("./OffensiveLang/OffensiveLang.csv")
offlang.head()

Unnamed: 0,Text,Category,Target Group,Final Annotation,OpenAI_Annotation,Annotator1,Annotator2,Annotator3
0,Your recent performances are lacking the spark...,Occupation,Actor,Offensive,Not Offensive,Not Offensive,Offensive,Offensive
1,I heard your agent is struggling to find you q...,Occupation,Actor,Offensive,Offensive,Offensive,Offensive,Offensive
2,Your character lacked depth; it felt like a ca...,Occupation,Actor,Offensive,Not Offensive,Offensive,Offensive,Offensive
3,You were overshadowed by the set design; it wa...,Occupation,Actor,Offensive,Offensive,Offensive,Offensive,Not Offensive
4,You're not as talented as [another actor] in t...,Occupation,Actor,Offensive,Offensive,Offensive,Not Offensive,Offensive


In [12]:
# Encode the final annotation as an integer target: "Offensive" -> 1, "Not Offensive" -> 0.
# Any other annotation string is left unmapped by `.map` and becomes NaN.
annotation_ids = {"Not Offensive": 0, "Offensive": 1}
offlang['true_label'] = offlang['Final Annotation'].map(annotation_ids)
offlang.head()

Unnamed: 0,Text,Category,Target Group,Final Annotation,OpenAI_Annotation,Annotator1,Annotator2,Annotator3,true_label
0,Your recent performances are lacking the spark...,Occupation,Actor,Offensive,Not Offensive,Not Offensive,Offensive,Offensive,1
1,I heard your agent is struggling to find you q...,Occupation,Actor,Offensive,Offensive,Offensive,Offensive,Offensive,1
2,Your character lacked depth; it felt like a ca...,Occupation,Actor,Offensive,Not Offensive,Offensive,Offensive,Offensive,1
3,You were overshadowed by the set design; it wa...,Occupation,Actor,Offensive,Offensive,Offensive,Offensive,Not Offensive,1
4,You're not as talented as [another actor] in t...,Occupation,Actor,Offensive,Offensive,Offensive,Not Offensive,Offensive,1


In [13]:
# Run the classifier over the 'Text' column and store one predicted class id per row.
# The stored values are raw argmax class ids; they are collapsed to binary labels in a later cell.
offlang['prediction'] = classify_tweets(offlang['Text'])
offlang.head()

100%|█████████████████████████████████████████| 259/259 [00:03<00:00, 67.04it/s]


Unnamed: 0,Text,Category,Target Group,Final Annotation,OpenAI_Annotation,Annotator1,Annotator2,Annotator3,true_label,prediction
0,Your recent performances are lacking the spark...,Occupation,Actor,Offensive,Not Offensive,Not Offensive,Offensive,Offensive,1,0
1,I heard your agent is struggling to find you q...,Occupation,Actor,Offensive,Offensive,Offensive,Offensive,Offensive,1,0
2,Your character lacked depth; it felt like a ca...,Occupation,Actor,Offensive,Not Offensive,Offensive,Offensive,Offensive,1,0
3,You were overshadowed by the set design; it wa...,Occupation,Actor,Offensive,Offensive,Offensive,Offensive,Not Offensive,1,0
4,You're not as talented as [another actor] in t...,Occupation,Actor,Offensive,Offensive,Offensive,Not Offensive,Offensive,1,0


In [16]:
# Collapse the raw class ids to a binary label: id 0 stays 0, every other id becomes 1.
# NOTE(review): presumably id 0 is the model's "not offensive" class — confirm against the
# checkpoint's label map. This overwrites the raw ids in place, so they are lost afterwards.
offlang['prediction'] = offlang['prediction'].ne(0).astype('int64')
offlang['prediction'].value_counts()

prediction
0    7692
1     578
Name: count, dtype: int64

In [17]:
# Per-class precision / recall / F1 of the binary predictions against 'true_label'.
# target_names are listed in label order: 0 -> 'Not Offensive', 1 -> 'Offensive'.
print(classification_report(y_true=offlang['true_label'], y_pred=offlang['prediction'], target_names=['Not Offensive','Offensive']))

               precision    recall  f1-score   support

Not Offensive       0.22      0.95      0.35      1748
    Offensive       0.85      0.08      0.14      6522

     accuracy                           0.26      8270
    macro avg       0.53      0.51      0.25      8270
 weighted avg       0.72      0.26      0.18      8270



In [15]:
# NOTE(review): this cell's execution count (In [15]) precedes the binarization cell (In [16]),
# so the recorded output still shows the raw class ids (0 / 3 / 4). On a top-to-bottom run it
# would instead repeat the 0 / 1 counts, because 'prediction' has already been overwritten.
offlang['prediction'].value_counts()

prediction
0    7692
4     451
3     127
Name: count, dtype: int64

In [18]:
# Write the annotated frame (with 'true_label' and 'prediction') to CSV without the index column.
# NOTE(review): the file name spells "silvgrad" while the checkpoint id is "silvglad/..." —
# confirm which spelling is intended before anything downstream depends on this path.
offlang.to_csv('silvgrad_distilroberta_offlang.csv',index=False)

### TDavidson

In [19]:
# Load the "tdavidson/hate_speech_offensive" dataset through the `datasets` library.
from datasets import load_dataset

ds = load_dataset("tdavidson/hate_speech_offensive")

In [20]:
# Convert the 'train' split to a pandas DataFrame and preview the first rows.
td = ds['train'].to_pandas()
td.head()

Unnamed: 0,count,hate_speech_count,offensive_language_count,neither_count,class,tweet
0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't...
1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...


In [21]:
# Binary target: 1 only where the dataset's 'class' equals 1, 0 for every other class id.
# NOTE(review): this folds class 0 into the negative label together with class 2. Judging by
# the count columns, class 0 is presumably the hate-speech class — confirm that scoring it as
# "Not Offensive" is intended.
td['true_label'] = td['class'].eq(1).astype('int64')
td.head()

Unnamed: 0,count,hate_speech_count,offensive_language_count,neither_count,class,tweet,true_label
0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't...,0
1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...,1
2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...,1
3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...,1
4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...,1


In [28]:
# Run the classifier over the 'tweet' column and store one predicted class id per row.
# The stored values are raw argmax class ids; they are collapsed to binary labels in a later cell.
td['prediction'] = classify_tweets(td['tweet'])
td.head()

100%|█████████████████████████████████████████| 775/775 [00:17<00:00, 45.09it/s]


Unnamed: 0,count,hate_speech_count,offensive_language_count,neither_count,class,tweet,true_label,prediction
0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't...,0,4
1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...,1,0
2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...,1,4
3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...,1,4
4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...,1,4


In [29]:
# Distribution of the raw predicted class ids, before they are collapsed to binary labels.
td['prediction'].value_counts()

prediction
4    15956
0     6841
3     1890
1       96
Name: count, dtype: int64

In [33]:
# Collapse the raw class ids to a binary label: id 0 stays 0, every other id becomes 1.
# NOTE(review): presumably id 0 is the model's "not offensive" class — confirm against the
# checkpoint's label map. This overwrites the raw ids in place, so they are lost afterwards.
td['prediction'] = td['prediction'].ne(0).astype('int64')
td['prediction'].value_counts()

prediction
1    17942
0     6841
Name: count, dtype: int64

In [34]:
# Per-class precision / recall / F1 of the binary predictions against 'true_label'.
# target_names are listed in label order: 0 -> 'Not Offensive', 1 -> 'Offensive'.
print(
    classification_report(
        y_true=td['true_label'],
        y_pred=td['prediction'],
        target_names=['Not Offensive', 'Offensive'],
    )
)

               precision    recall  f1-score   support

Not Offensive       0.51      0.62      0.56      5593
    Offensive       0.88      0.82      0.85     19190

     accuracy                           0.78     24783
    macro avg       0.69      0.72      0.71     24783
 weighted avg       0.80      0.78      0.79     24783

