In [1]:
#libraries
import pandas as pd 
import numpy as np
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.metrics import classification_report
import sklearn.metrics as metrics

In [2]:
# Maximum token count passed to the tokenizer as max_length
MAX_SEQ_LENGTH = 128

# Prefer the GPU when torch can see one, otherwise run on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Tokenizer for the pretrained 'bert-base-uncased' vocabulary
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

class Model:
    """Inference wrapper around a fine-tuned BERT sequence classifier.

    Predicted class index 0 is reported as 'bot'; any other index is
    reported as 'human'. The methods rely on the module-level ``tokenizer``,
    ``device`` and ``MAX_SEQ_LENGTH`` objects.
    """

    def load_model(self, load_path):
        """Build a BERT classifier and load fine-tuned weights into it.

        Args:
            load_path: Path to a checkpoint whose ``'model_state_dict'`` entry
                holds the weights.

        Returns:
            The model, moved to ``device`` and switched to eval mode.
        """
        model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
        # NOTE(review): torch.load unpickles the file, so only load
        # checkpoints that come from a trusted source.
        checkpoint = torch.load(load_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.to(device)
        model.eval()
        print(f'Model loaded from <== {load_path}')
        return model

    def _get_model(self, load_path):
        """Return the model for ``load_path``, loading it only on first use."""
        # The cache lives on the instance and is created lazily, so it also
        # works for instances that were built without running __init__.
        cache = self.__dict__.setdefault('_model_cache', {})
        if load_path not in cache:
            cache[load_path] = self.load_model(load_path)
        return cache[load_path]

    def predict_hate(self, model, sentence):
        """Predict the class index of a single sentence.

        For model 1, index 0 refers to bot and index 1 to human.

        Args:
            model: A classifier as returned by ``load_model``.
            sentence: The text to classify.

        Returns:
            The index of the highest logit as a Python int.
        """
        tokens = tokenizer.encode_plus(
            sentence,
            max_length=MAX_SEQ_LENGTH,
            truncation=True,
            padding='max_length',
            add_special_tokens=True,
            return_token_type_ids=False,
            return_attention_mask=True,
            return_tensors='pt')
        tokens = tokens.to(device)
        with torch.no_grad():
            outputs = model(tokens['input_ids'], token_type_ids=None, attention_mask=tokens['attention_mask'])
        logits = outputs[0]
        _, predicted = torch.max(logits, dim=1)
        return predicted.item()

    def predict_proba(self, data, model_path='model_1.pt'):
        """Label every text in ``data`` as 'bot' or 'human'.

        Despite the name, this returns hard labels, not probabilities.

        Args:
            data: Iterable of texts (a list or a pandas Series, for example).
            model_path: Checkpoint to use. The model is loaded once per path
                and reused on later calls.

        Returns:
            A numpy array of strings with one label per input text; an empty
            string array when ``data`` is empty.
        """
        model1 = self._get_model(model_path)
        labels = [
            'bot' if self.predict_hate(model1, post) == 0 else 'human'
            for post in data
        ]
        # An explicit string dtype keeps an empty result from becoming float64.
        return np.array(labels, dtype=str)

# Instantiate the inference wrapper; the prediction cell calls model.predict_proba(...) on it
model = Model()



In [3]:
# Read your test data (no label column is needed for prediction)
test = pd.read_csv('test_tw.csv')

# Clean the text exactly like this, in this order.
# NOTE(review): presumably this mirrors the preprocessing used when the model
# was trained - confirm before changing any step.
# regex=True is passed explicitly: pandas >= 2.0 treats the pattern as a literal
# string by default, which would silently turn every step below into a no-op.
test['description'] = (
    test['description']
    .astype(str)
    .str.lower()                                  # Convert text to lowercase
    .str.replace(r'http\S+', 'http', regex=True)  # Collapse each URL to the token "http"
    .str.replace(r'[^\w\s#@]', '', regex=True)    # Remove punctuation except hashtags and mentions
    .str.replace(r'\n', '', regex=True)           # Remove newline characters
    .str.replace(r'\r', '', regex=True)           # Remove carriage returns
)

# Send the first 100 rows of the test data for prediction
predictions = model.predict_proba(test['description'].head(100))

# # You don't need this part when your data has no label column
# accuracy = metrics.classification_report(test['label'][:100], predictions, digits=3)
# print('Accuracy of model cascade: \n')
# print(accuracy)

print(predictions)

  test['description'] = test['description'].str.replace(r'http\S+', 'http')  # Remove URLs while preserving "http"
  test['description'] = test['description'].str.replace(r'[^\w\s#@]', '')  # Remove punctuation except hashtags and mention
  test['description'] = test['description'].str.replace(r'\n', '')  # Remove newline characters
  test['description'] = test['description'].str.replace(r'\r', '')  # Remove line breaks
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. i

Model loaded from <== model_1.pt
Accuracy of model cascade: 

              precision    recall  f1-score   support

         bot      0.944     0.883     0.913        77
       human      0.679     0.826     0.745        23

    accuracy                          0.870       100
   macro avg      0.812     0.855     0.829       100
weighted avg      0.883     0.870     0.874       100

['bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'human' 'bot' 'human' 'bot' 'bot'
 'human' 'bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'human' 'human' 'human'
 'human' 'human' 'bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'bot'
 'bot' 'bot' 'human' 'bot' 'bot' 'human' 'bot' 'bot' 'human' 'human' 'bot'
 'bot' 'bot' 'bot' 'bot' 'human' 'human' 'human' 'human' 'bot' 'human'
 'bot' 'human' 'human' 'bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'human'
 'bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'bot' 'human'
 'human' 'bot' 'bot' 'bot' 'human' 'bot' 'human' 'bot' 'human' 'bot' 'bot'
 'bot' 'bot' 'human' 'bot' 'bot