# Single Example Prediction

**Models**
- BERT
- RoBERTa
- GPT-2
- XLNet
- Electra
- DistilBERT  

This notebook tests the models trained in the TuringBench and AdversarialTraining notebooks. Before running it, make sure the saved models are available and adjust each `model_save_path` to point to them.

In [None]:
!pip install -r requirements.txt

In [2]:
import torch
import re
import nltk
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
from nltk.tokenize import word_tokenize
from snowballstemmer import TurkishStemmer

from transformers import BertForSequenceClassification, BertTokenizer
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification
from transformers import XLNetTokenizer, XLNetForSequenceClassification
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from transformers import ElectraTokenizer, ElectraForSequenceClassification

import warnings
warnings.filterwarnings('ignore')

In [32]:
from google.colab import drive

# Mount Google Drive at /content/drive; the model paths used below live under it
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
def predict_single_text(model, tokenizer, text, device, max_length=512):
    """Tokenize one text and return the class index predicted by ``model``.

    Args:
        model: Object providing ``to(device)``, ``eval()`` and a call that
            accepts ``input_ids`` and ``attention_mask`` keyword arguments and
            returns an object with a ``logits`` attribute.
        tokenizer: Callable that converts ``text`` into a mapping containing
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        text: The text to classify.
        device: Torch device the model and the encoded inputs are moved to.
        max_length: Truncation limit forwarded to the tokenizer. Defaults to
            512, the value that was previously hard-coded.

    Returns:
        The first element of the argmax over the last logits dimension,
        taken from the NumPy conversion of the predictions.
    """
    model.to(device)
    model.eval()
    # Tokenize the text
    encoding = tokenizer(
        text,
        truncation=True,
        padding=True,
        max_length=max_length,
        return_tensors='pt',
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    # Inference only: no gradients are needed
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        preds = torch.argmax(logits, dim=-1)

    return preds.cpu().numpy()[0]


In [6]:
# Download the NLTK resources used below: the stopword corpus and the Punkt
# tokenizer data needed by word_tokenize.
nltk.download('stopwords')
nltk.download('punkt')
# Newer NLTK releases load Punkt from the 'punkt_tab' package instead of
# 'punkt'; fetch it as well so word_tokenize does not raise LookupError there.
nltk.download('punkt_tab')
turkish_stopwords = stopwords.words('turkish')

# Stemmer used to reduce words to their stems
stemmer = TurkishStemmer()

def preprocess_text(text):
    """Clean a raw text and return it as a space-joined string of stems.

    The text is lowercased, every character that is neither a word character
    nor whitespace is removed, digit runs are removed, the result is tokenized
    with ``word_tokenize``, tokens found in ``turkish_stopwords`` are dropped
    and the remaining tokens are stemmed with the module-level ``stemmer``.

    Args:
        text: The raw input string.

    Returns:
        The stemmed, stopword-free tokens joined by single spaces.
    """
    # Lowercase
    # NOTE(review): str.lower() is not Turkish-aware ("I" becomes "i", not "ı");
    # left unchanged here — confirm it matches the preprocessing used in training.
    text = text.lower()
    # Remove punctuation
    text = re.sub(r'[^\w\s]', '', text)
    # Remove digits
    text = re.sub(r'\d+', '', text)
    # Tokenize
    words = word_tokenize(text)
    # Build the lookup set once so each membership test is O(1) instead of a
    # linear scan over the stopword list for every token
    stopword_set = set(turkish_stopwords)
    # Drop stopwords and stem what remains
    words = [stemmer.stemWord(word) for word in words if word not in stopword_set]
    # Join the cleaned tokens back into one string
    return ' '.join(words)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [7]:
# Two reference samples: one labelled human-written, one labelled AI-generated
human_test_text = "PTT Kargo Gelmeyen Kargo Ve Açılmayan Telefonlar. 27509141*** numaralı kargo 3 Haziran'da kargoya verildi 4 ünde sabah Ankara'ya gelmiş görünüyor. 5 i geçti hala hiçbir hareket yok. Bir sürü numara aradım çalıyor asla açmıyorlar. 444'lu numara da çalışmıyor. İl dışına çıkmam gerek dünden bu Yana sadece kargo beklediğim için gidemiyorum. Her gün iki"
ai_test_text = "Cinemaximum Engelli İndirimi Uygulamadı. 03.10.2023 tarihinde İstanbul Kadıköy'deki Cinemaximum'a gittiğimde, engelli indirimi talep ettiğim halde gerekli indirimi uygulamadılar. Görevliye durumu anlattığımda yardımcı olamayacaklarını ve sistemlerinin bunu desteklemediğini söylediler. Oysa daha önce defalarca aynı sinemada bu indirimi kullanmıştım. Engelli haklarına saygı gösterilmemesi ve bu konuda bir çözüm sunulamaması beni gerçekten hayal kırıklığına uğrattı. Sorunun düzeltilmesini ve engelli indirimlerinin her zaman uygulanmasını istiyorum."
# Run both samples through preprocess_text, human sample first
human_test_text, ai_test_text = map(preprocess_text, (human_test_text, ai_test_text))

## Models

### BERT

In [22]:
# Restore the saved BERT tokenizer and classifier from the same directory
model_save_path = '/content/drive/MyDrive/test/bert_model'
tokenizer = BertTokenizer.from_pretrained(model_save_path)
model = BertForSequenceClassification.from_pretrained(model_save_path)

In [23]:
# Classify both reference samples; class 0 is reported as "human", anything else as "ai"
human_pred = predict_single_text(model, tokenizer, human_test_text, device)
ai_pred = predict_single_text(model, tokenizer, ai_test_text, device)
prediction1 = "ai" if human_pred != 0 else "human"
prediction2 = "ai" if ai_pred != 0 else "human"
print(f"Actual Label: human   Prediction: {prediction1}")
print(f"Actual Label: ai      Prediction: {prediction2}")

Actual Label: human   Prediction: human
Actual Label: ai      Prediction: ai


### RoBERTa

In [24]:
# Restore the saved RoBERTa tokenizer and classifier from the same directory
model_save_path = '/content/drive/MyDrive/test/roberta_model'
tokenizer = RobertaTokenizer.from_pretrained(model_save_path)
model = RobertaForSequenceClassification.from_pretrained(model_save_path)

In [25]:
# Classify both reference samples; class 0 is reported as "human", anything else as "ai"
human_pred = predict_single_text(model, tokenizer, human_test_text, device)
ai_pred = predict_single_text(model, tokenizer, ai_test_text, device)
prediction1 = "ai" if human_pred != 0 else "human"
prediction2 = "ai" if ai_pred != 0 else "human"
print(f"Actual Label: human   Prediction: {prediction1}")
print(f"Actual Label: ai      Prediction: {prediction2}")

Actual Label: human   Prediction: ai
Actual Label: ai      Prediction: ai


### GPT-2

In [33]:
# Restore the saved GPT-2 tokenizer and classifier from the same directory
model_save_path = '/content/drive/MyDrive/test/gpt2_model'
tokenizer = GPT2Tokenizer.from_pretrained(model_save_path)
model = GPT2ForSequenceClassification.from_pretrained(model_save_path)

In [34]:
# Classify both reference samples; class 0 is reported as "human", anything else as "ai"
human_pred = predict_single_text(model, tokenizer, human_test_text, device)
ai_pred = predict_single_text(model, tokenizer, ai_test_text, device)
prediction1 = "ai" if human_pred != 0 else "human"
prediction2 = "ai" if ai_pred != 0 else "human"
print(f"Actual Label: human   Prediction: {prediction1}")
print(f"Actual Label: ai      Prediction: {prediction2}")

Actual Label: human   Prediction: ai
Actual Label: ai      Prediction: ai


### XLNet

In [35]:
# Restore the saved XLNet tokenizer and classifier from the same directory
model_save_path = '/content/drive/MyDrive/test/xlnet_model'
tokenizer = XLNetTokenizer.from_pretrained(model_save_path)
model = XLNetForSequenceClassification.from_pretrained(model_save_path)

In [36]:
# Classify both reference samples; class 0 is reported as "human", anything else as "ai"
human_pred = predict_single_text(model, tokenizer, human_test_text, device)
ai_pred = predict_single_text(model, tokenizer, ai_test_text, device)
prediction1 = "ai" if human_pred != 0 else "human"
prediction2 = "ai" if ai_pred != 0 else "human"
print(f"Actual Label: human   Prediction: {prediction1}")
print(f"Actual Label: ai      Prediction: {prediction2}")

Actual Label: human   Prediction: human
Actual Label: ai      Prediction: ai


### Electra

In [8]:
# Restore the saved Electra tokenizer and classifier from the same directory
model_save_path = '/content/drive/MyDrive/test/electra_model'
tokenizer = ElectraTokenizer.from_pretrained(model_save_path)
model = ElectraForSequenceClassification.from_pretrained(model_save_path)

In [10]:
# Classify both reference samples; class 0 is reported as "human", anything else as "ai"
human_pred = predict_single_text(model, tokenizer, human_test_text, device)
ai_pred = predict_single_text(model, tokenizer, ai_test_text, device)
prediction1 = "ai" if human_pred != 0 else "human"
prediction2 = "ai" if ai_pred != 0 else "human"
print(f"Actual Label: human   Prediction: {prediction1}")
print(f"Actual Label: ai      Prediction: {prediction2}")

Actual Label: human   Prediction: human
Actual Label: ai      Prediction: ai


### DistilBERT

In [11]:
# Restore the saved DistilBERT tokenizer and classifier from the same directory
model_save_path = '/content/drive/MyDrive/test/d_bert_model'
tokenizer = DistilBertTokenizer.from_pretrained(model_save_path)
model = DistilBertForSequenceClassification.from_pretrained(model_save_path)

In [12]:
# Classify both reference samples; class 0 is reported as "human", anything else as "ai"
human_pred = predict_single_text(model, tokenizer, human_test_text, device)
ai_pred = predict_single_text(model, tokenizer, ai_test_text, device)
prediction1 = "ai" if human_pred != 0 else "human"
prediction2 = "ai" if ai_pred != 0 else "human"
print(f"Actual Label: human   Prediction: {prediction1}")
print(f"Actual Label: ai      Prediction: {prediction2}")

Actual Label: human   Prediction: human
Actual Label: ai      Prediction: ai


## Adversarial Models

### BERT

In [26]:
# Restore the saved adversarial BERT tokenizer and classifier from the same directory
model_save_path = '/content/drive/MyDrive/test/bert_adv_model_last'
tokenizer = BertTokenizer.from_pretrained(model_save_path)
model = BertForSequenceClassification.from_pretrained(model_save_path)

In [27]:
# Classify both reference samples; class 0 is reported as "human", anything else as "ai"
human_pred = predict_single_text(model, tokenizer, human_test_text, device)
ai_pred = predict_single_text(model, tokenizer, ai_test_text, device)
prediction1 = "ai" if human_pred != 0 else "human"
prediction2 = "ai" if ai_pred != 0 else "human"
print(f"Actual Label: human   Prediction: {prediction1}")
print(f"Actual Label: ai      Prediction: {prediction2}")

Actual Label: human   Prediction: human
Actual Label: ai      Prediction: ai


### RoBERTa

In [30]:
# Restore the saved adversarial RoBERTa tokenizer and classifier from the same directory
model_save_path = '/content/drive/MyDrive/test/roberta_adv_model'
tokenizer = RobertaTokenizer.from_pretrained(model_save_path)
model = RobertaForSequenceClassification.from_pretrained(model_save_path)

In [31]:
# Classify both reference samples; class 0 is reported as "human", anything else as "ai"
human_pred = predict_single_text(model, tokenizer, human_test_text, device)
ai_pred = predict_single_text(model, tokenizer, ai_test_text, device)
prediction1 = "ai" if human_pred != 0 else "human"
prediction2 = "ai" if ai_pred != 0 else "human"
print(f"Actual Label: human   Prediction: {prediction1}")
print(f"Actual Label: ai      Prediction: {prediction2}")

Actual Label: human   Prediction: human
Actual Label: ai      Prediction: ai


### GPT-2

In [37]:
# Restore the saved adversarial GPT-2 tokenizer and classifier from the same directory
model_save_path = '/content/drive/MyDrive/test/gpt2_adv_model'
tokenizer = GPT2Tokenizer.from_pretrained(model_save_path)
model = GPT2ForSequenceClassification.from_pretrained(model_save_path)

In [38]:
# Classify both reference samples; class 0 is reported as "human", anything else as "ai"
human_pred = predict_single_text(model, tokenizer, human_test_text, device)
ai_pred = predict_single_text(model, tokenizer, ai_test_text, device)
prediction1 = "ai" if human_pred != 0 else "human"
prediction2 = "ai" if ai_pred != 0 else "human"
print(f"Actual Label: human   Prediction: {prediction1}")
print(f"Actual Label: ai      Prediction: {prediction2}")

Actual Label: human   Prediction: human
Actual Label: ai      Prediction: ai


### XLNet

In [39]:
# Restore the saved adversarial XLNet tokenizer and classifier from the same directory
model_save_path = '/content/drive/MyDrive/test/xlnet_adv_model'
tokenizer = XLNetTokenizer.from_pretrained(model_save_path)
model = XLNetForSequenceClassification.from_pretrained(model_save_path)

In [40]:
# Classify both reference samples; class 0 is reported as "human", anything else as "ai"
human_pred = predict_single_text(model, tokenizer, human_test_text, device)
ai_pred = predict_single_text(model, tokenizer, ai_test_text, device)
prediction1 = "ai" if human_pred != 0 else "human"
prediction2 = "ai" if ai_pred != 0 else "human"
print(f"Actual Label: human   Prediction: {prediction1}")
print(f"Actual Label: ai      Prediction: {prediction2}")

Actual Label: human   Prediction: human
Actual Label: ai      Prediction: ai


### Electra

In [14]:
# Restore the saved adversarial Electra tokenizer and classifier from the same directory
model_save_path = '/content/drive/MyDrive/test/electra_adv_model'
tokenizer = ElectraTokenizer.from_pretrained(model_save_path)
model = ElectraForSequenceClassification.from_pretrained(model_save_path)

In [15]:
# Classify both reference samples; class 0 is reported as "human", anything else as "ai"
human_pred = predict_single_text(model, tokenizer, human_test_text, device)
ai_pred = predict_single_text(model, tokenizer, ai_test_text, device)
prediction1 = "ai" if human_pred != 0 else "human"
prediction2 = "ai" if ai_pred != 0 else "human"
print(f"Actual Label: human   Prediction: {prediction1}")
print(f"Actual Label: ai      Prediction: {prediction2}")

Actual Label: human   Prediction: human
Actual Label: ai      Prediction: ai


### DistilBERT

In [16]:
# Restore the saved adversarial DistilBERT tokenizer and classifier from the same directory
model_save_path = '/content/drive/MyDrive/test/d_bert_adv_model'
tokenizer = DistilBertTokenizer.from_pretrained(model_save_path)
model = DistilBertForSequenceClassification.from_pretrained(model_save_path)

In [17]:
# Classify both reference samples; class 0 is reported as "human", anything else as "ai"
human_pred = predict_single_text(model, tokenizer, human_test_text, device)
ai_pred = predict_single_text(model, tokenizer, ai_test_text, device)
prediction1 = "ai" if human_pred != 0 else "human"
prediction2 = "ai" if ai_pred != 0 else "human"
print(f"Actual Label: human   Prediction: {prediction1}")
print(f"Actual Label: ai      Prediction: {prediction2}")

Actual Label: human   Prediction: human
Actual Label: ai      Prediction: ai
