In [None]:
# imports
import os
import jsonlines
import torch
from sklearn.metrics import classification_report
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from tqdm import tqdm

### Set up test dataset

In [None]:

dev_dir = "cs162-final-dev-main"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
human_texts, machine_texts = [], []

# Read every .jsonl file in dev_dir; each row contributes one human-written
# and one machine-generated text. os.listdir returns entries in arbitrary
# order, so the names are sorted to keep the position of each text (and thus
# any index recorded later) stable across runs and machines.
for filename in sorted(os.listdir(dev_dir)):
    if not filename.endswith(".jsonl"):
        continue
    file_path = os.path.join(dev_dir, filename)
    with jsonlines.open(file_path) as reader:
        for row in reader:
            human_texts.append(row["human_text"])
            machine_texts.append(row["machine_text"])

# Fail here, with a clear message, rather than later inside the metrics call.
assert human_texts, f"no rows loaded from .jsonl files in {dev_dir!r}"

# Human texts come first, then machine texts; labels follow the same layout
# (1 = human-written, 0 = machine-generated).
test_texts = human_texts + machine_texts
true_labels = [1] * len(human_texts) + [0] * len(machine_texts)

### Load the tokenizer and the saved classifier checkpoint

In [None]:
# Classifier weights come from the local checkpoint directory; the tokenizer
# is the stock "roberta-base" one.
# NOTE(review): presumably the checkpoint was trained with this same
# tokenizer -- confirm against the training notebook.
model = RobertaForSequenceClassification.from_pretrained("./initial-roberta-ai-detector")
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# Move the model's parameters onto the selected device.
model.to(device)

### Run evaluation on the test set

In [None]:
# Predict
# Batch size and truncation length are named once so they can be tuned in a
# single place instead of being repeated as bare numbers in the loop.
BATCH_SIZE = 8
MAX_LENGTH = 256

model.eval()  # switch the model to evaluation mode
preds = []

with torch.no_grad():  # inference only, so gradients are not tracked
    for start in tqdm(range(0, len(test_texts), BATCH_SIZE)):
        batch = test_texts[start:start + BATCH_SIZE]
        # Pad to the longest text in the batch and truncate at MAX_LENGTH tokens.
        encodings = tokenizer(
            batch,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=MAX_LENGTH,
        )
        input_ids = encodings["input_ids"].to(device)
        attention_mask = encodings["attention_mask"].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        # The predicted label is the index of the largest logit in each row.
        preds.extend(torch.argmax(outputs.logits, dim=1).tolist())

# Every text must have exactly one prediction before scoring.
assert len(preds) == len(true_labels), (
    f"got {len(preds)} predictions for {len(true_labels)} labels"
)

# Evaluate
# classification_report is already imported in the notebook's import cell.
# target_names are given in label-id order: 0 -> "AI-generated",
# 1 -> "Human-written", matching how true_labels is built.
print(classification_report(true_labels, preds, target_names=["AI-generated", "Human-written"]))

100%|██████████| 3000/3000 [06:12<00:00,  8.05it/s]


               precision    recall  f1-score   support

 AI-generated       0.76      0.94      0.84     12000
Human-written       0.93      0.70      0.79     12000

     accuracy                           0.82     24000
    macro avg       0.84      0.82      0.82     24000
 weighted avg       0.84      0.82      0.82     24000



In [4]:
# Collect every example whose predicted label differs from its true label,
# keeping the text and its position 'i' in test_texts / preds / true_labels.
misclassified = [
    {'text': test_texts[idx], 'i': idx}
    for idx, predicted in enumerate(preds)
    if predicted != true_labels[idx]
]

In [7]:
import random

# How many misclassified examples to print, and the seed that makes the
# selection repeatable; change SAMPLE_SEED to look at a different draw.
NUM_SAMPLES = 10
SAMPLE_SEED = 0

# sample() draws without replacement, so no example is printed twice
# (random.choices may repeat one). k is capped at the number of available
# examples so a short or empty list does not raise.
rng = random.Random(SAMPLE_SEED)
samples = rng.sample(misclassified, k=min(NUM_SAMPLES, len(misclassified)))

for sample in samples:
    idx = sample['i']  # position of this example in preds / true_labels
    print('=' * 50)
    print(sample['text'])
    print('=' * 50)
    print(f'Incorrectly classified as label {preds[idx]} instead of {true_labels[idx]}')
    print()

I didn’t work with glue or caulking but I was a machine operator at a sex toy factory so we worked with the silicone or rubber material (still don’t know what it is.) It came in buckets, texture was similar to sand or mud. The sand materiel made “harder” toys and the mud material made more “jelly like” toys. The machines were set at certain temperatures to melt the material and molds would be filled. We would mix the material with the “rejects” as well so no material was wasted.

But to answer your question, last 30 minutes of work we would turn off the machines and have to basically unclog the machine by poking it with a metal stick or else it would dry up and get clogged up. 

Here’s 2 vids of the actual factory I use to work at
_URL_1_
_URL_0_

Edit: should’ve mention that it was very important that the material was still hot before unclogging cause once it dried up it was a pain to unclog. The next morning we would just start the machine, set it at a high temperature, feed it and l