In [1]:
import pandas as pd
import numpy as np
import glob
from sklearn.metrics import classification_report, roc_curve, auc
from ftlangdetect import detect

In [2]:
def get_language(text):
    """Return the detected language code of ``text``, or ``'unknown'``.

    The text is lower-cased and its newlines are replaced by spaces before it
    is handed to ``detect``. The detected ``lang`` is returned only when the
    reported ``score`` is strictly greater than 0.5.
    """
    single_line = text.lower().replace('\n', ' ')
    detection = detect(text=single_line, low_memory=False)
    return detection['lang'] if detection['score'] > 0.5 else 'unknown'

In [3]:
# Training split: raw records plus the detected language of every text.
train = pd.read_json("data/subtaskA_train_multilingual.jsonl", lines=True)
train['language'] = list(map(get_language, train['text']))

# The five statistical scores are all read from a single predictions file.
stat_cols = ['ll', 'entropy', 'rank', 'log-rank', 'llm_deviation']
stat_scores = pd.read_json("predictions/s5_train_predictions_probs.jsonl", lines=True)
train[stat_cols] = stat_scores[stat_cols]

# Binoculars output is flipped so that a higher number represents "machine".
bino_scores = pd.read_json("predictions/binoculars_train_predictions_probs.jsonl", lines=True)
train['binoculars'] = 1 - bino_scores['probs']

# LLM detector outputs are stored as-is, falcon first and mistral second.
prob_files = {
    'falcon': "predictions/falcon_train_predictions_probs.jsonl",
    'mistral': "predictions/mistral_train_predictions_probs.jsonl",
}
for detector, path in prob_files.items():
    train[detector] = pd.read_json(path, lines=True)['probs']



In [4]:
# Dev split: raw records plus the detected language of every text.
dev = pd.read_json("data/subtaskA_dev_multilingual.jsonl", lines=True)
dev['language'] = list(map(get_language, dev['text']))

# The five statistical scores are all read from a single predictions file.
stat_cols = ['ll', 'entropy', 'rank', 'log-rank', 'llm_deviation']
stat_scores = pd.read_json("predictions/s5_dev_predictions_probs.jsonl", lines=True)
dev[stat_cols] = stat_scores[stat_cols]

# Binoculars output is flipped so that a higher number represents "machine".
bino_scores = pd.read_json("predictions/binoculars_dev_predictions_probs.jsonl", lines=True)
dev['binoculars'] = 1 - bino_scores['probs']

# LLM detector outputs are stored as-is, falcon first and mistral second.
prob_files = {
    'falcon': "predictions/falcon_dev_predictions_probs.jsonl",
    'mistral': "predictions/mistral_dev_predictions_probs.jsonl",
}
for detector, path in prob_files.items():
    dev[detector] = pd.read_json(path, lines=True)['probs']

In [5]:
#optimal classification threshold calculations
# For every score column this cell stores the ROC AUC and the threshold that
# maximises (tpr - fpr), once over all rows and once per language in `languages`.
# Resulting layout: auc_dict[score] = {'auc', 'th_optim', '<lang>': {'auc', 'th_optim'}, ...}
# NOTE(review): thresholds are fitted on train + dev together, so any metric
# computed on dev with these thresholds is in-sample -- confirm this is intended.
languages = ['en', 'bg', 'zh', 'id', 'ur', 'ru', 'de', 'ar']
# Score columns are named explicitly instead of being sliced positionally from
# train.columns, so the loop does not depend on how many metadata columns exist.
score_columns = ['ll', 'entropy', 'rank', 'log-rank', 'llm_deviation', 'binoculars', 'falcon', 'mistral']


def _roc_summary(labels, scores):
    """Return the ROC AUC and the threshold maximising ``tpr - fpr`` for ``scores``."""
    fpr, tpr, thresholds = roc_curve(labels, scores)
    return {'auc': auc(fpr, tpr), 'th_optim': thresholds[np.argmax(tpr - fpr)]}


# Built once: the frame does not depend on the score column being processed.
# join='inner' keeps only the columns present in both splits, so vote columns
# that later cells add to `dev` cannot turn every train row into a NaN row
# (which dropna would then discard) when this cell is re-run.
fit_data = pd.concat([train, dev], join='inner', ignore_index=True).dropna()

auc_dict = {}
for model in score_columns:
  auc_dict[model] = _roc_summary(fit_data['label'], fit_data[model])
  for test_language in languages:
    subset = fit_data[fit_data['language'] == test_language]
    if subset['label'].nunique() < 2:
      # No rows, or only one class: a ROC curve is undefined for this language.
      # Fall back to the global threshold, which is also what is applied to
      # texts whose language is not in `languages`.
      auc_dict[model][test_language] = {'auc': np.nan, 'th_optim': auc_dict[model]['th_optim']}
    else:
      auc_dict[model][test_language] = _roc_summary(subset['label'], subset[model])

# Dev set predictions

In [6]:
# Key of the threshold entry that is read from auc_dict when scores are binarised.
use_th = 'th_optim'

In [7]:
# Binarise the five statistical scores of the dev split.
# A sample gets 1 when its score reaches the threshold of its own language
# (if that language is in `languages`) or the global threshold otherwise.
s5 = pd.DataFrame()
for selected in ['ll', 'entropy', 'rank', 'log-rank', 'llm_deviation']:
    per_lang = auc_dict[selected]
    s5[selected] = [
        int(prob >= (per_lang[lang][use_th] if lang in languages else per_lang[use_th]))
        for lang, prob in zip(dev['language'], dev[selected])
    ]
s5_dev = s5

In [8]:
# Binarise the binoculars score of the dev split: language-specific threshold
# for languages in `languages`, global threshold for every other language.
selected = 'binoculars'
per_lang = auc_dict[selected]
dev['bino'] = [
    int(prob >= (per_lang[lang][use_th] if lang in languages else per_lang[use_th]))
    for lang, prob in zip(dev['language'], dev[selected])
]
s5_dev['bino'] = dev['bino']

In [9]:
# Binarise both LLM detector scores of the dev split with the same rule:
# language-specific threshold when available, global threshold otherwise.
for selected, vote_col in (('falcon', 'selected1'), ('mistral', 'selected2')):
    per_lang = auc_dict[selected]
    dev[vote_col] = [
        int(prob >= (per_lang[lang][use_th] if lang in languages else per_lang[use_th]))
        for lang, prob in zip(dev['language'], dev[selected])
    ]

In [10]:
#statistical part majority voting
# A sample is flagged when at least two of entropy / rank / bino voted 1.
stat_votes = zip(s5_dev['entropy'], s5_dev['rank'], s5_dev['bino'])
dev['s3'] = [int(sum(votes) >= 2) for votes in stat_votes]

In [11]:
#final majority voting
# Two LLM detector votes plus the statistical vote; at least two must agree on 1.
final_votes = zip(dev['selected1'], dev['selected2'], dev['s3'])
llm2s3 = [int(sum(votes) >= 2) for votes in final_votes]

In [12]:
# Text report of the ensemble votes against the dev labels.
dev_report = classification_report(dev['label'], llm2s3, digits=5, output_dict=False)
print(dev_report)

              precision    recall  f1-score   support

           0    0.91258   0.89250   0.90243      2000
           1    0.89481   0.91450   0.90455      2000

    accuracy                        0.90350      4000
   macro avg    0.90370   0.90350   0.90349      4000
weighted avg    0.90370   0.90350   0.90349      4000



# Test set predictions

In [13]:
# Test split: raw records plus the detected language of every text.
test = pd.read_json("data/subtaskA_multilingual.jsonl", lines=True)
test['language'] = list(map(get_language, test['text']))

# The five statistical scores are all read from a single predictions file.
stat_cols = ['ll', 'entropy', 'rank', 'log-rank', 'llm_deviation']
stat_scores = pd.read_json("predictions/s5_test_predictions_probs.jsonl", lines=True)
test[stat_cols] = stat_scores[stat_cols]

# Binoculars output is flipped so that a higher number represents "machine".
bino_scores = pd.read_json("predictions/binoculars_test_predictions_probs.jsonl", lines=True)
test['binoculars'] = 1 - bino_scores['probs']

# LLM detector outputs are stored as-is, falcon first and mistral second.
prob_files = {
    'falcon': "predictions/falcon_test_predictions_probs.jsonl",
    'mistral': "predictions/mistral_test_predictions_probs.jsonl",
}
for detector, path in prob_files.items():
    test[detector] = pd.read_json(path, lines=True)['probs']

In [14]:
# Key of the threshold entry that is read from auc_dict when scores are binarised.
use_th = 'th_optim'

In [15]:
# Binarise the five statistical scores of the test split.
# A sample gets 1 when its score reaches the threshold of its own language
# (if that language is in `languages`) or the global threshold otherwise.
s5 = pd.DataFrame()
for selected in ['ll', 'entropy', 'rank', 'log-rank', 'llm_deviation']:
    per_lang = auc_dict[selected]
    s5[selected] = [
        int(prob >= (per_lang[lang][use_th] if lang in languages else per_lang[use_th]))
        for lang, prob in zip(test['language'], test[selected])
    ]
s5_test = s5

In [16]:
# Binarise the binoculars score of the test split: language-specific threshold
# for languages in `languages`, global threshold for every other language.
selected = 'binoculars'
per_lang = auc_dict[selected]
test['bino'] = [
    int(prob >= (per_lang[lang][use_th] if lang in languages else per_lang[use_th]))
    for lang, prob in zip(test['language'], test[selected])
]
s5_test['bino'] = test['bino']

In [17]:
# Binarise both LLM detector scores of the test split with the same rule:
# language-specific threshold when available, global threshold otherwise.
for selected, vote_col in (('falcon', 'selected1'), ('mistral', 'selected2')):
    per_lang = auc_dict[selected]
    test[vote_col] = [
        int(prob >= (per_lang[lang][use_th] if lang in languages else per_lang[use_th]))
        for lang, prob in zip(test['language'], test[selected])
    ]

In [18]:
#statistical part majority voting
# A sample is flagged when at least two of entropy / rank / bino voted 1.
stat_votes = zip(s5_test['entropy'], s5_test['rank'], s5_test['bino'])
test['s3'] = [int(sum(votes) >= 2) for votes in stat_votes]

In [19]:
#final majority voting
# Two LLM detector votes plus the statistical vote; at least two must agree on 1.
final_votes = zip(test['selected1'], test['selected2'], test['s3'])
llm2s3 = [int(sum(votes) >= 2) for votes in final_votes]

In [20]:
# The report is only printed when the test frame has a `label` column.
has_labels = 'label' in test.columns
if has_labels:
  test_report = classification_report(test['label'], llm2s3, digits=5, output_dict=False)
  print(test_report)

              precision    recall  f1-score   support

           0    0.97519   0.91877   0.94614     20238
           1    0.92948   0.97864   0.95342     22140

    accuracy                        0.95004     42378
   macro avg    0.95233   0.94870   0.94978     42378
weighted avg    0.95131   0.95004   0.94994     42378



In [21]:
# Build the submission in its own frame instead of overwriting test['label']:
# replacing that column with the predictions would make a re-run of the report
# cell compare the predictions with themselves.
# Output: one JSON record per line with the keys `id` and `label`.
submission = test[['id']].assign(label=llm2s3)
submission.to_json("predictions/ensemble-llm2s3_test_predictions.jsonl", lines=True, orient='records')