In [1]:
import json
import torch
from sklearn.dummy import DummyClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

from utils import load_data, safe_indexing
from utils import path_config as config

In [2]:
# Term-disambiguation results, keyed by model name; filled in by the probe loops.
term_disambiguation_scores = dict()

# Model names, grouped by language prefix. Each one is used as a key into the
# path config when the stored data for that model is loaded.
en_models = [
    'en_multi_bert',
    'en_bert',
    'en_gpt',
]
ru_models = [
    'ru_multi_bert',
    'ru_bert',
    'ru_gpt',
]

In [3]:
def term_disambiguation(train, test_size=0.33, random_state=42, max_iter=100):
    """Probe stored features for term disambiguation against two baselines.

    The samples are split into a training and a held-out partition, the
    features are standardized with statistics fitted on the training
    partition only, and three classifiers are scored on the held-out part:
    a most-frequent-class dummy, a uniform-random dummy and a logistic
    regression using the ``saga`` solver. Every macro-averaged F1 score is
    printed and returned as a percentage rounded to two decimals.

    Args:
        train: Mapping with the keys ``'features'``, ``'labels'`` and
            ``'label_to_id'`` (label name -> id). The driver cells pass the
            object returned by ``torch.load(config[model])``.
        test_size: Forwarded to ``train_test_split``. The default matches
            the value that used to be hard-coded.
        random_state: Seed forwarded to ``train_test_split``.
        max_iter: Iteration cap for the logistic regression. The default of
            100 equals scikit-learn's own default, so results are unchanged
            unless a caller raises it (e.g. to get rid of convergence
            warnings from ``saga``).

    Returns:
        dict: ``{'majority': ..., 'random': ..., 'f1': ...}`` where ``'f1'``
        is the logistic-regression score.

    Raises:
        AssertionError: If the number of feature rows and labels differ.
    """
    label_to_id = train['label_to_id']
    id_to_label = {label_id: label for label, label_id in label_to_id.items()}
    X = train['features']
    # safe_indexing comes from the project's utils module; presumably it turns
    # each stored label into the plain id used as a key of id_to_label --
    # TODO confirm against utils.
    y = [id_to_label[safe_indexing(raw_label)] for raw_label in train['labels']]
    assert len(X) == len(y), f'{len(X)} feature rows but {len(y)} labels'

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    # Fit the scaler on the training partition only so that no statistics of
    # the held-out samples leak into training.
    scaler = StandardScaler()
    train_x = scaler.fit_transform(X_train)
    test_x = scaler.transform(X_test)

    # (result key, name used in the printed line, estimator), evaluated in
    # this order. The two dummies give the floor any real probe must beat.
    classifiers = [
        ('majority', 'majority baseline',
         DummyClassifier(strategy='most_frequent', random_state=0)),
        ('random', 'random baseline',
         DummyClassifier(strategy='uniform', random_state=0)),
        ('f1', 'logistic regression',
         LogisticRegression(random_state=0, solver='saga', max_iter=max_iter)),
    ]

    scores = {}
    for key, description, classifier in classifiers:
        classifier.fit(train_x, y_train)
        y_pred = classifier.predict(test_x)
        # zero_division=0: classes that are never predicted (always the case
        # for the majority baseline) count as 0 instead of raising a warning.
        score = round(f1_score(y_test, y_pred, average="macro", zero_division=0)*100, 2)
        print(f'F1 score ({description}) = {score}')
        scores[key] = score
    return scores

In [5]:
# Term-disambiguation probe for every `en_*` model: load the data stored for
# the model and keep the returned F1 scores under the model's name.
# NOTE(review): torch.load unpickles the file, so the paths in `config` should
# only ever point at trusted files.
for model in en_models:
    print(f'\n{model}')
    data_path = config[model]
    data = torch.load(data_path)
    term_disambiguation_scores[model] = term_disambiguation(data)


en_multi_bert


F1 score (majority baseline) = 37.1
F1 score (random baseline) = 49.59
F1 score (logistic regression) = 88.41

en_bert
F1 score (majority baseline) = 37.16
F1 score (random baseline) = 49.52
F1 score (logistic regression) = 88.23

en_gpt
F1 score (majority baseline) = 37.28
F1 score (random baseline) = 49.54
F1 score (logistic regression) = 87.53


In [6]:
# Term-disambiguation probe for every `ru_*` model: load the data stored for
# the model and keep the returned F1 scores under the model's name.
for model in ru_models:
    print(f'\n{model}')
    data_path = config[model]
    data = torch.load(data_path)
    term_disambiguation_scores[model] = term_disambiguation(data)


ru_multi_bert
F1 score (majority baseline) = 42.12
F1 score (random baseline) = 47.27
F1 score (logistic regression) = 75.37

ru_bert
F1 score (majority baseline) = 42.25
F1 score (random baseline) = 47.02
F1 score (logistic regression) = 74.95

ru_gpt
F1 score (majority baseline) = 42.59
F1 score (random baseline) = 46.8
F1 score (logistic regression) = 73.43


In [7]:
# Persist the collected term-disambiguation scores as JSON in the working directory.
with open("term_disambiguation_scores.json", mode="w") as scores_file:
    json.dump(term_disambiguation_scores, scores_file)

In [6]:
# Theme-disambiguation results: model name -> {'sentence': scores, 'token': scores}.
# Re-running this cell clears everything collected so far.
theme_disambiguation_scores = dict()

In [4]:
def theme_disambiguation(train, data_type, language='en', test_size=0.33,
                         random_state=42, max_iter=100):
    """Probe stored features for theme disambiguation against two baselines.

    Features and labels are obtained from ``load_data``, split into a
    training and a held-out partition, and standardized with statistics
    fitted on the training partition only. Three classifiers are then scored
    on the held-out part: a most-frequent-class dummy, a uniform-random
    dummy and a logistic regression using the ``saga`` solver. Every
    macro-averaged F1 score is printed and returned as a percentage rounded
    to two decimals.

    Args:
        train: Object forwarded unchanged to ``load_data``. The driver cells
            pass the result of ``torch.load(config[model])``.
        data_type: Forwarded to ``load_data``; the notebook uses
            ``'sentence'`` and ``'token'``.
        language: Forwarded to ``load_data``; the notebook uses ``'en'`` and
            ``'ru'``.
        test_size: Forwarded to ``train_test_split``. The default matches
            the value that used to be hard-coded.
        random_state: Seed forwarded to ``train_test_split``.
        max_iter: Iteration cap for the logistic regression. The default of
            100 equals scikit-learn's own default, so results are unchanged
            unless a caller raises it (e.g. to get rid of convergence
            warnings from ``saga``).

    Returns:
        dict: ``{'majority': ..., 'random': ..., 'f1': ...}`` where ``'f1'``
        is the logistic-regression score.
    """
    # load_data lives in the project's utils module; it is expected to return
    # a (features, labels) pair of equal length -- TODO confirm against utils.
    X, y = load_data(train, data_type, language)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    # Fit the scaler on the training partition only so that no statistics of
    # the held-out samples leak into training.
    scaler = StandardScaler()
    train_x = scaler.fit_transform(X_train)
    test_x = scaler.transform(X_test)

    # (result key, name used in the printed line, estimator), evaluated in
    # this order. The two dummies give the floor any real probe must beat.
    classifiers = [
        ('majority', 'majority baseline',
         DummyClassifier(strategy='most_frequent', random_state=0)),
        ('random', 'random baseline',
         DummyClassifier(strategy='uniform', random_state=0)),
        ('f1', 'logistic regression',
         LogisticRegression(random_state=0, solver='saga', max_iter=max_iter)),
    ]

    scores = {}
    for key, description, classifier in classifiers:
        classifier.fit(train_x, y_train)
        y_pred = classifier.predict(test_x)
        # zero_division=0: classes that are never predicted (always the case
        # for the majority baseline) count as 0 instead of raising a warning.
        score = round(f1_score(y_test, y_pred, average="macro", zero_division=0)*100, 2)
        print(f'F1 score ({description}) = {score}')
        scores[key] = score
    return scores

In [7]:
# Sentence-based theme probe for every `en_*` model.
for model in en_models:
    print(f'\n{model} (sentence based)')
    data = torch.load(config[model])
    # setdefault instead of assigning a fresh {}: a 'token' entry that was
    # already stored for this model survives when this cell is re-run or the
    # cells are executed out of order.
    model_scores = theme_disambiguation_scores.setdefault(model, {})
    model_scores['sentence'] = theme_disambiguation(data, data_type='sentence', language='en')


en_multi_bert (sentence based)
F1 score (majority baseline) = 3.81
F1 score (random baseline) = 11.35




F1 score (logistic regression) = 52.48

en_bert (sentence based)
F1 score (majority baseline) = 3.81
F1 score (random baseline) = 11.35




F1 score (logistic regression) = 53.64

en_gpt (sentence based)
F1 score (majority baseline) = 3.81
F1 score (random baseline) = 11.35
F1 score (logistic regression) = 53.96




In [8]:
# Sentence-based theme probe for every `ru_*` model.
for model in ru_models:
    print(f'\n{model} (sentence based)')
    data = torch.load(config[model])
    # setdefault instead of assigning a fresh {}: a 'token' entry that was
    # already stored for this model survives when this cell is re-run or the
    # cells are executed out of order.
    model_scores = theme_disambiguation_scores.setdefault(model, {})
    model_scores['sentence'] = theme_disambiguation(data, data_type='sentence', language='ru')


ru_multi_bert (sentence based)
F1 score (majority baseline) = 4.4
F1 score (random baseline) = 9.75




F1 score (logistic regression) = 35.85

ru_bert (sentence based)
F1 score (majority baseline) = 4.4
F1 score (random baseline) = 9.75




F1 score (logistic regression) = 33.02

ru_gpt (sentence based)
F1 score (majority baseline) = 4.4
F1 score (random baseline) = 9.75
F1 score (logistic regression) = 37.84




In [9]:
# Token-based theme probe for every `en_*` model.
for model in en_models:
    print(f'\n{model} (token based)')
    data = torch.load(config[model])
    token_scores = theme_disambiguation(data, data_type='token', language='en')
    # setdefault creates the per-model dict on demand. Plain indexing raised a
    # KeyError -- and only after the probe had already run -- whenever the
    # sentence-based cell had not been executed first.
    theme_disambiguation_scores.setdefault(model, {})['token'] = token_scores


en_multi_bert (token based)


F1 score (majority baseline) = 3.91
F1 score (random baseline) = 9.54
F1 score (logistic regression) = 15.56

en_bert (token based)
F1 score (majority baseline) = 3.9
F1 score (random baseline) = 9.25
F1 score (logistic regression) = 16.02

en_gpt (token based)
F1 score (majority baseline) = 3.88
F1 score (random baseline) = 9.31




F1 score (logistic regression) = 24.8


In [10]:
# Token-based theme probe for every `ru_*` model.
for model in ru_models:
    print(f'\n{model} (token based)')
    data = torch.load(config[model])
    token_scores = theme_disambiguation(data, data_type='token', language='ru')
    # setdefault creates the per-model dict on demand. Plain indexing raised a
    # KeyError -- and only after the probe had already run -- whenever the
    # sentence-based cell had not been executed first.
    theme_disambiguation_scores.setdefault(model, {})['token'] = token_scores


ru_multi_bert (token based)


F1 score (majority baseline) = 3.59
F1 score (random baseline) = 9.17




F1 score (logistic regression) = 16.86

ru_bert (token based)
F1 score (majority baseline) = 3.64
F1 score (random baseline) = 9.61
F1 score (logistic regression) = 25.17

ru_gpt (token based)
F1 score (majority baseline) = 3.65
F1 score (random baseline) = 9.36
F1 score (logistic regression) = 25.01




In [11]:
# Persist the collected theme-disambiguation scores as JSON in the working directory.
with open("theme_disambiguation_scores.json", mode="w") as scores_file:
    json.dump(theme_disambiguation_scores, scores_file)