In [1]:
import json
import pickle
import numpy as np
from collections import Counter
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, precision_recall_fscore_support

In [2]:
def load_data(path):
    """Read a JSON Lines file into a list of parsed records.

    Args:
        path: Location of a text file holding one JSON document per line.

    Returns:
        list: The decoded objects, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # The context manager closes the handle even when a line fails to parse
    # (the manual open/close pair leaked it in that case). JSON text is UTF-8,
    # so decode explicitly instead of relying on the platform default.
    with open(path, "r", encoding="utf-8") as f:
        # Blank lines (e.g. a trailing empty line) carry no record; skip them.
        return [json.loads(line) for line in f if line.strip()]
    
def load_model(path):
    """Deserialize and return the pickled object stored at ``path``.

    Note: unpickling can execute arbitrary code, so this must only be
    pointed at files from a trusted source.
    """
    with open(path, mode='rb') as handle:
        restored = pickle.load(handle)
    return restored

In [3]:
# load data

# Each split is a plain list with one parsed JSON record per input line.
train_df = load_data('../data/train.jsonl')
test_df = load_data('../data/test.jsonl')

# Integer class id -> class name shown in the classification reports.
target_names = {0: 'neutral', 1: 'positive', 2: 'negative'}

# JSON text is UTF-8; decode explicitly so the lexicon loads identically on
# platforms whose default text encoding is something else.
with open('../data/sentiment_lexicon.json', encoding='utf-8') as f:
    sentiment_dict = json.load(f)

In [4]:
# Random approach

print('Random approach:')

# A fixed seed makes this stochastic baseline reproducible: a fresh kernel
# followed by "Run All" prints the same scores every time.
rng = np.random.default_rng(42)
class_ids = list(target_names.keys())

for test_source_name in ['twitter', 'lj', 'lenta', 'all',]:
    # Gold labels of the current source; 'all' keeps every test sample.
    true_y = [
        sample['label']
        for sample in test_df
        if test_source_name == 'all' or sample['source'] == test_source_name
    ]

    # The sentiment label is chosen randomly for each aspect
    pred_y = rng.choice(class_ids, size=len(true_y))

    # The accuracy of the obtained models is measured with the F1 metric
    # (index 2 of the returned tuple is the F-score).
    f_micro = precision_recall_fscore_support(true_y, pred_y, average="micro")[2]
    f_macro = precision_recall_fscore_support(true_y, pred_y, average="macro")[2]

    print(f'Test source name: {test_source_name}\t, F1-mic.: {round(f_micro, 2)};\t F1-mac.:{round(f_macro, 2)}')

# true_y / pred_y still hold the last iteration here, i.e. the 'all' subset.
print(classification_report(true_y, pred_y, target_names=target_names.values()))

Random approach:
Test source name: twitter	, F1-mic.: 0.34;	 F1-mac.:0.33
Test source name: lj	, F1-mic.: 0.34;	 F1-mac.:0.33
Test source name: lenta	, F1-mic.: 0.34;	 F1-mac.:0.32
Test source name: all	, F1-mic.: 0.31;	 F1-mac.:0.3
              precision    recall  f1-score   support

     neutral       0.42      0.30      0.35       614
    positive       0.32      0.33      0.33       460
    negative       0.17      0.27      0.21       267

    accuracy                           0.31      1341
   macro avg       0.30      0.30      0.30      1341
weighted avg       0.34      0.31      0.31      1341



In [5]:
# Lexicon approach

print('Lexicon approach:')

# Build one lookup set per non-neutral class up front so the membership test
# inside the loops is constant-time and the containers are not re-resolved
# for every word.
# NOTE(review): assumes each lexicon entry is an iterable of lemma strings -
# confirm against sentiment_lexicon.json.
polar_lexicons = {
    label: set(sentiment_dict[name])
    for label, name in target_names.items()
    if name != 'neutral'
}

for test_source_name in ['twitter', 'lj', 'lenta', 'all',]:
    true_y, pred_y = [], []

    for sample in test_df:
        if test_source_name != 'all' and sample['source'] != test_source_name:
            continue

        true_y.append(sample['label'])
        sample_lemms = [word['lemma'].lower() for sentence in sample['context']['sentences'] for word in sentence]

        # check the word lemma in the sentiment dictionaries; a lemma that is
        # present in both lexicons contributes one hit to each class
        count_sent = Counter(
            label
            for word_lemm in sample_lemms
            for label, lexicon in polar_lexicons.items()
            if word_lemm in lexicon
        )

        if not count_sent:
            # no lexicon word in the context -> neutral
            pred_y.append(0)
        elif count_sent[1] >= count_sent[2]:
            # ties between positive and negative hits go to positive
            pred_y.append(1)
        else:
            pred_y.append(2)

    # The accuracy of the obtained models is measured with the F1 metric.
    # zero_division=0 matches the report call below: a class that gets no
    # predictions scores 0 without raising an UndefinedMetricWarning.
    f_micro = precision_recall_fscore_support(true_y, pred_y, average="micro", zero_division=0)[2]
    f_macro = precision_recall_fscore_support(true_y, pred_y, average="macro", zero_division=0)[2]

    print(f'Test source name: {test_source_name}\t, F1-mic.: {round(f_micro, 2)};\t F1-mac.:{round(f_macro, 2)}')

# true_y / pred_y still hold the last iteration here, i.e. the 'all' subset.
print(classification_report(true_y, pred_y, target_names=target_names.values(), zero_division=0))

Lexicon approach:


  _warn_prf(average, modifier, msg_start, len(result))


Test source name: twitter	, F1-mic.: 0.48;	 F1-mac.:0.36
Test source name: lj	, F1-mic.: 0.42;	 F1-mac.:0.34
Test source name: lenta	, F1-mic.: 0.35;	 F1-mac.:0.34
Test source name: all	, F1-mic.: 0.41;	 F1-mac.:0.35
              precision    recall  f1-score   support

     neutral       0.25      0.00      0.00       614
    positive       0.44      0.80      0.57       460
    negative       0.36      0.69      0.48       267

    accuracy                           0.41      1341
   macro avg       0.35      0.50      0.35      1341
weighted avg       0.34      0.41      0.29      1341



In [6]:
# TPOT (ELMo)

print('TPOT(ELMo) model:\n')

# NOTE: both artefacts below are unpickled, which can execute arbitrary code;
# only load files that come from a trusted source.

# load TPOT model
tpot_model = load_model('../models/tpot_elmo_model.pkl')

# load ELMO embedding vectors
df = load_model('../data/elmo_vec.pkl')

for test_source_name in ['twitter', 'lj', 'lenta', 'all',]:
    test_x, test_y = [], []

    for sample in df['test']:
        if test_source_name != 'all' and sample['source'] != test_source_name:
            continue

        test_x.append(sample['vec'])
        test_y.append(sample['label'])

    pred_y = tpot_model.predict(test_x)

    # Index 2 of the returned tuple is the F-score.
    f_micro = precision_recall_fscore_support(test_y, pred_y, average="micro")[2]
    f_macro = precision_recall_fscore_support(test_y, pred_y, average="macro")[2]

    print(f'Test source name: {test_source_name}\t, F1-mic.: {round(f_micro, 2)};\t F1-mac.:{round(f_macro, 2)}')

# Report against this cell's own gold labels (test_y, last iteration = 'all').
# Using true_y here would silently depend on a variable left behind by the
# lexicon cell and on both datasets listing their samples in the same order.
print(classification_report(test_y, pred_y, target_names=target_names.values()))

TPOT(ELMo) model:

Test source name: twitter	, F1-mic.: 0.63;	 F1-mac.:0.57
Test source name: lj	, F1-mic.: 0.59;	 F1-mac.:0.57
Test source name: lenta	, F1-mic.: 0.74;	 F1-mac.:0.71
Test source name: all	, F1-mic.: 0.66;	 F1-mac.:0.65
              precision    recall  f1-score   support

     neutral       0.77      0.57      0.66       614
    positive       0.68      0.73      0.70       460
    negative       0.51      0.73      0.60       267

    accuracy                           0.66      1341
   macro avg       0.65      0.68      0.65      1341
weighted avg       0.68      0.66      0.66      1341



In [7]:
# Marker printed when execution reaches the last cell of the notebook.
print("Successful complete")

Successful complete
