In [1]:
import json
import warnings
from typing import Optional

import matplotlib.pyplot as plt
import nltk
import numpy as np
import seaborn as sns
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import wordnet as wn
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score

In [2]:
# Load datasets
# The encoding is passed explicitly so that decoding the JSON files does not
# depend on the platform's locale default (JSON text is UTF-8 by specification).
with open('./data/original_data/X_train.json', 'r', encoding='utf-8') as file:
    X_train = json.load(file)

with open('./data/original_data/y_train.json', 'r', encoding='utf-8') as file:
    y_train = json.load(file)

with open('./data/original_data/X_test.json', 'r', encoding='utf-8') as file:
    X_test = json.load(file)

with open('./data/original_data/y_test.json', 'r', encoding='utf-8') as file:
    y_test = json.load(file)

# Synset names for the test set, used by the unsupervised model below
# (presumably obtained with Lesk disambiguation, judging by the file name - TODO confirm).
with open('./data/synsets/lesk_test_s_synsets.json', 'r', encoding='utf-8') as file:
    test_synsets = json.load(file)

In [3]:
# Functions of the ipynb files:
def get_sentiment(synset:'str | Synset') -> Optional[tuple]:
    """
    Get SentiWordNet sentiment scores for a synset.

    Parameters:
        synset (str | Synset): Synset name (e.g. 'good.a.01') or a Synset object.
            Anything that is not a string is converted with its `name()` method
            before the lookup, so both forms give the same result.

    Returns:
        tuple: Tuple with positive, negative and objective scores if SentiWordNet
            returns an entry for the synset. None otherwise.
    """
    # The lookup below is done by synset name, so normalise Synset objects first.
    synset_name = synset if isinstance(synset, str) else synset.name()

    sentiment = swn.senti_synset(synset_name)
    if not sentiment:
        return None

    return (sentiment.pos_score(), sentiment.neg_score(), sentiment.obj_score())

def score_synsets(synsets:list, score:str = 'obj', score_threshold:float = 0, merge_scores:str = 'mean', default:float = 0, hide_warnings:bool = False) -> float:
    """
    Compute a score for each synset in a list of synsets and merge them into a single score.

    Parameters:
        synsets (list): List of synsets.
        score (str): Score to compute. One of 'pos', 'neg', 'obj', 'max_score', 'dif', 'dif2', 'dif_threshold', 'dif2_threshold'.
        score_threshold (float): Threshold for 'dif_threshold' and 'dif2_threshold' scores. Scores below this threshold are set to 0.
        merge_scores (str): Method for merging scores into a single score. One of 'sum', 'mean', 'max', 'min', 'scale_norm1_mean', 'scale_norm2_mean'.
        default (float): Default score to return if synsets is empty.
        hide_warnings (bool): Whether to hide warnings or not.
        
    Returns:
        float: Merged score.
    """
    if len(synsets) == 0:
        if not hide_warnings:
            warnings.warn(f"Empty synsets list. Returning default score ({default}).", SyntaxWarning)
        return default

    if score == 'max_score' and merge_scores not in ['sum', 'mean']:
        if not hide_warnings:
            warnings.warn(f"Score 'max_score' is not compatible with '{merge_scores}'. Using 'sum' instead.", SyntaxWarning)
        merge_scores = 'sum'

    dict_score = {
        'pos': lambda s: s[0],
        'neg': lambda s: s[1], 
        'obj': lambda s: s[2],
        'max_score': lambda s: (-1 if s[0] > s[1] else 1) if s[0] != s[1] else 0,
        'dif': lambda s: s[0] - s[1],
        'dif2': lambda s: s[0]**2 - s[1]**2,
        'dif_threshold': lambda s: (s[0] if abs(s[0]) >= score_threshold else 0) - (s[1] if abs(s[1]) >= score_threshold else 0),
        'dif2_threshold': lambda s: (s[0]**2 if abs(s[0]) >= score_threshold else 0) - (s[1]**2 if abs(s[1]) >= score_threshold else 0),
        }
    
    assert score in dict_score.keys(), f"Score '{score}' not valid. Choose one of {list(dict_score.keys())}"
    
    def min_max_scale(scores:list[float|int]) -> list[float]:
        """
        Compute min-max scaling of a list of scores.

        Parameters:
            scores (list): List of scores.

        Returns:
            list: Scaled scores.
        """
        min_score = min(scores)
        max_score = max(scores)
        return [(s - min_score) / (max_score - min_score) for s in scores]

    dict_merge = {
        'sum': lambda sc: sum(sc),
        'mean': lambda sc: np.mean(sc),
        'max': lambda sc: max(sc),
        'min': lambda sc: min(sc),
        'scale_norm1_mean': lambda sc: np.mean(np.abs(min_max_scale(sc))),
        'scale_norm2_mean': lambda sc: np.linalg.norm(min_max_scale(sc)) / len(sc),
    }
    
    assert merge_scores in dict_merge.keys(), f"Merge score '{merge_scores}' not valid. Choose one of {list(dict_merge.keys())}"

    score_func = dict_score[score]
    scores = [score_func(get_sentiment(synset=s)) for s in synsets if s is not None]

    if merge_scores in ['scale_norm1_mean', 'scale_norm2_mean'] and min(scores) == max(scores):
        if not hide_warnings:
            warnings.warn(f"Scores are all the same and cannot be scaled. Returning default score ({default}).", RuntimeWarning)
        return default


    merge_func = dict_merge[merge_scores]
    scores_merged = merge_func(scores)

    return scores_merged

def discretize_scores(scores:list, threshold:float, positive_value = 1, negative_value = 0) -> list:
    """
    Binarize a list of scores against a threshold.

    Parameters:
        scores (list): List of scores.
        threshold (float): Minimum value (inclusive) for a score to count as positive.
        positive_value: Value emitted for scores greater than or equal to the threshold.
        negative_value: Value emitted for every other score.

    Returns:
        list: One of positive_value / negative_value per input score, in the same order.
    """
    return [
        positive_value if value >= threshold else negative_value
        for value in scores
    ]


def run_experiment(synsets:list, y:list, allowed:list, score:str, merge:str, thresh:float, score_thresh:float = 0, hide_warnings:bool = False, continuous:bool = False) -> tuple[float, list]:
    """
    Run an experiment with a set of parameters.

    Every sentence is scored with score_synsets using only the synsets whose POS tag
    is allowed; the score of an opinion is the mean of its sentence scores.

    Parameters:
        synsets (list): Synset names, nested as opinions -> sentences -> names
            (names look like 'lemma.pos.nn'). None entries are ignored.
        y (list): List of labels.
        allowed (list): Allowed POS tags.
        score (str): Score to compute.
        merge (str): Method to merge scores.
        thresh (float): Threshold for discretization.
        score_thresh (float): Threshold for 'dif_threshold' and 'dif2_threshold' scores.
        hide_warnings (bool): Whether to hide warnings or not.
        continuous (bool): Whether to return continuous scores or not.

    Returns:
        tuple: Accuracy (float) and results (list of predictions). When continuous is
            True the results are the raw opinion scores and the accuracy is a
            placeholder 0.0, since no discrete prediction is made.
    """
    def pos_of(name:str) -> str:
        # The lemma part of a synset name may itself contain dots, so the POS tag
        # has to be located by splitting from the right.
        return name.rsplit('.', 2)[1]

    scores_opinions = []
    for opinion in synsets:
        scores_sentences = []
        for sentence in opinion:
            filter_sentence = [name for name in sentence if name is not None and pos_of(name) in allowed]
            scores_sentences.append(score_synsets(synsets=filter_sentence, score=score, merge_scores=merge, score_threshold=score_thresh, hide_warnings=hide_warnings))

        scores_opinions.append(np.mean(scores_sentences))

    if continuous:
        return 0.0, scores_opinions

    results_opinions = discretize_scores(scores=scores_opinions, threshold=thresh)
    accuracy = accuracy_score(y, results_opinions)

    return accuracy, results_opinions

In [4]:
def compare_wrong_predictions(y_true, y_pred1, y_pred2):
    """
    Measure how well model 2 does on the samples that model 1 got wrong.

    Parameters:
        y_true (list): List of true labels.
        y_pred1 (list): List of predictions of model 1.
        y_pred2 (list): List of predictions of model 2.

    Returns:
        float: Accuracy of model 2 restricted to the samples misclassified by model 1.
    """
    # Positions where model 1 disagrees with the ground truth.
    missed_positions = []
    for position in range(len(y_true)):
        if y_true[position] != y_pred1[position]:
            missed_positions.append(position)

    # Ground truth and model 2 predictions at those positions only.
    truth_on_missed = []
    for position in missed_positions:
        truth_on_missed.append(y_true[position])

    pred2_on_missed = []
    for position in missed_positions:
        pred2_on_missed.append(y_pred2[position])

    return accuracy_score(truth_on_missed, pred2_on_missed)

In [6]:
# Get the accuracy of each model on the wrong predictions of the other model
# (RandomForestClassifier and CountVectorizer are imported in the imports cell at the top).

# Model supervised: random forest trained on binary bag-of-words features
vectorizer_alternate = CountVectorizer(min_df=0.0, binary=True, stop_words='english', strip_accents='ascii')

# Fit the vocabulary on the training texts only and reuse it for the test texts
X_train_alt = vectorizer_alternate.fit_transform(X_train)
X_test_alt = vectorizer_alternate.transform(X_test)

# random_state is fixed so that the forest is reproducible across runs
best_clf = RandomForestClassifier(max_depth=14, n_estimators=1500, random_state=42)
best_clf.fit(X_train_alt, y_train)
y_pred_sup = best_clf.predict(X_test_alt)

# Model unsupervised: sum of (positive - negative) scores, thresholded at 0;
# only the discrete predictions are needed here, the accuracy is discarded
_, y_pred_unsup = run_experiment(synsets=test_synsets, y=y_test, allowed=['a', 's', 'r', 'n'], score='dif', merge='sum', thresh=0, hide_warnings=True)

# Compare wrong predictions
acc_unsup = compare_wrong_predictions(y_true=y_test, y_pred1=y_pred_sup, y_pred2=y_pred_unsup)
print(f"Accuracy of unsupervised model in the wrong predictions of supervised model: {acc_unsup:.2f}")

acc_sup = compare_wrong_predictions(y_true=y_test, y_pred1=y_pred_unsup, y_pred2=y_pred_sup)
print(f"Accuracy of supervised model in the wrong predictions of unsupervised model: {acc_sup:.2f}")

Accuracy of unsupervised model in the wrong predictions of supervised model: 0.45
Accuracy of supervised model in the wrong predictions of unsupervised model: 0.78
