In [None]:
# Install dependencies with %pip (not `! pip`) so the packages land in the
# environment of the running kernel rather than whichever `pip` is first on PATH.
# Japanese language processing libraries
%pip install fugashi ipadic
# Hugging Face's Accelerate library for mixed precision and distributed training
%pip install -U accelerate
# Hugging Face's library for pretrained models
%pip install -U transformers
# Library for managing datasets
%pip install datasets

In [None]:
# Required Libraries
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# -nc (no-clobber): skip the download when wrime-ver2.tsv already exists, so that
# re-running this cell does not pile up wrime-ver2.tsv.1, .2, ... copies.
! wget -nc https://github.com/ids-cv/wrime/raw/master/wrime-ver2.tsv

In [None]:
# Load the tab-separated WRIME file downloaded by the previous cell
df_wrime = pd.read_csv('wrime-ver2.tsv', sep='\t')

In [None]:
def max_emotion_intensity(row):
    """Return the largest 'Avg. Readers_*' emotion value found in ``row``.

    Every index label of ``row`` that starts with 'Avg. Readers_' is taken
    into account, except 'Avg. Readers_Sentiment', which is skipped.
    """
    prefix = 'Avg. Readers_'
    excluded = 'Avg. Readers_Sentiment'
    intensities = [
        row[column]
        for column in row.index
        if column.startswith(prefix) and column != excluded
    ]
    return max(intensities)

# Compute the per-row maximum emotion intensity, store it in
# 'Max_Emotion_Intensity', and keep only the rows where it is above 0.
df_wrime = (
    df_wrime
    .assign(Max_Emotion_Intensity=lambda frame: frame.apply(max_emotion_intensity, axis=1))
    .loc[lambda frame: frame['Max_Emotion_Intensity'] > 0]
)

In [None]:
# Sentiment score assigned to each emotion; used below to break ties
emotion_sentiment_scores = dict(
    Joy=1.117353,
    Sadness=-1.029128,
    Anticipation=0.653459,
    Surprise=0.050611,
    Anger=-1.365032,
    Fear=-0.892089,
    Disgust=-1.148589,
    Trust=1.197159,
)

def adjust_primary_emotion_intensity_for_ties(row):
    """Break a tie between the strongest 'Avg. Readers_*' emotions of ``row``.

    When more than one emotion column (excluding 'Avg. Readers_Sentiment')
    shares the maximum value, the tied emotion whose entry in
    ``emotion_sentiment_scores`` is closest to ``row['Avg. Readers_Sentiment']``
    gets 0.01 added, and 'Max_Emotion_Intensity' is set to the old maximum
    plus 0.01. Rows without a tie are returned untouched.

    The row is modified in place and also returned.
    """
    prefix = 'Avg. Readers_'
    candidates = [
        column
        for column in row.index
        if column.startswith(prefix) and column != 'Avg. Readers_Sentiment'
    ]

    # Strongest intensity and every emotion column that reaches it
    top = max(row[column] for column in candidates)
    tied = [column for column in candidates if row[column] == top]

    # A single winner needs no adjustment
    if len(tied) <= 1:
        return row

    target_sentiment = row['Avg. Readers_Sentiment']

    def distance_to_target(column):
        # Gap between the emotion's fixed sentiment score and the row's sentiment
        return abs(emotion_sentiment_scores[column.replace('Avg. Readers_', '')] - target_sentiment)

    winner = min(tied, key=distance_to_target)

    # Nudge the winner by a small amount (0.01) so it becomes the unique maximum
    row[winner] += 0.01
    row['Max_Emotion_Intensity'] = top + 0.01
    return row

# Apply the tie-breaking adjustment row by row. The function returns the
# (possibly modified) row, so the result is a new DataFrame that replaces df_wrime.
df_wrime = df_wrime.apply(adjust_primary_emotion_intensity_for_ties, axis=1)

In [None]:
def determine_primary_emotion(row):
    """Return the name of the strongest 'Avg. Readers_*' emotion in ``row``.

    'Avg. Readers_Sentiment' is ignored. The returned name has the
    'Avg. Readers_' prefix removed (e.g. 'Joy'). When several emotions share
    the maximum, the first one in index order is returned.
    """
    scored = [
        (column, row[column])
        for column in row.index
        if column.startswith('Avg. Readers_') and column != 'Avg. Readers_Sentiment'
    ]
    # max() keeps the first item among equals, i.e. index order decides ties
    strongest_column, _ = max(scored, key=lambda pair: pair[1])
    return strongest_column.replace('Avg. Readers_', '')

# Label every row with its primary emotion; the result is stored in the
# 'Primary_Emotion' column of df_wrime.
df_wrime['Primary_Emotion'] = df_wrime.apply(determine_primary_emotion, axis=1)

# Print how many rows fall under each primary emotion
print(df_wrime['Primary_Emotion'].value_counts())

In [None]:
# Split the DataFrame into train, validation (dev) and test sets.
# Step 1: hold out 20% of all rows as the test set; the remainder is train+dev.
train_dev_df, df_test = train_test_split(df_wrime, test_size=0.2, random_state=42)
# Step 2: take 25% of train+dev as the dev set. Since train+dev is 80% of the data,
# this is 0.25 x 0.8 = 0.2 of the full data, i.e. a 60% / 20% / 20% split overall.
# Both calls use the same fixed random_state (42).
df_train, df_dev = train_test_split(train_dev_df, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

In [None]:
# Create the "easy" dataset.
# Emotion columns that are inspected in every split
emotion_columns = [
    'Avg. Readers_Joy',
    'Avg. Readers_Sadness',
    'Avg. Readers_Anticipation',
    'Avg. Readers_Surprise',
    'Avg. Readers_Anger',
    'Avg. Readers_Fear',
    'Avg. Readers_Disgust',
    'Avg. Readers_Trust',
]

# Values that disqualify a row from the easy dataset
values_to_remove = [1.01, 2.01, 3.01]

# For each split, drop every row in which any emotion column holds one of
# the values above; the same filter is applied to train, dev and test.
easy_df_train, easy_df_dev, easy_df_test = (
    split[~split[emotion_columns].apply(lambda row: row.isin(values_to_remove).any(), axis=1)]
    for split in (df_train, df_dev, df_test)
)

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig

# Hugging Face Hub identifier of the checkpoint whose tokenizer is used below
checkpoint = 'cl-tohoku/bert-base-japanese-whole-word-masking'

# Initialize the tokenizer from that checkpoint; main() uses it to encode each sentence
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [None]:
# Pick the GPU when CUDA is available, otherwise fall back to the CPU,
# and report which device was selected.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using GPU: {torch.cuda.get_device_name(0)}" if device.type == "cuda" else "Using CPU")

In [None]:
# Load the "list" variant of the ten classifiers from the Hugging Face Hub.
# NOTE(review): the next cell ("Load single label models") rebinds all ten of
# these names. On a top-to-bottom run the models loaded here are therefore
# replaced before they are moved to the device or used - presumably only one of
# the two loading cells is meant to be executed; confirm and remove the other.
from transformers import AutoModelForSequenceClassification

# Neutral vs. Positive/Negative
neutral_emotional_classifier = AutoModelForSequenceClassification.from_pretrained('experiarms777/Neutral_Emotional_Classifier')

# Positive vs. Negative
positive_negative_classifier = AutoModelForSequenceClassification.from_pretrained('experiarms777/Positive_Negative_Japanese_Classifier')

# All eight emotions
eight_emotions_classifier = AutoModelForSequenceClassification.from_pretrained('experiarms777/Eight_Emotions_Japanese_Classifier')

# Surprise vs. other emotions
surprise_otheremotions_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Surprise_Detection_Japanese")

# Negative emotions
negative_emotions_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Negative_Emotions_Classifier_Japanese")

# Positive emotions
positive_emotions_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Positive_Emotions_Classifier_Japanese")

# Positive vs. very positive
positive_verypositive_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Positive_Very_Positive_Classifier_Japanese")

# Negative vs. very negative
negative_verynegative_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Negative_Very_Negative_Classifier_Japanese")

# Joy vs. Trust
joy_trust_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Joy_Trust_Classifier_Japanese")

# Anger vs. Disgust
anger_disgust_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Anger_Disgust_Classifier_Japanese")

In [None]:
# Load the "single label" variant of the ten classifiers from the Hugging Face Hub.
# This cell binds the same ten names as the "Load list models" cell above it, so
# whichever of the two cells runs last decides which checkpoints are used.
# Five checkpoints are 'Single_Label_*' variants (eight emotions, negative
# emotions, positive emotions, Joy/Trust, Anger/Disgust); the other five are the
# same checkpoints as in the previous cell.
from transformers import AutoModelForSequenceClassification

# Neutral vs. Positive/Negative (same checkpoint as the previous cell)
neutral_emotional_classifier = AutoModelForSequenceClassification.from_pretrained('experiarms777/Neutral_Emotional_Classifier')

# Positive vs. Negative (same checkpoint as the previous cell)
positive_negative_classifier = AutoModelForSequenceClassification.from_pretrained('experiarms777/Positive_Negative_Japanese_Classifier')

# All eight emotions (single-label checkpoint)
eight_emotions_classifier = AutoModelForSequenceClassification.from_pretrained('experiarms777/Single_Label_Eight_Emotions_Classifier_Japanese')

# Surprise vs. other emotions (same checkpoint as the previous cell)
surprise_otheremotions_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Surprise_Detection_Japanese")

# Negative emotions (single-label checkpoint)
negative_emotions_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Single_Label_Negative_Emotions_Classifier_Japanese")

# Positive emotions (single-label checkpoint)
positive_emotions_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Single_Label_Positive_Emotions_Classifier_Japanese")

# Positive vs. very positive (same checkpoint as the previous cell)
positive_verypositive_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Positive_Very_Positive_Classifier_Japanese")

# Negative vs. very negative (same checkpoint as the previous cell)
negative_verynegative_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Negative_Very_Negative_Classifier_Japanese")

# Joy vs. Trust (single-label checkpoint)
joy_trust_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Single_Label_Joy_Trust_Classifier_Japanese")

# Anger vs. Disgust (single-label checkpoint)
anger_disgust_classifier = AutoModelForSequenceClassification.from_pretrained("experiarms777/Single_Label_Anger_Disgust_Classifier_Japanese")

In [None]:
# Move every classifier to the selected device and rebind each name to the
# object returned by .to(device).
(
    neutral_emotional_classifier,
    positive_negative_classifier,
    eight_emotions_classifier,
    surprise_otheremotions_classifier,
    negative_emotions_classifier,
    positive_emotions_classifier,
    positive_verypositive_classifier,
    negative_verynegative_classifier,
    joy_trust_classifier,
    anger_disgust_classifier,
) = (
    model.to(device)
    for model in (
        neutral_emotional_classifier,
        positive_negative_classifier,
        eight_emotions_classifier,
        surprise_otheremotions_classifier,
        negative_emotions_classifier,
        positive_emotions_classifier,
        positive_verypositive_classifier,
        negative_verynegative_classifier,
        joy_trust_classifier,
        anger_disgust_classifier,
    )
)


In [None]:
def classify_neutral_postivenegative(inputs, threshold):
    """Decide between "Neutral" and "Positive or Negative".

    Runs ``neutral_emotional_classifier`` on ``inputs`` (keyword arguments for
    the model) without tracking gradients and applies a softmax to the logits.
    Returns "Positive or Negative" when the probability at class index 1
    exceeds ``threshold``, otherwise "Neutral".

    NOTE(review): the squeeze()/index pattern assumes a single example per
    call - confirm if batching is ever introduced.
    """
    with torch.no_grad():
        logits = neutral_emotional_classifier(**inputs).logits
    # Index 1 is treated as the "Positive/Negative" class
    emotional_probability = torch.softmax(logits, dim=-1).squeeze().tolist()[1]
    return "Positive or Negative" if emotional_probability > threshold else "Neutral"

In [None]:
def classify_eight_emotions(inputs, threshold = 0.4):
    """Pick one of the eight emotion labels using ``eight_emotions_classifier``.

    "Trust" (class index 7) is returned as soon as its softmax probability
    exceeds ``threshold``, even if another class scores higher. Otherwise the
    label of the highest-scoring class is returned.
    """
    labels = ['Joy', 'Sadness', 'Anticipation', 'Surprise', 'Anger', 'Fear', 'Disgust', 'Trust']
    with torch.no_grad():
        logits = eight_emotions_classifier(**inputs).logits

    # Trust gets priority once it clears the threshold
    trust_probability = torch.softmax(logits, dim=-1).squeeze().tolist()[7]
    if trust_probability > threshold:
        return "Trust"

    # Otherwise fall back to the plain arg-max over the logits
    best_index = torch.argmax(logits, dim=-1).item()
    return labels[best_index]

In [None]:
def classify_positive_negative(inputs, threshold):
    """Classify ``inputs`` as "Positive", "Negative" or "Neutral".

    Uses the softmax output of ``positive_negative_classifier``: index 1 is
    treated as the "Positive" class and index 0 as the "Negative" class.
    "Positive" is checked first; "Neutral" is returned when neither
    probability exceeds ``threshold``.
    """
    with torch.no_grad():
        logits = positive_negative_classifier(**inputs).logits
    class_probabilities = torch.softmax(logits, dim=-1).squeeze().tolist()

    if class_probabilities[1] > threshold:
        return "Positive"
    if class_probabilities[0] > threshold:
        return "Negative"
    return "Neutral"

In [None]:
def classify_surprise_other(inputs, threshold):
    """Decide between "Surprise" and "Other Emotions".

    Runs ``surprise_otheremotions_classifier`` on ``inputs`` and returns
    "Surprise" when the softmax probability at class index 1 exceeds
    ``threshold``; otherwise "Other Emotions". Index 1 is assumed to be the
    "Surprise" class.
    """
    with torch.no_grad():
        logits = surprise_otheremotions_classifier(**inputs).logits
    surprise_probability = torch.softmax(logits, dim=-1).squeeze().tolist()[1]
    return "Surprise" if surprise_probability > threshold else "Other Emotions"

In [None]:
def classify_Joy_Trust(inputs, threshold):
    """Decide between "Joy" and "Trust" with ``joy_trust_classifier``.

    Returns "Joy" when the softmax probability at class index 0 exceeds
    ``threshold``; otherwise "Trust".
    """
    with torch.no_grad():
        logits = joy_trust_classifier(**inputs).logits
    joy_probability = torch.softmax(logits, dim=-1).squeeze().tolist()[0]
    return "Joy" if joy_probability > threshold else "Trust"

In [None]:
def classify_Anger_Disgust(inputs, threshold):
    """Decide between "Anger" and "Disgust" with ``anger_disgust_classifier``.

    Returns "Disgust" when the softmax probability at class index 1 exceeds
    ``threshold``; otherwise "Anger".
    """
    with torch.no_grad():
        logits = anger_disgust_classifier(**inputs).logits
    disgust_probability = torch.softmax(logits, dim=-1).squeeze().tolist()[1]
    return "Disgust" if disgust_probability > threshold else "Anger"

In [None]:
def classify_positive_verypositive(inputs):
    """Return the softmax probability at class index 1 of ``positive_verypositive_classifier``.

    The callers treat this value as a "very positive" score for ``inputs``.
    """
    with torch.no_grad():
        logits = positive_verypositive_classifier(**inputs).logits
    class_probabilities = torch.softmax(logits, dim=-1).squeeze().tolist()
    return class_probabilities[1]

def adjust_probabilities_with_positivity(probabilities, inputs):
    """Re-weight the positive-emotion probabilities by how positive the text is.

    Each probability is multiplied by
    ``1 + (sentiment_score / max_sentiment_score) * positivity_score``, where
    ``positivity_score`` comes from ``classify_positive_verypositive(inputs)``
    and the sentiment scores are the fixed values for Joy, Anticipation and
    Trust (in that order). The result is normalised to sum to 1.

    Returns a list with one adjusted probability per (probability, score) pair.
    """
    positivity_score = classify_positive_verypositive(inputs)
    sentiment_scores = [1.117353, 0.653459, 1.197159]  # Joy, Anticipation, Trust
    strongest_score = max(sentiment_scores)

    boosted = [
        probability * (1 + (score / strongest_score) * positivity_score)
        for probability, score in zip(probabilities, sentiment_scores)
    ]

    # Renormalise so the adjusted values form a distribution again
    total = sum(boosted)
    return [value / total for value in boosted]

def classify_positive_emotions(inputs, threshold = 0.4):
    """Pick "Joy", "Anticipation" or "Trust" for ``inputs``.

    The softmax output of ``positive_emotions_classifier`` is re-weighted by
    ``adjust_probabilities_with_positivity``. "Trust" (index 2) is returned as
    soon as its adjusted probability exceeds ``threshold``; otherwise the
    label with the highest adjusted probability is returned.
    """
    labels = ['Joy', 'Anticipation', 'Trust']
    with torch.no_grad():
        logits = positive_emotions_classifier(**inputs).logits
    raw_probabilities = torch.softmax(logits, dim=-1).squeeze().tolist()
    adjusted = adjust_probabilities_with_positivity(raw_probabilities, inputs)

    # Trust gets priority once it clears the threshold
    if adjusted[2] > threshold:
        return "Trust"

    best_index = torch.argmax(torch.tensor(adjusted), dim=-1).item()
    return labels[best_index]

In [None]:
def classify_negative_verynegative(inputs):
    """Return the softmax probability at class index 1 of ``negative_verynegative_classifier``.

    The callers treat this value as a "very negative" score for ``inputs``.
    """
    with torch.no_grad():
        logits = negative_verynegative_classifier(**inputs).logits
    class_probabilities = torch.softmax(logits, dim=-1).squeeze().tolist()
    return class_probabilities[1]

def adjust_probabilities_with_negativity(probabilities, inputs):
    """Re-weight the negative-emotion probabilities by how negative the text is.

    Each probability is multiplied by
    ``1 + (sentiment_score / most_negative_score) * negativity_score``, where
    ``negativity_score`` comes from ``classify_negative_verynegative(inputs)``
    and the sentiment scores are the fixed values for Sadness, Anger, Fear and
    Disgust (in that order). Dividing by the most negative score makes every
    ratio positive. The result is normalised to sum to 1.

    Returns a list with one adjusted probability per (probability, score) pair.
    """
    negativity_score = classify_negative_verynegative(inputs)
    sentiment_scores = [-1.029128, -1.365032, -0.892089, -1.148589]  # Sadness, Anger, Fear, Disgust
    most_negative_score = min(sentiment_scores)

    boosted = [
        probability * (1 + (score / most_negative_score) * negativity_score)
        for probability, score in zip(probabilities, sentiment_scores)
    ]

    # Renormalise so the adjusted values form a distribution again
    total = sum(boosted)
    return [value / total for value in boosted]

def classify_negative_emotions(inputs):
    """Pick "Sadness", "Anger", "Fear" or "Disgust" for ``inputs``.

    The softmax output of ``negative_emotions_classifier`` is re-weighted by
    ``adjust_probabilities_with_negativity`` and the label with the highest
    adjusted probability is returned.
    """
    labels = ['Sadness','Anger', 'Fear', 'Disgust']
    with torch.no_grad():
        logits = negative_emotions_classifier(**inputs).logits
    raw_probabilities = torch.softmax(logits, dim=-1).squeeze().tolist()
    adjusted = adjust_probabilities_with_negativity(raw_probabilities, inputs)
    best_index = torch.argmax(torch.tensor(adjusted), dim=-1).item()
    return labels[best_index]

In [None]:
def predict_primary_emotion(text, thresholds_neutral_positive_negative, thresholds_positive_negative, thresholds_surprise_other):
    """Route one example through the classifier cascade and return an emotion label.

    Cascade:
      1. ``classify_neutral_postivenegative`` decides "Neutral" vs.
         "Positive or Negative".
      2. For "Positive or Negative", ``classify_positive_negative`` picks the
         polarity; "Positive" goes to ``classify_positive_emotions`` and
         "Negative" to ``classify_negative_emotions``.
      3. Everything else ("Neutral" at step 1, or a "Neutral" polarity at
         step 2) is checked by ``classify_surprise_other``; if that does not
         say "Surprise", ``classify_eight_emotions`` decides.

    Args:
        text: Passed unchanged to every classifier. Despite the name, ``main``
            passes the tokenized model inputs here, not a raw string.
        thresholds_neutral_positive_negative: Threshold for step 1.
        thresholds_positive_negative: Threshold for the polarity classifier.
        thresholds_surprise_other: Threshold for the Surprise check.

    Returns:
        The predicted emotion label, or None if step 1 yields a verdict other
        than "Neutral" / "Positive or Negative".
    """
    sentiment = classify_neutral_postivenegative(text, thresholds_neutral_positive_negative)

    if sentiment == "Positive or Negative":
        # Run the polarity model once and reuse its verdict; calling it for each
        # comparison would repeat the same forward pass on the same inputs.
        polarity = classify_positive_negative(text, thresholds_positive_negative)
        if polarity == 'Positive':
            return classify_positive_emotions(text)
        if polarity == 'Negative':
            return classify_negative_emotions(text)
    elif sentiment != "Neutral":
        # Unknown verdict: keep the historical behaviour of returning None.
        return None

    # Shared fallback for neutral sentiment and for undecided polarity
    if classify_surprise_other(text, thresholds_surprise_other) == 'Surprise':
        return 'Surprise'
    return classify_eight_emotions(text)

def main(text, thresholds_neutral_positive_negative, thresholds_positive_negative, thresholds_surprise_other, thresholds_Joy_Trust, thresholds_Anger_Disgust):
    """Predict the emotion label for ``text``.

    The text is tokenized (truncated to at most 128 tokens), the resulting
    tensors are moved to ``device``, and ``predict_primary_emotion`` produces a
    first label. Two labels are then refined by a dedicated binary classifier:
    'Joy' is re-checked with ``classify_Joy_Trust`` and 'Disgust' with
    ``classify_Anger_Disgust``. Any other label is returned as is.

    Note the parameter order: the Joy/Trust threshold comes before the
    Anger/Disgust threshold.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    primary_emotion = predict_primary_emotion(
        inputs,
        thresholds_neutral_positive_negative,
        thresholds_positive_negative,
        thresholds_surprise_other,
    )

    # Second-stage refinement for the two labels that have a dedicated classifier
    if primary_emotion == 'Joy':
        return classify_Joy_Trust(inputs, thresholds_Joy_Trust)
    if primary_emotion == 'Disgust':
        return classify_Anger_Disgust(inputs, thresholds_Anger_Disgust)
    return primary_emotion


In [None]:
# Decision thresholds passed to main() by the evaluation cell below.
threshold_npn = 0.9  # neutral vs. positive/negative classifier
threshold_pn = 0.9   # positive vs. negative classifier
threshold_so = 0.9   # surprise vs. other emotions classifier
threshold_ad = 0.4   # Anger vs. Disgust classifier
threshold_jt = 0.3   # Joy vs. Trust classifier

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Predict an emotion for every test sentence.
# main() takes the Joy/Trust threshold BEFORE the Anger/Disgust threshold, so the
# two are passed by keyword; passing (threshold_ad, threshold_jt) positionally
# would silently swap them.
predicted_emotions = df_test['Sentence'].apply(
    lambda sentence: main(
        sentence,
        threshold_npn,
        threshold_pn,
        threshold_so,
        thresholds_Joy_Trust=threshold_jt,
        thresholds_Anger_Disgust=threshold_ad,
    )
)

# True labels
true_emotions = df_test['Primary_Emotion']

# Calculate metrics (macro-averaged; classes without predictions count as 0
# instead of raising a warning)
accuracy = accuracy_score(true_emotions, predicted_emotions)
precision = precision_score(true_emotions, predicted_emotions, average='macro', zero_division=0)
recall = recall_score(true_emotions, predicted_emotions, average='macro', zero_division=0)
f1 = f1_score(true_emotions, predicted_emotions, average='macro', zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

# For a detailed classification report
print(classification_report(true_emotions, predicted_emotions))


In [None]:
# Store the predictions next to the gold labels in the test split
df_test['Predicted_Emotion'] = predicted_emotions

# Keep only the rows whose prediction differs from the true label
incorrect_predictions = df_test.loc[
    lambda frame: frame['Primary_Emotion'] != frame['Predicted_Emotion']
]

# Write the misclassified sentences with their true and predicted labels to a CSV file
incorrect_predictions.loc[:, ['Sentence', 'Primary_Emotion', 'Predicted_Emotion']].to_csv(
    'listalonelincorrect_predictions.csv',
    index=False,
)



In [None]:
# Candidate values for each threshold; the grid-search cell below iterates over
# every combination of these lists.
thresholds_neutral_positive_negative = [0.9]  # neutral vs. positive/negative
thresholds_positive_negative = [0.9]          # positive vs. negative
thresholds_surprise_other = [0.9]             # surprise vs. other emotions
thresholds_Anger_Disgust = [0.4]              # Anger vs. Disgust
thresholds_Joy_Trust = [0.3, 0.4, 0.5]        # Joy vs. Trust

In [None]:
#Grid search for thresholds
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Exhaustive search over the threshold grid; the combination with the highest
# F1 on the dev set wins.
# NOTE(review): evaluate_model_with_thresholds is not defined in the visible part
# of this notebook. Confirm it exists and that its positional order is
# (df, npn, pn, so, ad, jt) - main() itself expects Joy/Trust before Anger/Disgust.
# The loop variables reuse the names threshold_npn ... threshold_jt, so after this
# cell those globals hold the last grid values, not the ones set earlier.
best_f1 = 0
best_thresholds = (0, 0, 0, 0, 0)

for threshold_npn in thresholds_neutral_positive_negative:
    for threshold_pn in thresholds_positive_negative:
        for threshold_so in thresholds_surprise_other:
            for threshold_ad in thresholds_Anger_Disgust:
                for threshold_jt in thresholds_Joy_Trust:
                    current_accuracy, precision, recall, f1 = evaluate_model_with_thresholds(df_dev, threshold_npn, threshold_pn, threshold_so, threshold_ad, threshold_jt)
                    print(f"Accuracy with NPN={threshold_npn} and PN={threshold_pn} and SO={threshold_so} and AD={threshold_ad} and JT={threshold_jt}: {current_accuracy} {precision} {recall} {f1}")

                    # Selection criterion is F1, not accuracy
                    if f1 > best_f1:
                        best_f1 = f1
                        best_thresholds = (threshold_npn, threshold_pn, threshold_so, threshold_ad, threshold_jt)

# The reported score is the best F1 (the message used to call it "accuracy")
print(f"Best thresholds: NPN={best_thresholds[0]}, PN={best_thresholds[1]}, SO={best_thresholds[2]}, AD={best_thresholds[3]}, JT={best_thresholds[4]} with F1: {best_f1}")