In [1]:
"""
Example usage of SentimentFlow with the expanded NRC lexicon.
"""

from nrclex import NRCLex
from ExpandNRC import EmotionDistanceCalculator
import matplotlib.pyplot as plt

# Use local lexicon json for updated version of NRC
feelings_nrc = NRCLex("/Users/Panos/Library/CloudStorage/Dropbox/PI_Squared/PycharmProjects/Research/NRCLex/nrc_v3.json")
emotion_lexicon = feelings_nrc.__lexicon__

# Initialize the calculator (using CPU for simplicity)
calculator = EmotionDistanceCalculator(emotion_lexicon, device="cpu")

# Demonstration: get emotions for single words and a batch
result_single = calculator.get_emotions("happy")
print("Emotions for 'happy':", result_single)

result_batch = calculator.get_emotions(["happy", "sad", "morning"])
print("Emotions for batch:", result_batch)

nrc_emotions = calculator.nrc_emotions(["happy", "sad", "morning"])
print("NRC emotions:", nrc_emotions)

Device set to use cpu


Loaded cached embeddings for 4566 words from /Users/Panos/.cache/ExpandNRC/lexicon_embeddings_cache.pkl
Emotions for 'happy': {'happy': {'anticipation': np.float64(1.0), 'joy': np.float64(1.0), 'trust': np.float64(1.0)}}
Emotions for batch: {'happy': {'anticipation': np.float64(1.0), 'joy': np.float64(1.0), 'trust': np.float64(1.0)}, 'sad': {'fear': np.float64(0.5472803863453726), 'sadness': np.float64(0.6876299497867969)}, 'morning': {'neutral': np.float64(0.5074773250951323)}}
NRC emotions: {'happy': ['anticipation', 'joy', 'trust'], 'sad': ['fear', 'sadness'], 'morning': ['neutral']}


In [6]:
# Helper function to extract a numeric value from a value
def get_numeric_value(val):
    if isinstance(val, (int, float)):
        return val
    elif isinstance(val, dict):
        # Sum numeric values within the nested dictionary
        total = 0
        for sub_val in val.values():
            if isinstance(sub_val, (int, float)):
                total += sub_val
        return total
    else:
        return 0

# Function to aggregate emotion dictionaries.
def aggregate_emotions(emotion_results):
    aggregated = {}
    for emo in emotion_results:
        if isinstance(emo, dict):
            for key, value in emo.items():
                # Use our helper to get a numeric value for each emotion
                aggregated[key] = aggregated.get(key, 0) + get_numeric_value(value)
        else:
            # Skip results that are not dictionaries
            print("Warning: Expected dictionary, got:", emo)
    return aggregated

# Function to compute a simple sentiment score from aggregated emotions.
# Positive emotions: joy and trust; Negative emotions: anger, fear, sadness, disgust.
def compute_sentiment(aggregated_emotions):
    positive = aggregated_emotions.get("joy", 0) + aggregated_emotions.get("trust", 0)
    negative = (aggregated_emotions.get("anger", 0) +
                aggregated_emotions.get("fear", 0) +
                aggregated_emotions.get("sadness", 0) +
                aggregated_emotions.get("disgust", 0))
    return positive - negative

# Sample text to analyze
sample_text = """
I am very happy today. The sun is shining and I feel full of energy.
However, sometimes I feel a tinge of sadness when I think about the past.
Overall, I trust that better days are coming.
"""

# Split the sample text into sentences (a simple period-based split)
sentences = [sentence.strip() for sentence in sample_text.split('.') if sentence.strip()]

sentiment_scores = []
for sentence in sentences:
    # Split sentence into words (basic tokenization; consider using a more robust tokenizer)
    words = sentence.split()
    # Process each word individually to get its emotion score
    word_emotions = [calculator.get_emotions(word) for word in words]
    # Aggregate emotion scores for the entire sentence
    agg_emotions = aggregate_emotions(word_emotions)
    # Compute sentiment score for the sentence
    score = compute_sentiment(agg_emotions)
    sentiment_scores.append(score)
    print(f"Sentence: {sentence}")
    print(f"Aggregated Emotions: {agg_emotions}")
    print(f"Sentiment score: {score}")

# Plot the sentiment flow over sentences
plt.figure(figsize=(10, 4))
plt.plot(sentiment_scores, marker='o')
plt.title("Sentiment Flow")
plt.xlabel("Sentence Index")
plt.ylabel("Sentiment Score")
plt.grid(True)
plt.show()

Sentence: I am very happy today
Aggregated Emotions: {'I': 0.5, 'am': 0.5, 'very': 0.5, 'happy': np.float64(3.0), 'today': 0.5}
Sentiment score: 0
Sentence: The sun is shining and I feel full of energy
Aggregated Emotions: {'The': np.float64(1.5787563877728714), 'sun': np.float64(2.557617025706988), 'is': 0.5, 'shining': np.float64(4.619284115727158), 'and': 0.5, 'I': 0.5, 'feel': np.float64(1.0), 'full': np.float64(0.5788951832837793), 'of': 0.5, 'energy': 0.5}
Sentiment score: 0
Sentence: However, sometimes I feel a tinge of sadness when I think about the past
Aggregated Emotions: {'However,': 0.5, 'sometimes': 0.5, 'I': 1.0, 'feel': np.float64(1.0), 'a': 0.5, 'tinge': 0.5, 'of': 0.5, 'sadness': np.float64(3.5112136562423255), 'when': 0.5, 'think': np.float64(1.5090485987139286), 'about': 0.5, 'the': np.float64(1.5787563877728714), 'past': 0.5}
Sentiment score: -3.5112136562423255
Sentence: Overall, I trust that better days are coming
Aggregated Emotions: {'Overall,': 0.5, 'I': 0.5, 

KeyboardInterrupt: 

In [17]:
from SentimentFlow import SpeechProcessor
feelings_nrc = NRCLex("/Users/Panos/Library/CloudStorage/Dropbox/PI_Squared/PycharmProjects/Research/NRCLex/nrc_v3.json")
processor = SpeechProcessor("../data/senticnet.tsv")

In [19]:
"""
Comparison: Lexicon evolution (using nrc_emotions) vs. SentimentFlow simulation evolution.
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.integrate import odeint

# --- Setup for Lexicon Analysis using ExpandNRC ---
from nrclex import NRCLex
from ExpandNRC import EmotionDistanceCalculator

# Load the updated NRC lexicon JSON (update the path as needed)
feelings_nrc = NRCLex("/Users/Panos/Library/CloudStorage/Dropbox/PI_Squared/PycharmProjects/Research/NRCLex/nrc_v3.json")
emotion_lexicon = feelings_nrc.__lexicon__

# Initialize the calculator (using CPU)
calculator = EmotionDistanceCalculator(emotion_lexicon, device="cpu")

# We'll use the nrc_emotions() method so that we get only the emotion labels.
# Define the standard NRC emotions (you can adjust this list as needed)
nrc_emotions_list = ["anger", "anticipation", "disgust", "fear", "joy", "sadness", "surprise", "trust"]

# --- Lexicon-based Evolution: Split text into sentences and, for each sentence,
# use nrc_emotions on its words to produce a binary indicator per emotion:
# 0 if at least one word in the sentence has that emotion, 1 otherwise.
sample_text = (
    "I am very happy today. The sun is shining and I feel full of energy. "
    "However, sometimes I feel a tinge of sadness when I think about the past. "
    "Overall, I trust that better days are coming."
)
sentences = [s.strip() for s in sample_text.split('.') if s.strip()]

# For each sentence, produce a dictionary: {emotion: 0 (if present) or 1 (if absent)}
lexicon_evolution = {emo: [] for emo in nrc_emotions_list}
for sentence in sentences:
    words = sentence.split()  # basic tokenization
    # nrc_emotions() expects a list of words; assume it returns a list of lists,
    # one per word, each being the set of emotions detected.
    word_emotions = calculator.nrc_emotions(words)
    # For each NRC emotion, set value 0 if at least one word has that emotion; else 1.
    for emo in nrc_emotions_list:
        present = any( (isinstance(em_list, list) and emo in em_list) for em_list in word_emotions )
        lexicon_evolution[emo].append(0 if present else 1)

# --- Setup for SentimentFlow Simulation ---
# We use the SentimentFlow package components.
from SentimentFlow import SpeechProcessor, SentimentFlowCalculator

# Process the text with SenticNet-based SpeechProcessor.
# (Update the path to your SenticNet data file as needed.)
processor = SpeechProcessor("../data/senticnet.tsv")
df_text = pd.DataFrame({"text": [sample_text]})
processed_texts = processor.process_texts(df_text["text"])

# For simulation, we work with the first (and only) processed text.
processed_row = processed_texts.iloc[0]
# Assume all columns except 'text' hold the baseline emotional values.
sentiment_columns = processed_texts.columns.difference(["text"])
s0 = processed_row[sentiment_columns].to_numpy(dtype=float)

# We'll define a mapping between NRC emotions and the SenticNet dimensions.
nrc_to_sentic = {
    "joy": "INTROSPECTION#joy",
    "anger": "ATTITUDE#annoyance",
    "disgust": "SENSITIVITY#loathing",
    "anticipation": "SENSITIVITY#enthusiasm",
    "surprise": "TEMPER#serenity",
    "sadness": "INTROSPECTION#ecstasy",
    "fear": "POLARITY",      # arbitrary mapping
    "trust": "ATTITUDE"      # arbitrary mapping
}

# Find indices in the baseline vector corresponding to each mapped dimension.
sentic_columns = list(sentiment_columns)
mapped_indices = {}
for emo, col in nrc_to_sentic.items():
    if col in sentic_columns:
        mapped_indices[emo] = sentic_columns.index(col)
    else:
        print(f"Warning: {col} not found in processed features.")

# Prepare the simulation using SentimentFlowCalculator methods.
sf_calc = SentimentFlowCalculator()

# Compute the external contextual force from POLARITY.
g_context = SentimentFlowCalculator._calculate_external_contextual_force(processed_row["POLARITY"])
# Compute sentiment density, pressure, and viscosity from s0.
rho_sent = SentimentFlowCalculator._calculate_sentiment_density(s0)
# For pressure, the original code applies the _calculate_sentiment_pressure to each component.
p_sent = np.array([sf_calc._calculate_sentiment_pressure(score, processed_row["text"]) for score in s0])
nu_sent = SentimentFlowCalculator._calculate_sentiment_viscosity(s0)
speech_info = (rho_sent, p_sent, nu_sent, g_context)

# Increase simulation steps by integrating over a longer time.
# For example, simulate from t=0 to t=10 with 11 steps.
t = np.linspace(0, 10, 11)
s_simulation = odeint(SentimentFlowCalculator._differential_equation, s0, t, args=(speech_info,))

# For each mapped NRC emotion, extract its simulation evolution.
simulation_evolution = {}
for emo, idx in mapped_indices.items():
    simulation_evolution[emo] = s_simulation[:, idx]

# --- Plotting ---
# For each NRC emotion in our list, if it is mapped, plot two curves:
# 1. Lexicon evolution over sentences (x-axis: sentence index; y: binary value)
# 2. Simulation evolution over time (x-axis: simulation time; y: simulated value)
for emo in nrc_emotions_list:
    if emo in mapped_indices:
        fig, ax = plt.subplots(2, 1, figsize=(8, 6), sharex=False)
        # Plot lexicon evolution (discrete per sentence)
        ax[0].plot(range(len(sentences)), lexicon_evolution[emo], marker='o', linestyle='--', color='blue')
        ax[0].set_title(f"Lexicon-based Evolution for NRC '{emo}'")
        ax[0].set_xlabel("Sentence Index")
        ax[0].set_ylabel("Binary Indicator (0 = Present)")
        ax[0].set_ylim(-0.5, 1.5)
        ax[0].set_xticks(range(len(sentences)))
        # Plot simulation evolution for the mapped baseline dimension
        ax[1].plot(t, simulation_evolution[emo], marker='x', linestyle='-', color='red')
        ax[1].set_title(f"SentimentFlow Simulation for '{nrc_to_sentic[emo]}' (mapped from NRC '{emo}')")
        ax[1].set_xlabel("Time Step")
        ax[1].set_ylabel("Simulated Value")
        ax[1].grid(True)
        plt.tight_layout()
        plt.show()
    else:
        print(f"Skipping NRC emotion '{emo}' because it is not mapped to a SenticNet dimension.")

Device set to use cpu


Loaded cached embeddings for 4566 words from /Users/Panos/.cache/ExpandNRC/lexicon_embeddings_cache.pkl


27-Feb-25 11:21:54 - Starting to process


Processing texts:   0%|          | 0/1 [00:00<?, ?it/s]

27-Feb-25 11:21:55 - Saving results to results/processed_texts.csv
  s_simulation = odeint(SentimentFlowCalculator._differential_equation, s0, t, args=(speech_info,))


In [37]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.integrate import odeint

# Assumed predefined variables:
# feelings_nrc = NRCLex("/Users/Panos/Library/CloudStorage/Dropbox/PI_Squared/PycharmProjects/Research/NRCLex/nrc_v3.json")
# processor = SpeechProcessor("../data/senticnet.tsv")
# And the ExpandNRC calculator is initialized as:
calculator = EmotionDistanceCalculator(feelings_nrc.__lexicon__, device="cpu")

# ---------- Part 1: Lexicon Analysis Per Word ----------
# Define the eight standard NRC emotions.
nrc_emotions_list = ["anger", "anticipation", "disgust", "fear", "joy", "sadness", "surprise", "trust"]

# Tokenize the sample text into words.
sample_text = (
    "I am very happy today. The sun is shining and I feel full of energy. "
    "However, sometimes I feel a tinge of sadness when I think about the past. "
    "Overall, I trust that better days are coming."
)
words = sample_text.split()  # simple tokenization

# For each word, obtain the NRC emotion labels using nrc_emotions.
lexicon_results = []
for word in words:
    result = calculator.nrc_emotions([word], threshold=0.4)
    # When passing a single word, assume the method returns a dict keyed by that word.
    if isinstance(result, dict):
        emotions = result.get(word, [])
    elif isinstance(result, list):
        emotions = result[0]
    else:
        emotions = []
    lexicon_results.append(emotions)

# Create binary indicator per word for each of the 8 NRC emotions.
lexicon_indicators = {emo: [] for emo in nrc_emotions_list}
for em_list in lexicon_results:
    for emo in nrc_emotions_list:
        lexicon_indicators[emo].append(1 if (isinstance(em_list, list) and emo in em_list) else 0)

# ---------- Part 2: SenticNet/SentimentFlow Analysis of the Whole Text ----------
# Process the whole text using the SpeechProcessor.
df_text = pd.DataFrame({"text": [sample_text]})
processed_texts = processor.process_texts(df_text["text"])
processed_row = processed_texts.iloc[0]

# The baseline sentiment features (SenticNet dimensions) are in all columns except 'text'.
sentiment_columns = processed_texts.columns.difference(["text"])
# Create the initial state vector (s0) from the processed features.
s0 = processed_row[sentiment_columns].to_numpy(dtype=float)

# For simulation, use the SentimentFlowCalculator’s methods.
from SentimentFlow import SentimentFlowCalculator
sf_calc = SentimentFlowCalculator()

# Compute external contextual force, sentiment density, pressure, and viscosity.
g_context = SentimentFlowCalculator._calculate_external_contextual_force(processed_row["POLARITY"])
rho_sent = SentimentFlowCalculator._calculate_sentiment_density(s0)
p_sent = np.array([sf_calc._calculate_sentiment_pressure(score, processed_row["text"]) for score in s0])
nu_sent = SentimentFlowCalculator._calculate_sentiment_viscosity(s0)
speech_info = (rho_sent, p_sent, nu_sent, g_context)

# Increase simulation time resolution to match the number of words.
t = np.arange(len(words))
s_simulation = odeint(SentimentFlowCalculator._differential_equation, s0, t, args=(speech_info,))

# For readability, clip the simulation values.
clip_min = -3
clip_max = 4
s_simulation_clipped = np.clip(s_simulation, clip_min, clip_max)

# Get the list of SenticNet (sentiment flow) dimensions.
sentic_columns = list(sentiment_columns)

# ---------- Part 3: Plotting and Saving Each Plot ----------
# Create the plots folder if it doesn't exist.
os.makedirs("plots", exist_ok=True)

# For each lexicon emotion, create an individual plot.
for emo in nrc_emotions_list:
    fig, ax = plt.subplots(figsize=(8, 6))
    x = range(len(words))  # x-axis: word index

    # Plot lexicon binary indicator (blue solid line)
    ax.plot(x, lexicon_indicators[emo], marker='o', linestyle='-', color='blue', label='Lexicon Indicator')

    # Overlay all SenticNet simulation dimensions (red dashed lines)
    for j, col in enumerate(sentic_columns):
        ax.plot(x, s_simulation_clipped[:, j], marker='x', linestyle='--',
                label=f"SenticNet: {col}")

    ax.set_title(f"Lexicon Emotion: '{emo}'")
    ax.set_xlabel("Word Index")
    ax.set_ylabel("Clipped Value")
    ax.legend(fontsize=8, loc="upper right")
    ax.grid(True)
    plt.tight_layout()

    # Save each plot in the "plots" folder, using the emotion as the filename.
    filename = os.path.join("plots", f"{emo}.png")
    plt.savefig(filename)
    plt.close(fig)

Device set to use cpu


Loaded cached embeddings for 4566 words from /Users/Panos/.cache/ExpandNRC/lexicon_embeddings_cache.pkl


27-Feb-25 12:01:42 - Starting to process


Processing texts:   0%|          | 0/1 [00:00<?, ?it/s]

27-Feb-25 12:01:42 - Saving results to results/processed_texts.csv
  s_simulation = odeint(SentimentFlowCalculator._differential_equation, s0, t, args=(speech_info,))
