# Data exploration - looking for the neutral sentiment interval

In [2]:
import json

In [4]:
# Load the corpus JSON; later cells read its "sentences" list.
# The encoding is passed explicitly because open() otherwise falls back to the
# platform's locale encoding, which is not UTF-8 on every system and can fail
# or mis-decode non-ASCII text. Assumes the file is UTF-8, the standard JSON
# encoding.
file_path = "../data/eng-houn.json"
with open(file_path, "r", encoding="utf-8") as f:
    data = json.load(f)

In [192]:
# Inspect one sentence: its raw text next to every concept lemma that carries
# a sentiment score.
example_sentence = data["sentences"][38]
sentence_text = example_sentence["text"]

# Concepts without a "sentiment" key are skipped; a missing "clemma" becomes "".
words_with_sentiment = [
    (entry.get("clemma", ""), entry["sentiment"])
    for entry in example_sentence.get("concepts", [])
    if "sentiment" in entry
]

# Bare tuple as the last expression so the notebook displays both values.
sentence_text, words_with_sentiment

("'Has anything escaped me?' I asked, with some self-importance. ",
 [('escape', -34.0)])

In [90]:
# One (clemma, sentiment) pair per concept occurrence that has a "sentiment"
# key and a non-empty clemma; repeated occurrences are kept.
concept_sentiment_words = [
    (entry.get("clemma"), entry["sentiment"])
    for sent in data.get("sentences", [])
    for entry in sent.get("concepts", [])
    if "sentiment" in entry and entry.get("clemma")
]

# Group the distinct clemmata under each sentiment value.
concept_sentiment_dict = {}
for lemma, score in concept_sentiment_words:
    concept_sentiment_dict.setdefault(score, set()).add(lemma)

# Sample: the first five clemmata (in sorted order) for each sentiment value.
concept_sentiment_examples = {}
for score, lemmas in concept_sentiment_dict.items():
    concept_sentiment_examples[score] = sorted(lemmas)[:5]

concept_sentiment_examples


{-64.0: ['abhor', 'agitated', 'agitation', 'agonize', 'agony'],
 64.0: ['admirable', 'affection', 'beautiful', 'beautifully', 'beauty'],
 34.0: ['able', 'accomplish', 'achievement', 'admirable', 'admiration'],
 95.0: ['best', 'brilliant', 'excellent', 'exultation', 'formidable'],
 0.0: ['Sherlock Holmes', 'a few', 'a good deal', 'a great deal', 'ability'],
 -34.0: ['abandon', 'abet', 'abortive', 'absence', 'absent-minded'],
 -95.0: ['despair', 'diabolical', 'evil', 'fiendish', 'foul'],
 -22.0: ['careless', 'hesitate'],
 21.0: ['safe'],
 19.0: ['convenient'],
 -47.0: ['by thunder', 'thunder'],
 -49.0: ['by thunder'],
 -25.0: ['hesitation', 'use'],
 -44.0: ['thunder'],
 60.0: ['come along', 'keen', 'rich', 'unwarlike'],
 -31.0: ['cry', 'no'],
 32.0: ['power'],
 72.0: ['play'],
 -77.0: ['game'],
 -20.0: ['abandon', 'barren', 'blot out', 'blur', 'body'],
 20.0: ['able', 'advice', 'animation', 'assure', 'atone'],
 44.0: ['natural', 'one'],
 48.0: ['mind'],
 -5.0: ['checkmate'],
 43.0: ['wel

In [92]:
# Every (clemma, sentiment) occurrence whose score lies in the closed
# interval [-10, 10]; duplicates are kept because the input has one pair per
# occurrence.
neutral_clemmata = [
    pair for pair in concept_sentiment_words if -10 <= pair[1] <= 10
]

# Order from most negative to most positive. sorted() is stable, so pairs with
# equal scores stay in their original relative order.
neutral_clemmata_sorted = sorted(neutral_clemmata, key=lambda pair: pair[1])

neutral_clemmata_sorted

[('help', -6.399999999999999),
 ('checkmate', -5.0),
 ('rueful', -3.0),
 ('a great deal', 0.0),
 ('deduction', 0.0),
 ('pass through', 0.0),
 ('positively', 0.0),
 ('evil', 0.0),
 ('canada', 0.0),
 ('opinion', 0.0),
 ('him', 0.0),
 ('be', 0.0),
 ('shadow', 0.0),
 ('look', 0.0),
 ('impossible', 0.0),
 ('no one', 0.0),
 ('my', 0.0),
 ('ruin', 0.0),
 ('sombre', 0.0),
 ('i', 0.0),
 ('stare', 0.0),
 ('tap', 0.0),
 ('gloomy', 0.0),
 ('fight', 0.0),
 ('defy', 0.0),
 ('mysterious', 0.0),
 ('motionless', 0.0),
 ('creep', 0.0),
 ('noiselessly', 0.0),
 ('peep', 0.0),
 ('rigid', 0.0),
 ('blackness', 0.0),
 ('intently', 0.0),
 ('deep', 0.0),
 ('groan', 0.0),
 ('impatient', 0.0),
 ('gesture', 0.0),
 ('instantly', 0.0),
 ('make', 0.0),
 ('way', 0.0),
 ('come', 0.0),
 ('stealthy', 0.0),
 ('pass', 0.0),
 ('return', 0.0),
 ('secret', 0.0),
 ('go on', 0.0),
 ('furnish', 0.0),
 ('make', 0.0),
 ('found', 0.0),
 ('make', 0.0),
 ('have', 0.0),
 ('surprise', 0.0),
 ('not', 0.0),
 ('have', 0.0),
 ('clear', 0.0

In [96]:
# Partition the pairs that sit just outside zero into two bands:
#   mildly negative: -20 < sentiment <= -10
#   mildly positive:  10 <= sentiment < 20
# NOTE(review): the inner bounds (-10 and 10) are inclusive here and also in
# the neutral filter (-10 <= sentiment <= 10), so a score of exactly -10 or 10
# would be listed in both places.
mild_negative = []
mild_positive = []
for lemma, score in concept_sentiment_words:
    if -20 < score <= -10:
        mild_negative.append((lemma, score))
    elif 10 <= score < 20:
        mild_positive.append((lemma, score))

# Ascending by sentiment value within each band.
mild_negative_sorted = sorted(mild_negative, key=lambda pair: pair[1])
mild_positive_sorted = sorted(mild_positive, key=lambda pair: pair[1])

# Print every match in each band, one "clemma → sentiment" line per pair.
for heading, rows in (
    ("Mildly Negative Sentiment (-20 to -10):", mild_negative_sorted),
    ("\nMildly Positive Sentiment (10 to 20):", mild_positive_sorted),
):
    print(heading)
    for clemma, sentiment in rows:
        print(f"{clemma} → {sentiment}")


Mildly Negative Sentiment (-20 to -10):
supreme → -19.5

Mildly Positive Sentiment (10 to 20):
occupy → 11.3
think → 11.85
sir → 12.0
more → 13.0
convenient → 19.0
