In [2]:
import os
import numpy as np
import pandas as pd

# Tab-separated word/happiness table; the first two lines of the file are
# skipped and the third is used as the header row.
local_sentiment_data = "../data/Data_Set_S1.txt"

# Fail fast with a clear message. Silently skipping the load would only
# surface later as a NameError on `df`, hiding the real cause.
if not os.path.exists(local_sentiment_data):
    raise FileNotFoundError(
        f"Sentiment lexicon not found at {local_sentiment_data!r} "
        f"(working directory: {os.getcwd()!r})"
    )

df = pd.read_csv(local_sentiment_data, sep="\t", skiprows=2, header=0)

df["word"] = df["word"].astype("string")
for col in ["twitter_rank", "google_rank", "nyt_rank", "lyrics_rank"]:
    # errors="coerce" already turns the "--" placeholder (and any other
    # non-numeric text) into NaN, so no separate replace step is needed.
    # "Int64" is the nullable integer dtype, which can hold those gaps.
    df[col] = pd.to_numeric(df[col], errors="coerce").astype("Int64")

# Lookup table used by sentiment_score: word -> happiness_average.
word_to_happiness_avg = dict(zip(df["word"], df["happiness_average"]))


def sentiment_score(text: str, lexicon=None) -> "float | None":
    """Return the mean happiness score of the known words in ``text``.

    The text is lower-cased and split on whitespace; every token that is a
    key of the lexicon contributes its value, and unknown tokens are ignored.

    Args:
        text: The text to score. Non-string values (e.g. ``None`` or a NaN
            coming from a missing DataFrame cell) are treated as "no text".
        lexicon: Optional mapping of lower-case word -> score. Defaults to
            the module-level ``word_to_happiness_avg``.

    Returns:
        The arithmetic mean of the matched tokens' scores, or ``None`` when
        ``text`` is not a string or no token is found in the lexicon.
    """
    if lexicon is None:
        lexicon = word_to_happiness_avg

    # Missing descriptions arrive as None/NaN; report "no score" instead of
    # failing on .lower().
    if not isinstance(text, str):
        return None

    # NOTE(review): tokens keep any attached punctuation, so "happy," will not
    # match "happy" -- confirm whether that is intended.
    tokens_lower = text.lower().split()
    scores = [lexicon[token] for token in tokens_lower if token in lexicon]
    if not scores:
        return None
    return sum(scores) / len(scores)

In [7]:
# NOTE(review): this path is relative to the current working directory, while
# the lexicon path in the first cell starts with "../data/" -- confirm both
# resolve correctly from the same directory.
location_llm_descriptions = (
    "data/location_descriptions_llm_withSceneAction_gemini_2_5_flash.json"
)

# Pin the encoding: without it, open() decodes with the platform's locale
# encoding, which can mangle non-ASCII characters in the JSON text.
with open(location_llm_descriptions, "r", encoding="utf-8") as fh:
    location_data = pd.read_json(fh)

In [None]:
# Score every location's description: location_name -> sentiment (or None
# when sentiment_score finds no known word). If a name occurs more than once,
# the last row wins.
location_sentiments = {
    name: sentiment_score(text)
    for name, text in zip(
        location_data["location_name"], location_data["description"]
    )
}

location_sentiments

{'Another Cockpit': 5.236639344262295,
 'Asteroid Cave': 5.217848101265823,
 'Barge Observation Deck': 5.259925093632959,
 'Battlefield': 5.128311688311689,
 "Biggs' Cockpit": 5.192008928571428,
 'Boiler Room': 5.120582010582011,
 'Bottom Of Cloud City': 5.2815510204081635,
 'Bunker': 5.125482233502538,
 "Chief's Hut": 5.342488262910798,
 'Cloud City': 5.278895899053628,
 "Creature's House": 5.391134453781512,
 'Dagobah': 5.273680555555555,
 'Dagobah Swamp': 5.325893536121673,
 "Darth Vader's Cockpit": 5.29337552742616,
 'Death Star': 5.16278350515464,
 'Detention Area': 5.292946859903382,
 'Docking Bay': 5.224083333333334,
 'Docking Bay 94': 5.305404530744337,
 'Dune Sea': 5.340378151260504,
 'Dungeon Cell': 5.221379310344828,
 'Dungeon Corridor': 5.141718061674009,
 'East Landing Platform': 5.196393442622951,
 "Emperor's Throne Room": 5.179917695473251,
 "Emperor's Tower": 5.206460481099656,
 'Endor': 5.283703703703704,
 'Endor Forest': 5.198274509803921,
 'Ewok Village': 5.386923076

In [13]:
# Location sentiment statistics

# Drop locations without a score once, up front. Keeping None entries in the
# ranking (via +/-inf sort sentinels) lets them reach the ":.2f" formatting
# below whenever fewer than 10 locations are scored, which raises TypeError.
scored_location_sentiments = [
    (loc, score) for loc, score in location_sentiments.items() if score is not None
]
location_scores = [score for _, score in scored_location_sentiments]

# sorted() is stable, so ties keep their insertion order in both rankings.
top_10_happiest_locations = sorted(
    scored_location_sentiments, key=lambda item: item[1], reverse=True
)[:10]

top_10_saddest_locations = sorted(
    scored_location_sentiments, key=lambda item: item[1]
)[:10]

# With no scored locations, report NaN explicitly rather than letting NumPy
# emit an "empty slice" RuntimeWarning.
mean_location_sentiment = np.mean(location_scores) if location_scores else np.nan
median_location_sentiment = (
    np.median(location_scores) if location_scores else np.nan
)

print("Top 10 Happiest Locations:")
for loc, score in top_10_happiest_locations:
    print(f"{loc}: {score:.2f}")
print("\nTop 10 Saddest Locations:")
for loc, score in top_10_saddest_locations:
    print(f"{loc}: {score:.2f}")
print(f"\nMean Location Sentiment: {mean_location_sentiment:.4f}")
print(f"Median Location Sentiment: {median_location_sentiment:.4f}")

Top 10 Happiest Locations:
Power Station: 5.45
Kenobi's Dwelling: 5.42
Ewok Village Square: 5.40
Creature's House: 5.39
Ewok Village: 5.39
Millennium Falcon: 5.37
Lars Homestead: 5.35
Chief's Hut: 5.34
Dune Sea: 5.34
Sail Barge: 5.34

Top 10 Saddest Locations:
Zev's Snowspeeder, Rogue Two: 5.05
Ext/Int: 5.07
Red Leader Starship: 5.09
Red Ten's Cockpit: 5.12
Skiff: 5.12
Boiler Room: 5.12
Bunker: 5.13
Luke's Snowspeeder, Rogue Leader: 5.13
Battlefield: 5.13
Gold Five's Y-Wing: 5.14

Mean Location Sentiment: 5.2435
Median Location Sentiment: 5.2393
