In [1]:
import pandas as pd
import json

In [3]:
def synthesize_perception(df: pd.DataFrame) -> list:
    """
    Aggregates the 'perception' values from multiple rows and columns in a DataFrame
    by computing the arithmetic mean per (value, entity), rounded to the nearest integer.

    Cells are scanned row by row, left to right. Missing cells (None / NaN / pd.NA,
    e.g. empty CSV fields as loaded by ``pd.read_csv``) are skipped instead of
    aborting the whole aggregation.

    :param df: A pandas DataFrame, where each column (e.g. 'economic', 'functional', etc.)
               contains a JSON string of a list of dicts with keys {value, entity, perception}.
    :return: A list of dictionaries, each with keys {'value', 'entity', 'perception'},
             ordered by the first appearance of each (value, entity) pair.
             'perception' is the mean passed through the built-in ``round()``,
             i.e. an integer, with exact .5 ties rounded to the even neighbour.
    :raises json.JSONDecodeError: If a non-missing cell is not valid JSON.
    :raises TypeError: If a non-missing cell is not a str/bytes value.
    :raises KeyError: If an entry lacks 'value', 'entity' or 'perception'.
    """

    aggregator = {}  # Holds data in the form: {(value, entity): {"sum_perception": float, "count": int}}

    # itertuples(index=False, name=None) yields plain tuples of cell values in
    # column order, preserving the row-major scan without building a Series per row.
    for row in df.itertuples(index=False, name=None):
        for cell in row:
            # Skip missing cells; is_scalar guards pd.isna against array-like values.
            if pd.api.types.is_scalar(cell) and pd.isna(cell):
                continue

            # Parse the JSON string in the cell and accumulate sums and counts
            for entry in json.loads(cell):
                key = (entry["value"], entry["entity"])  # e.g. ("Economic", "Open AI")
                stats = aggregator.setdefault(key, {"sum_perception": 0.0, "count": 0})
                stats["sum_perception"] += entry["perception"]
                stats["count"] += 1

    # Compute the rounded mean and build the final list of dicts.
    # A key only exists once at least one entry was counted, so count >= 1 here.
    result = []
    for (val, ent), stats in aggregator.items():
        mean_perception = round(stats["sum_perception"] / stats["count"])
        result.append({
            "value": val,
            "entity": ent,
            "perception": mean_perception
        })

    return result

In [4]:
# Load the staged perception CSV into a DataFrame.
df = pd.read_csv("perception_processed_staged.csv")
# Bare expression: displays the number of rows loaded as the cell output.
len(df)

297

In [6]:
# Collapse every cell of df into one rounded mean perception per (value, entity) pair.
aggregated_demand_data = synthesize_perception(df)
# Bare expression: displays the resulting list of dicts as the cell output.
aggregated_demand_data

[{'value': 'Economic', 'entity': 'Open AI', 'perception': 79},
 {'value': 'Economic', 'entity': 'Google', 'perception': 74},
 {'value': 'Economic', 'entity': 'Microsoft', 'perception': 81},
 {'value': 'Economic', 'entity': 'Meta', 'perception': 65},
 {'value': 'Economic', 'entity': 'IBM', 'perception': 71},
 {'value': 'Functional', 'entity': 'Open AI', 'perception': 83},
 {'value': 'Functional', 'entity': 'Google', 'perception': 80},
 {'value': 'Functional', 'entity': 'Microsoft', 'perception': 84},
 {'value': 'Functional', 'entity': 'Meta', 'perception': 70},
 {'value': 'Functional', 'entity': 'IBM', 'perception': 74},
 {'value': 'Emotional', 'entity': 'Open AI', 'perception': 80},
 {'value': 'Emotional', 'entity': 'Google', 'perception': 71},
 {'value': 'Emotional', 'entity': 'Microsoft', 'perception': 78},
 {'value': 'Emotional', 'entity': 'Meta', 'perception': 63},
 {'value': 'Emotional', 'entity': 'IBM', 'perception': 69},
 {'value': 'Social', 'entity': 'Open AI', 'perception': 79