In [1]:
import pandas as pd
import json

In [None]:
# Display the staged demand CSV; the resulting frame is shown only, not assigned to a name.
pd.read_csv("demand_processed_staged.csv")

In [2]:
def synthesize_demand(df: pd.DataFrame) -> list:
    """
    Aggregates the 'demand' values from every cell of a DataFrame by computing
    the arithmetic mean per (value, entity) pair, rounded to the nearest integer.

    Cells are visited row by row and, within a row, column by column, so the
    returned list orders the pairs by first appearance. Missing cells
    (None, NaN, pd.NA) are skipped rather than handed to json.loads, which
    would raise TypeError on them.

    :param df: A pandas DataFrame, where each column (e.g. 'economic', 'functional', etc.)
               contains a JSON string of a list of dicts with keys {value, entity, demand}.
    :return: A list of dictionaries, each with keys {'value', 'entity', 'demand'}.
             'demand' is the mean passed through the built-in round(), so it is an
             int and exact .5 ties go to the even neighbour.
    :raises json.JSONDecodeError: if a non-missing string cell is not valid JSON.
    :raises TypeError: if a non-missing cell is not a str/bytes/bytearray.
    :raises KeyError: if an entry lacks 'value', 'entity' or 'demand'.
    """

    # (value, entity) -> [running sum of demand, number of entries seen]
    totals = {}

    # itertuples(index=False, name=None) yields plain tuples of the row's cell
    # values in column order, preserving the row-major traversal.
    for row in df.itertuples(index=False, name=None):
        for cell in row:
            # `cell != cell` is only true for NaN; the isinstance guard keeps the
            # comparison away from non-float objects.
            if cell is None or cell is pd.NA or (isinstance(cell, float) and cell != cell):
                continue

            for entry in json.loads(cell):
                bucket = totals.setdefault((entry["value"], entry["entity"]), [0.0, 0])
                bucket[0] += entry["demand"]
                bucket[1] += 1

    # A pair only exists in `totals` after at least one entry, so count >= 1.
    return [
        {"value": value, "entity": entity, "demand": round(total / count)}
        for (value, entity), (total, count) in totals.items()
    ]

In [6]:
# Load the staged demand CSV into `df` and show how many rows it has.
df = pd.read_csv("demand_processed_staged.csv") 
len(df)


299

In [7]:
# Collapse every JSON cell of `df` into one rounded mean demand per (value, entity) pair.
aggregated_demand_data = synthesize_demand(df)
aggregated_demand_data

[{'value': 'Economic', 'entity': 'Open AI', 'demand': 82},
 {'value': 'Economic', 'entity': 'Google', 'demand': 76},
 {'value': 'Economic', 'entity': 'Microsoft', 'demand': 83},
 {'value': 'Economic', 'entity': 'Meta', 'demand': 67},
 {'value': 'Economic', 'entity': 'IBM', 'demand': 70},
 {'value': 'Functional', 'entity': 'Open AI', 'demand': 85},
 {'value': 'Functional', 'entity': 'Google', 'demand': 80},
 {'value': 'Functional', 'entity': 'Microsoft', 'demand': 85},
 {'value': 'Functional', 'entity': 'Meta', 'demand': 71},
 {'value': 'Functional', 'entity': 'IBM', 'demand': 73},
 {'value': 'Emotional', 'entity': 'Open AI', 'demand': 82},
 {'value': 'Emotional', 'entity': 'Google', 'demand': 72},
 {'value': 'Emotional', 'entity': 'Microsoft', 'demand': 78},
 {'value': 'Emotional', 'entity': 'Meta', 'demand': 64},
 {'value': 'Emotional', 'entity': 'IBM', 'demand': 68},
 {'value': 'Social', 'entity': 'Open AI', 'demand': 81},
 {'value': 'Social', 'entity': 'Google', 'demand': 73},
 {'va

In [8]:
# Number of distinct (value, entity) pairs produced by the aggregation.
len(aggregated_demand_data)

25