# Sandbox for Support Distribution Aggregation #41

In [33]:
import polars as pl
import pandas as pd
import numpy as np

import importlib
import aggregation

In [21]:
# Reload the local module so edits to it are picked up without restarting the
# kernel, then re-import the class so the name refers to the reloaded version.
importlib.reload(aggregation)
from aggregation import AggregationManager

# Two-row toy model as a polars frame; the list-valued target columns are
# given an explicit List(Float64) dtype.
pdf = pl.DataFrame(
    {
        "time": [1, 1],
        "entity_id": [1, 2],
        "conflict_prob": pl.Series(
            name="conflict_prob",
            values=[[0.1, 0.2], [0.3, 0.4]],
            dtype=pl.List(pl.Float64),
        ),
        "death_count": pl.Series(
            name="death_count",
            values=[[0.5, 0.6], [0.7, 0.8]],
            dtype=pl.List(pl.Float64),
        ),
    }
)

# The same toy model as a pandas frame, with plain Python lists in the cells.
pdf_pd = pd.DataFrame(
    data={
        "time": [1, 1],
        "entity_id": [1, 2],
        "conflict_prob": [[0.1, 0.2], [0.3, 0.4]],
        "death_count": [[0.5, 0.6], [0.7, 0.8]],
    }
)

# One manager indexed on (time, entity_id) with two target columns; both the
# polars and the pandas frame are handed to add_model.
manager = AggregationManager(
    target_cols=["conflict_prob", "death_count"],
    index_cols=["time", "entity_id"],
)
manager.add_model(pdf)
manager.add_model(pdf_pd)

In [37]:
# Prediction file under the fast_car model's generated-data directory; the
# path is relative, so it resolves against the notebook's working directory.
test_parquet_path = "views-models/models/fast_car/data/generated/predictions_forecasting_20250909_131353.parquet"
test_parquet = pl.read_parquet(source=test_parquet_path)

def random_float_list(n=5, rng=None):
    """Return ``n`` floats drawn uniformly from ``[0, 1)`` as a Python list.

    Parameters
    ----------
    n : int, default 5
        Number of values to draw.
    rng : numpy.random.Generator, int, or None, default None
        Source of randomness. ``None`` keeps the original behaviour and draws
        from NumPy's global legacy state, so ``np.random.seed`` still applies.
        A ``Generator`` is used as-is (its state advances between calls); an
        integer seeds a fresh ``Generator``, so the same seed always yields
        the same list.

    Returns
    -------
    list of float
        ``n`` values, each in ``[0, 1)``.
    """
    if rng is None:
        return np.random.rand(n).tolist()
    # default_rng returns an existing Generator unaltered and turns an integer
    # seed into a new Generator, so both kinds of argument are handled here.
    return np.random.default_rng(rng).random(n).tolist()

# Attach a list-valued "test_ged_sb" column: every row receives its own
# freshly drawn list of five random floats.
test_parquet = test_parquet.with_columns(
    pl.Series(
        name="test_ged_sb",
        values=[random_float_list(5) for _ in range(test_parquet.height)],
        dtype=pl.List(pl.Float64),
    )
)

In [42]:
# Load a second prediction file (path relative to the working directory) and
# preview its first five rows.
test_parquet2_path = "predictions_forecasting_20250807.parquet"
test_parquet2 = pl.read_parquet(source=test_parquet2_path)
test_parquet2.head(n=5)

pred_ln_ged_sb_dep,month_id,country_id
list[f64],i64,i64
[0.000706],547,1
[0.00158],547,2
[0.072077],547,3
[0.011363],547,4
[0.001575],547,5


In [38]:
# Preview the first five rows, including the added list column.
test_parquet.head(n=5)

pred_ln_ged_sb_dep,month_id,country_id,test_ged_sb
f64,i64,i64,list[f64]
0.0,548,1,"[0.62407, 0.530657, … 0.422733]"
0.0,548,2,"[0.041434, 0.702877, … 0.749599]"
0.0,548,3,"[0.258695, 0.384361, … 0.636447]"
0.0,548,4,"[0.754387, 0.075952, … 0.121407]"
0.0,548,5,"[0.330836, 0.302485, … 0.621282]"


In [39]:
# Separate manager for the parquet data: indexed on (month_id, country_id),
# with the list-valued test_ged_sb column as its only target.
pq_manager = AggregationManager(
    target_cols=["test_ged_sb"],
    index_cols=["month_id", "country_id"],
)
pq_manager.add_model(test_parquet)

In [40]:
# Display the manager's `models` attribute; the saved output shows it as a
# list holding the polars frame passed to add_model above.
pq_manager.models

[shape: (6_876, 4)
 ┌────────────────────┬──────────┬────────────┬─────────────────────────────────┐
 │ pred_ln_ged_sb_dep ┆ month_id ┆ country_id ┆ test_ged_sb                     │
 │ ---                ┆ ---      ┆ ---        ┆ ---                             │
 │ f64                ┆ i64      ┆ i64        ┆ list[f64]                       │
 ╞════════════════════╪══════════╪════════════╪═════════════════════════════════╡
 │ 0.0                ┆ 548      ┆ 1          ┆ [0.62407, 0.530657, … 0.422733… │
 │ 0.0                ┆ 548      ┆ 2          ┆ [0.041434, 0.702877, … 0.74959… │
 │ 0.0                ┆ 548      ┆ 3          ┆ [0.258695, 0.384361, … 0.63644… │
 │ 0.0                ┆ 548      ┆ 4          ┆ [0.754387, 0.075952, … 0.12140… │
 │ 0.0                ┆ 548      ┆ 5          ┆ [0.330836, 0.302485, … 0.62128… │
 │ …                  ┆ …        ┆ …          ┆ …                               │
 │ 0.0                ┆ 583      ┆ 242        ┆ [0.626163, 0.428294, … 0.85409…

In [27]:
# Negative check: a frame whose `entity_id` index column holds strings.
pdf_test = pd.DataFrame({
    "time": [1, 1],
    "entity_id": ["dsf", "sdf"],
    "conflict_prob": [[0.5, 0.6], [0.7, 0.8]],
    "death_count": [[0.5, 0.6], [0.7, 0.8]]
})

# add_model is expected to reject this frame with a TypeError. Catch and show
# that error so the demonstration does not abort a Restart & Run All, and fail
# loudly if the frame is ever accepted.
try:
    manager.add_model(pdf_test)
except TypeError as err:
    print(f"{type(err).__name__}: {err}")
else:
    raise AssertionError(
        "expected add_model to raise TypeError for a non-integer 'entity_id' column"
    )

TypeError: Index column 'entity_id' must be integer, got String

In [18]:
# Display the frames currently held by `manager`.
# NOTE(review): the saved output for this cell looks stale. It shows a
# `region_id` column, while `manager` is built with index_cols
# ['time', 'entity_id'], and this cell's execution count (18) predates the
# cell that defines `manager` (21). Re-run after Restart & Run All to refresh.
manager.models

[shape: (2, 4)
 ┌──────┬───────────┬───────────────┬─────────────┐
 │ time ┆ region_id ┆ conflict_prob ┆ death_count │
 │ ---  ┆ ---       ┆ ---           ┆ ---         │
 │ i64  ┆ i64       ┆ list[f64]     ┆ list[f64]   │
 ╞══════╪═══════════╪═══════════════╪═════════════╡
 │ 1    ┆ 1         ┆ [0.1, 0.2]    ┆ [0.5, 0.6]  │
 │ 1    ┆ 2         ┆ [0.3, 0.4]    ┆ [0.7, 0.8]  │
 └──────┴───────────┴───────────────┴─────────────┘,
 shape: (2, 4)
 ┌──────┬───────────┬───────────────┬─────────────┐
 │ time ┆ region_id ┆ conflict_prob ┆ death_count │
 │ ---  ┆ ---       ┆ ---           ┆ ---         │
 │ i64  ┆ i64       ┆ list[f64]     ┆ list[f64]   │
 ╞══════╪═══════════╪═══════════════╪═════════════╡
 │ 1    ┆ 1         ┆ [0.1, 0.2]    ┆ [0.5, 0.6]  │
 │ 1    ┆ 2         ┆ [0.3, 0.4]    ┆ [0.7, 0.8]  │
 └──────┴───────────┴───────────────┴─────────────┘]