In [1]:
import polars as pl
from pydantic import BaseModel
from typing import List, Union, Dict
from typing import Optional
from hierarchical import (
    losses_csv_to_hierarchical_json,
    losses_csv_to_hierarchical_jsonv2,
    parent_index,
    test_index_validity,
    dict_to_records,
    df_to_records,
    Record,
    path_to_root)
from collections import defaultdict

In [3]:
# Call the imported `losses_csv_to_hierarchical_jsonv2` helper with the losses
# CSV path and a JSON path under the frontend's static directory.
# NOTE(review): presumably reads the first path and writes the second — confirm in `hierarchical`.
# NOTE(review): the CSV path is a hard-coded absolute path (/mnt/z/...), so this
# cell only runs on a machine that has that mount.
losses_csv_to_hierarchical_jsonv2(r"/mnt/z/DATASETS/ukraine/losses_russia.csv", r"../frontend/static/lossesv3.json")

In [2]:
# Load the losses CSV into a polars DataFrame.
df = pl.read_csv(r"/mnt/z/DATASETS/ukraine/losses_russia.csv")
# Split into a list of DataFrames, one per distinct "equipment" value.
by_equipment = df.partition_by("equipment")
# Last expression of the cell, so its return value is shown as the output.
# NOTE(review): presumably validates df against parent_index — confirm in `hierarchical`.
test_index_validity(df)

True

In [3]:
# Display the loaded DataFrame as the cell output.
df

equipment,model,sub_model,manufacturer,losses_total,abandoned,abandoned and destroyed,captured,captured and destroyed,captured and stripped,damaged,damaged and abandoned,damaged and captured,damaged beyond economical repair,destroyed,destroyed in a non-combat related incident,sunk
str,str,str,str,i64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,str,str
"""Tanks""","""T-62 Obr. 1967...",,"""the Soviet Uni...",1,,,1.0,,,,,,,,,
"""Tanks""","""T-62M""",,"""the Soviet Uni...",50,1.0,,29.0,,,1.0,,5.0,,14.0,,
"""Tanks""","""T-62MV""",,"""the Soviet Uni...",12,,,7.0,,,,1.0,1.0,,3.0,,
"""Tanks""","""T-64A""",,"""the Soviet Uni...",2,,,,,,,,,,2.0,,
"""Tanks""","""T-64BV""",,"""the Soviet Uni...",48,2.0,,4.0,,,3.0,,1.0,,38.0,,
"""Tanks""","""T-72A""",,"""the Soviet Uni...",36,1.0,,13.0,,,1.0,,,,21.0,,
"""Tanks""","""T-72AV""",,"""the Soviet Uni...",9,,,3.0,,,,,2.0,,4.0,,
"""Tanks""","""T-72B""",,"""the Soviet Uni...",230,4.0,,63.0,3.0,1.0,5.0,5.0,5.0,,144.0,,
"""Tanks""","""T-72B Obr. 198...",,"""the Soviet Uni...",71,4.0,1.0,17.0,,1.0,1.0,2.0,3.0,,42.0,,
"""Tanks""","""T-72BA""",,"""Russia""",25,,,14.0,,,2.0,,1.0,,8.0,,


In [None]:
# NOTE(review): called with no arguments. The `path_to_root` defined later in
# this notebook requires a `category` argument, so against that definition this
# raises TypeError; the signature of the version imported from `hierarchical`
# is not visible here. Confirm the intended argument or remove this scratch cell.
path_to_root()

In [9]:
# Convert the DataFrame with the imported `df_to_records` helper; the result
# exposes a `.children` collection.
root = df_to_records(df)
# Number of direct children of the returned root (shown as the cell output).
len(root.children)

7

In [5]:
def walk_dict(d: Dict):
    """Lazily yield every non-dict value of a nested dict, depth-first.

    Values that are themselves dicts (including subclasses such as
    ``defaultdict``) are descended into rather than yielded; keys are ignored.
    Values are produced in dict iteration order.
    """
    for value in d.values():
        if not isinstance(value, dict):
            yield value
            continue
        # Nested mapping: emit its leaves before moving on to the next value.
        for leaf in walk_dict(value):
            yield leaf

def walk_records(r: Record):
    """Yield every node of a Record tree in post-order.

    All descendants of ``r`` are yielded (children in list order, each child's
    own subtree first) before ``r`` itself is yielded last.
    """
    for descendant in r.children:
        for node in walk_records(descendant):
            yield node
    yield r


In [7]:
# Sanity check: every node in the tree must either have children or carry a
# non-zero loss count.
# Walks `root`, the tree built by `df_to_records(df)` earlier in the notebook;
# the name `nodes` is not defined anywhere in this notebook, so the cell would
# raise NameError on a fresh kernel.
for stuff in walk_records(root):
    assert stuff.children or stuff.losses_total, (
        f"node has neither children nor losses_total: {stuff!r}"
    )

In [38]:
# Index of the equipment partition to inspect.
n = 1
# Distinct "equipment" value(s) found in that partition.
print(by_equipment[n]["equipment"].unique().to_list())
# Distinct model names in that partition, one per line.
print('\n'.join(by_equipment[n]["model"].unique().to_list()))

['Armoured Fighting Vehicles']
BRDM-2
BTR-80-based ZS-88 PsyOps vehicle
MT-LBVM and MT-LBVMK
Unknown AFV
BRDM-2-based ZS-82 PsyOps vehicle
GAZ-3344-20 'Aleut' articulated tracked carrier
BRM-1K Obr. 2021
MT-LBu
2S1 with ZU-23 AA gun
Vityaz DT-10PM articulated tracked carrier
Vityaz DT-30 articulated tracked carrier
BMM-80 ambulance
BRDM-2RKhb chemical reconnaissance vehicle
MT-LB with ZU-23 AA gun
Unknown BTR-D/BMD-2
MT-LB
BRM-1 and BRM-1K reconnaissance vehicle


In [6]:
# Show the row(s) whose losses_total equals 357.
df.filter(pl.col("losses_total")==357)

equipment,model,sub_model,manufacturer,losses_total,abandoned,abandoned and destroyed,captured,captured and destroyed,captured and stripped,damaged,damaged and abandoned,damaged and captured,damaged beyond economical repair,destroyed,destroyed in a non-combat related incident,sunk
str,str,str,str,i64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,str,str
"""Armoured Fight...","""MT-LB""",,"""the Soviet Uni...",357,14.0,,128.0,1.0,,3.0,,7.0,,204.0,,


In [5]:
# Every distinct model name in the data must be a key of parent_index;
# the first missing one aborts the cell with AssertionError.
for model in df["model"].unique().to_list():
    assert model in parent_index

In [10]:
# Every parent named in parent_index must itself be a key, except the root
# label "All".
for k,v in parent_index.items():
    assert v in parent_index or v=="All"
    # Once the assert has passed, only "All" can reach this branch, so this
    # prints "All" once per entry whose parent is "All".
    if v not in parent_index:
        print(v)

All
All
All
All
All
All
All


In [8]:
# NOTE: this definition shadows the `path_to_root` imported from `hierarchical`
# in the first cell.
def path_to_root(category: str):
    """Return the chain of ancestors of ``category`` taken from ``parent_index``.

    The list starts with the direct parent and continues upward; ``category``
    itself is not included. The walk stops at the first name that has no (or a
    falsy) entry in ``parent_index``, so an unknown category yields ``[]``.
    """
    ancestors = []
    node = parent_index.get(category)
    while node:
        ancestors.append(node)
        node = parent_index.get(node)
    return ancestors

# NOTE: this definition shadows the `Record` imported from `hierarchical` in
# the first cell.
class Record(BaseModel):
    """Tree node with a name, an optional loss count, and a list of child nodes."""
    name: str  # label of this node
    losses_total: Optional[int] = None  # loss count; None when not supplied
    children: List["Record"] = []  # nested Record nodes; empty when not supplied
# Example lookup; the returned ancestor list is shown as the cell output.
path_to_root("BRM-1 and BRM-1K reconnaissance vehicle")

['BRM', 'AFV', 'Armor', 'All']

In [9]:
# Every key's ancestor chain must end at "All".
# An empty chain would raise IndexError on [-1] rather than AssertionError.
for k in parent_index.keys():
    assert path_to_root(k)[-1]=="All"

In [20]:
# Autovivifying nested dict: reading a missing key creates another such dict.
tree = lambda: defaultdict(tree)
# NOTE: rebinds `root`, which an earlier cell assigned from df_to_records(df).
root = tree()

for row in df.rows():
    walker = root
    # Descend one level per ancestor of the row's first column (equipment).
    # NOTE(review): path_to_root, as defined in this notebook, lists the nearest
    # parent first and "All" last, so "All" becomes the innermost level here and
    # the equipment name itself is not a level — confirm this nesting is intended.
    for asc in path_to_root(row[0]):
        walker = walker[asc]
    # Store the fifth column (losses_total) under the second column (model).
    walker[row[1]] = row[4]

In [29]:
def dict_to_nodes(key: str, val: Union[int, defaultdict]):
    """Convert one ``key -> val`` entry of a nested dict into a ``Record``.

    An ``int`` value becomes a leaf whose ``losses_total`` is that value. Any
    other value is treated as a mapping and becomes an inner node whose
    children are built recursively from its items, in iteration order.
    """
    if not isinstance(val, int):
        subtree = [
            dict_to_nodes(child_key, child_val)
            for child_key, child_val in val.items()
        ]
        return Record(name=key, children=subtree)
    return Record(name=key, losses_total=val)

In [3]:
# One DataFrame per distinct "equipment" value.
# NOTE: repeats the assignment already made in the cell that loads `df`.
by_equipment = df.partition_by("equipment")

In [4]:
class Node(BaseModel):
    """Tree node with a name, optional colour and loss count, and child nodes."""
    name: str  # label of this node
    color: Optional[str] = None  # colour string; None when not supplied
    losses_total: Optional[int] = None  # loss count; None when not supplied
    children: List["Node"] = []  # nested Node objects; empty when not supplied

In [5]:
# Palette of 24 hex colour strings; the tree-building cell picks one per
# equipment partition by position (colors[idx]).
colors = [
    "#F35623",
    "#EF2732",
    "#8A161F",
    "#B13396",
    "#EA579E",
    "#F693BF",
    "#653414",
    "#CBAB84",
    "#007FB2",
    "#025C65",
    "#009247",
    "#81A028",
    "#F5F243",
    "#FEF200",
    "#F67321",
    "#F25920",
    "#FFFFFF",
    "#737278",
    "#231F20",
    "#2C1543",
    "#6E3B98",
    "#59469F",
    "#122C67",
    "#185B90"
]

In [6]:
# Build one Node per equipment partition, each holding one leaf Node per model,
# while accumulating the overall loss count in `losses_total`.
children = []
losses_total = 0
for idx, equipment_df in enumerate(by_equipment):
    by_model = equipment_df.partition_by("model")
    children_of_model = []
    losses_of_model = 0  # running loss sum for this equipment partition
    for model_df in by_model:
        # NOTE(review): only the first row of each model partition is read; if a
        # model can span several rows (e.g. one per sub_model) the remaining
        # rows are ignored — confirm against the data.
        model_losses = model_df["losses_total"][0]
        children_of_model.append(Node(name=model_df["model"][0], losses_total=model_losses))
        losses_of_model += model_losses
    # Wrap around the palette so that having more equipment partitions than
    # colours reuses colours instead of raising IndexError.
    children.append(
        Node(
            name=equipment_df["equipment"][0],
            children=children_of_model,
            color=colors[idx % len(colors)],
        )
    )
    losses_total += losses_of_model

In [7]:
# Wrap the per-equipment nodes in a root Node named "total", serialise it with
# the model's .json() method and write it to the frontend's static directory.
# NOTE(review): the `losses_total` accumulated in the tree-building cell is not
# passed to this root node — confirm whether it should be.
with open(r"../frontend/static/losses_tree_russia.json" ,"w") as fp:
    fp.write(Node(name="total", children=children).json())