In [None]:
import re
import os
import sys
import numpy as np
import pandas as pd

In [None]:
from dataclasses import dataclass, field
from collections import OrderedDict

@dataclass
class MaterialTreelet:
    """A group of texture tuples, plus the materials that use them, packed together.

    Instances are created and filled by the packing code later in this
    notebook; every field has a default so a bare ``MaterialTreelet()`` is valid.
    """

    # Identifier of this treelet; the packing loop assigns it from a running counter.
    id: int = 0
    # Texture-id tuples placed in this treelet (one entry per distinct tuple).
    textures: list = field(default_factory=list)
    # Material ids collected from tex_to_mat for each tuple in `textures`.
    materials: list = field(default_factory=list)
    # Running total of the summed texture sizes of the tuples in `textures`.
    size: int = 0

In [None]:
# Upper bound on the accumulated texture size of a single treelet; the packing
# loop only adds a texture tuple to an existing treelet if the total stays
# at or below this value.
MAX_BYTES = 10 ** 9

# Texture record:  "T<treelet> TEX<texture id> <size>"
TEX_RE = re.compile(r"^T(\d+) TEX(\d+) (\d+)$")
# Material record: "T<treelet> MAT<material id> <size>" optionally followed by
# a " TEX..." tail, captured whole (leading space included) as group 4.
MAT_RE = re.compile(r"^T(\d+) MAT(\d+) (\d+)( TEX.+)?$")

# Parse textures.txt into three lookup tables:
#   textures[texture_id]          -> size taken from the TEX line
#   materials[material_id]        -> (texture-id tuple, material size,
#                                     sum of the sizes of those textures)
#   tex_to_mat[texture-id tuple]  -> ids of the materials that use exactly
#                                     that tuple (the empty tuple collects
#                                     materials that list no textures)
textures = {}
materials = {}
tex_to_mat = {}

with open("textures.txt") as fin:
    for line in fin:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline at the end of the file)
            # carries no record; skip it instead of failing the whole parse.
            continue

        if match := TEX_RE.match(line):
            # group(1), the leading T<n> id, must be present for the line to
            # match but its value is not used by this notebook.
            texture_id = int(match.group(2))
            textures[texture_id] = int(match.group(3))
        elif match := MAT_RE.match(line):
            material_id = int(match.group(2))
            material_size = int(match.group(3))
            # group(4) is the optional " TEX<a> TEX<b> ..." tail. split() with
            # no argument tolerates repeated spaces between tokens, and
            # t[3:] drops the "TEX" prefix of each token.
            texs = tuple(int(t[3:]) for t in (match.group(4) or "").split())
            # Every referenced texture must already have been defined by an
            # earlier TEX line; otherwise this lookup raises KeyError.
            materials[material_id] = (texs, material_size,
                                      sum(textures[x] for x in texs))
            # Append in place rather than rebuilding the list for every material.
            tex_to_mat.setdefault(texs, []).append(material_id)
        else:
            raise ValueError(f"unmatched line: {line}")
            
# Per-texture-tuple material lists, apparently meant to be merged across tuples
# where one tuple's textures are a subset of another's. Nothing in the visible
# part of this notebook reads this dict yet.
merged_tex_to_mat = {}

for tex_i, mat_i in tex_to_mat.items():
    # Copy the list so that a future merge step can grow it without also
    # growing the list stored in tex_to_mat.
    merged_tex_to_mat[tex_i] = list(mat_i)

    for tex_j, mat_j in tex_to_mat.items():
        if tex_i == tex_j:
            continue
        if set(tex_i) <= set(tex_j):
            # TODO: the merge for "tex_i is a subset of tex_j" was never
            # written. The original `if` had no body, which is a syntax error
            # that kept the whole cell from running; `pass` keeps this a
            # no-op until the intended merge rule is decided.
            pass
            
# Every distinct, non-empty texture tuple referenced by a material, paired
# with the summed size of its textures and ordered largest total first.
texture_keys = sorted(
    (
        (combo, sum(textures[tex_id] for tex_id in combo))
        for combo in {entry[0] for entry in materials.values() if entry[0]}
    ),
    key=lambda pair: pair[1],
    reverse=True,
)

# Pack the texture tuples into treelets, first fit: each tuple goes into the
# earliest-created treelet whose size stays within MAX_BYTES after adding it.
# If none has room a new treelet is opened for it, whatever the tuple's size.
treelets = OrderedDict()
current_id = 54  # first treelet id; the reason for starting at 54 is not visible here

for tex_combo, combo_bytes in texture_keys:
    for candidate in treelets.values():
        if candidate.size + combo_bytes > MAX_BYTES:
            continue
        candidate.textures.append(tex_combo)
        candidate.size += combo_bytes
        break
    else:
        # No existing treelet could take this tuple.
        treelets[current_id] = MaterialTreelet(
            id=current_id,
            textures=[tex_combo],
            size=combo_bytes,
        )
        current_id += 1

# Attach to every treelet the materials registered under each of its texture tuples.
for packed in treelets.values():
    for tex_combo in packed.textures:
        packed.materials.extend(tex_to_mat[tex_combo])
# Optional normalisation, currently disabled:
#     packed.textures = sorted([x for s in packed.textures for x in s])
#     packed.materials = sorted(list(set(packed.materials)))

# print(treelets[55].materials)
print(treelets[54])