In [None]:
import hashlib
import pickle
from typing import *
from pathlib import Path
from pickle import Unpickler
from random import Random

import ipywidgets as wi
from IPython.display import HTML, Javascript, display
from tqdm.notebook import tqdm
# Edge length in pixels used for thumbnail images and for the widths of the
# viewer widgets built in this notebook.
PX = 128
# Base directory: the dataset tree ("danbooru2020/...") and the pickled viewer
# state ("viewer/...") are both resolved relative to it.
root = Path("/data/natsuki")
def bucket(_id: str) -> str:
    """Return the bucket directory name for an id.

    The name is the last three characters of ``_id``, zero-filled on the
    left to a width of four (``"12345"`` -> ``"0345"``).
    """
    tail = _id[-3:]
    return tail.zfill(4)
def prepare(prefix):
    """Create the bucket directory tree for ``prefix`` under the dataset root.

    Creates ``root/danbooru2020/<prefix>`` and, inside it, the 1000 bucket
    directories ``0000`` .. ``0999``. Directories that already exist are left
    untouched, so the call can be repeated safely.

    Args:
        prefix: Sub-directory name (may contain ``/``) below ``danbooru2020``.
    """
    base = root / f"danbooru2020/{prefix}"
    # parents=True: do not fail when "danbooru2020" (or an intermediate
    # component of a nested prefix) has not been created yet.
    base.mkdir(parents=True, exist_ok=True)
    for i in range(1000):
        (base / str(i).zfill(4)).mkdir(exist_ok=True)
def fname2id(fname: str) -> str:
    """Extract the id from a file name.

    The id is the final ``/``-separated component of ``fname`` with
    everything from its first ``.`` onwards removed.
    """
    basename = fname.rpartition("/")[2]
    return basename.partition(".")[0]
def id2fname(_id, prefix="512white", ext=lambda _: "png", bucket=bucket):
    """Build the file path of an id inside the dataset tree.

    Args:
        _id: Id used both for the file stem and for choosing the bucket.
        prefix: Sub-directory of ``danbooru2020`` holding the files.
        ext: Callable mapping an id to its file extension (without the dot).
        bucket: Callable mapping an id to its bucket directory name.

    Returns:
        ``root/danbooru2020/<prefix>/<bucket>/<id>.<ext>`` as a string.
    """
    shard = bucket(_id)
    suffix = ext(_id)
    relative = f"danbooru2020/{prefix}/{shard}/{_id}.{suffix}"
    return str(root / relative)
def budget(N, workers=8):
    """Display an HTML table of how ``N`` items split across 1..``workers`` parts.

    Each table row starts with the part count ``d`` followed by one cell per
    part, written as the inclusive index range ``{start..end}`` that the part
    covers. Part counts larger than ``N`` contribute no row.
    """
    def row(n, d):
        # Render the row for splitting n items into d near-equal parts.
        if d <= n:
            base, extra = divmod(n, d)
            # The first `extra` parts take one additional item each.
            sizes = [base + 1] * extra + [base] * (d - extra)
            assert sum(sizes) == n
            cells = []
            start = 0
            for size in sizes:
                stop = start + size
                cells.append(f"<td>{{{start}..{stop-1}}}</td>")
                start = stop
            return f"<tr><td>{d}</td>" + "".join(cells) + "</tr>"
        return ""

    rows = [row(N, d) for d in range(1, workers + 1)]
    display(HTML("<table>" + "\n".join(rows) + "</table>"))
def split_dump(fnames, name, N=1000):
    """Write ``fnames`` into numbered chunk files and display a work budget.

    The chunk files are ``root/danbooru2020/dump_<name>/0``, ``1``, ... and
    each holds up to ``N`` names, one per line. Afterwards ``budget`` is called
    with the number of chunks written.

    Args:
        fnames: Sequence of file names (strings) to dump.
        name: Suffix of the ``dump_<name>`` directory.
        N: Maximum names per chunk; a value ``<= 0`` means "one single chunk".

    Note:
        Chunk files left over from an earlier call that produced more chunks
        are not removed.
    """
    if N <= 0:
        # Fall back to 1 for an empty list so the chunk arithmetic below
        # cannot divide by zero.
        N = max(len(fnames), 1)
    dump_dir = root / f"danbooru2020/dump_{name}"
    # parents=True: do not fail when "danbooru2020" does not exist yet.
    dump_dir.mkdir(parents=True, exist_ok=True)
    n_chunks = (len(fnames) - 1) // N + 1  # ceil(len / N); 0 for empty input
    for i in range(n_chunks):
        with (dump_dir / str(i)).open("w") as f:
            f.write('\n'.join(fnames[i*N:(i+1)*N]) + "\n")
    budget(n_chunks)
def search_dump(txt, name):
    """Print the path of the first entry in ``dump_<name>`` containing ``txt``.

    Every entry of ``root/danbooru2020/dump_<name>/`` is read as text, with a
    progress bar; the search stops at the first hit. Nothing is printed when
    no entry matches.
    """
    dump_dir = root / f"danbooru2020/dump_{name}/"
    candidates = list(dump_dir.glob("*"))
    for path in tqdm(candidates):
        if txt not in path.read_text():
            continue
        print(str(path))
        return
class Filter(dict):
    """Dict that also accepts an insertion-order position in place of a key.

    A key whose type is exactly ``int`` is treated as a position into the
    current key order and translated to the key stored there. Every item
    assignment prints one summary line: position, ``len(value)`` and key.
    """

    def _resolve(self, k):
        # Translate a positional (exact int) key into the real key.
        if type(k) is int:
            return list(self.keys())[k]
        return k

    def __getitem__(self, k):
        return super().__getitem__(self._resolve(k))

    def __setitem__(self, k, v):
        key = self._resolve(k)
        super().__setitem__(key, v)
        position = list(self.keys()).index(key)
        print(f"{position:>3}  {len(v):>9,}  {key}")
       

In [None]:
class Pannel(wi.VBox):
    """One tile: an image stacked above a toggle button for a single file.

    The toggle button starts pressed when the file is not in ``bag``.
    Releasing the button adds the file name to ``bag``; pressing it again
    removes it. After each toggle the tile's id and the bag size are printed
    into ``output``.
    """

    def __init__(
        self,
        fname: str,
        bag: Set[str],
        output: wi.Output,
    ) -> None:
        self.fname = fname
        self.bag = bag
        self.output = output
        self.id = fname2id(fname)
        self.image = self._make_image(fname)
        self.button = wi.ToggleButton(
            description=self.id,
            layout=wi.Layout(width=f"{PX}px"),
            value=self.fname not in bag,
            button_style="info",
        )
        self.button.observe(self.on_click, "value")
        super().__init__([self.image, self.button])

    @staticmethod
    def _make_image(fname):
        # Best effort: any failure while reading the file or building the
        # widget yields an empty placeholder image of the same size.
        try:
            with open(fname, "rb") as f:
                data = f.read()
            return wi.Image(value=data, format='png', width=PX, height=PX)
        except Exception:
            return wi.Image(width=PX, height=PX)

    def on_click(self, event) -> None:
        """Handle a change of the toggle button's ``value``."""
        if event["new"]:
            self.bag.discard(self.fname)
        else:
            self.bag.add(self.fname)
        self.output.clear_output()
        with self.output:
            print(self.id, len(self.bag))

In [None]:
class Viewer(wi.Box):
    """Paged thumbnail browser with next/prev/save/load controls.

    The file names are ordered once at construction time (seeded shuffle, or
    sorted by ``key``) and shown ``N`` tiles per page, followed by a control
    panel holding the buttons, a page selector and two output areas. The set
    of toggled-off files (``bag``) can be pickled to, and restored from,
    ``root/viewer/<name>.pkl``.

    Args:
        fnames: File names to browse.
        N: Number of tiles per page.
        seed: Seed of the shuffle used when ``key`` is ``None``.
        name: Name of the saved-state file. When empty, a digest of the
            ordered file names is used so the same input maps to the same
            file in every Python process.
        key: Optional sort key; it receives the id of each file name.
    """

    def __len__(self) -> int:
        """Return the number of pages (0 when there are no file names)."""
        return (len(self.fnames)-1)//self.N+1

    def __getitem__(self, i: int) -> List[Pannel]:
        """Build the tiles of page ``i``.

        Raises:
            IndexError: if ``i`` is not a valid page index. (IndexError rather
                than an ``assert`` so the check survives ``python -O`` and
                sequence-style iteration over the viewer terminates.)
        """
        if i not in range(len(self)):
            raise IndexError(f"page index out of range: {i!r}")
        return [
            Pannel(fname, self.bag, self.output)
            for fname in self.fnames[i*self.N:(i+1)*self.N]
        ]

    @staticmethod
    def _digest(fnames) -> str:
        # Process-independent fingerprint of the ordered file names.
        h = hashlib.sha256()
        for fname in fnames:
            h.update(fname.encode("utf-8", "backslashreplace"))
            h.update(b"\n")
        return h.hexdigest()[:16]

    def _state_path(self) -> Path:
        # Location of the pickled bag for this viewer.
        return root/"viewer"/f"{self.name}.pkl"

    def _position(self) -> str:
        # "<current page>/<page count>" as shown in the status area.
        return f"{self.i}/{len(self)}"

    def _set_status(self, *parts) -> None:
        # Replace the content of the status area with one printed line.
        self.status.clear_output()
        with self.status:
            print(*parts)

    def refresh(self) -> None:
        """Re-render the current page and the status line."""
        self._set_status(self._position(), len(self.fnames))
        self.children = tuple(self[self.i]+[self.controller])

    def on_click_p(self, event) -> None:
        """Go to the previous page, wrapping around at the first one."""
        self.i = (self.i - 1) % len(self)
        # Changing the selector fires on_select, which does the refresh.
        self.select.value = self.i

    def on_click_n(self, event) -> None:
        """Go to the next page, wrapping around at the last one."""
        self.i = (self.i + 1) % len(self)
        # Changing the selector fires on_select, which does the refresh.
        self.select.value = self.i

    def on_select(self, event) -> None:
        """Show the page picked in the selector."""
        self.i = event["new"]
        self.refresh()

    def is_saved(self) -> bool:
        """Return True when a saved-state file exists for this viewer."""
        return self._state_path().is_file()

    def save(self, event=None) -> None:
        """Pickle the bag to ``root/viewer/<name>.pkl``."""
        self._set_status("saving...")
        (root/"viewer").mkdir(exist_ok=True)
        with open(self._state_path(), "wb") as f:
            pickle.dump(self.bag, f, protocol=4)
        self._set_status(self._position(), len(self.fnames), "saved")

    def load(self, event=None) -> None:
        """Restore the bag from the saved-state file and redraw the page."""
        if not self.is_saved():
            self._set_status(self._position(), len(self.fnames), "not saved")
            return
        self._set_status("loading...")
        # SECURITY: pickle.load can execute arbitrary code. This is only safe
        # as long as the file was written by save() and the directory is
        # trusted.
        with open(self._state_path(), "rb") as f:
            self.bag = pickle.load(f)
        self.refresh()
        self._set_status(self._position(), len(self.fnames), "loaded")

    def __init__(
        self,
        fnames: Iterable[str],
        N: int = 23,
        seed: int = 0,
        name: str = '',
        key = None,
    ) -> None:
        self.N = N
        self.seed = seed
        self.fnames = list(fnames)
        if key is None:
            # Sort first so the seeded shuffle does not depend on input order.
            self.fnames.sort()
            Random(self.seed).shuffle(self.fnames)
        else:
            self.fnames.sort(key=lambda fname: key(fname2id(fname)))
        self.fnames = tuple(self.fnames)
        if name == '':
            # The built-in hash() of strings is randomized per process, so it
            # cannot name a file that must be found again after a restart.
            self.name = self._digest(self.fnames)
        else:
            self.name = name
        self.i = 0
        self.bag = set()
        if len(self.fnames) == 0:
            # Nothing to show: stay an empty box. name/bag/i are already set
            # so code inspecting the instance does not hit missing attributes.
            super().__init__()
            print("0")
            return
        # NOTE(review): round(PX)*0.95 yields a fractional pixel height;
        # round(PX*0.95) may have been intended - confirm before changing.
        self.select = wi.Select(options=range(len(self)), layout=wi.Layout(width=f"{PX//2}px", height=f"{round(PX)*0.95}px"))
        self.button_n = wi.Button(description="next", layout=wi.Layout(width=f"{PX//2}px"))
        self.button_p = wi.Button(description="prev", layout=wi.Layout(width=f"{PX//2}px"))
        self.button_s = wi.Button(description="save", layout=wi.Layout(width=f"{PX//2}px"))
        self.button_l = wi.Button(description="load", layout=wi.Layout(width=f"{PX//2}px"))
        self.status = wi.Output(layout=wi.Layout(width=f"{PX}px"))
        self.output = wi.Output(layout=wi.Layout(width=f"{PX}px"))
        self.controller = wi.VBox([
            wi.HBox([wi.VBox([
                self.button_n,
                self.button_p,
                self.button_s,
                self.button_l,
            ]), self.select]),
            self.status,
            self.output,
        ])
        self.layout = wi.Layout(flex_flow="row wrap")
        self.button_n.on_click(self.on_click_n)
        self.button_p.on_click(self.on_click_p)
        self.button_s.on_click(self.save)
        self.button_l.on_click(self.load)
        self.select.observe(self.on_select, "value")
        super().__init__()
        if self.is_saved():
            self.load()
        else:
            self.refresh()

In [None]:
class Manager(Viewer):
    """Viewer that records every instance in the class-wide ``MANAGED`` dict.

    Instances are keyed by their name, so creating a second manager with the
    same name replaces the first one in the registry.
    """

    # Registry of live managers: name -> instance.
    MANAGED = dict()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if "name" in kwargs:
            key = kwargs["name"]
        else:
            # ``name`` may be omitted or passed positionally (it is the 4th
            # positional parameter of Viewer); prefer the name the viewer
            # resolved for itself when it set one.
            fallback = args[3] if len(args) > 3 else ''
            key = getattr(self, "name", fallback)
        Manager.MANAGED[key] = self

    @classmethod
    def allbag(cls):
        """Return the union of the ids in every managed viewer's bag.

        Prints one ``name size`` line per managed viewer on the way.
        """
        retval = set()
        for k, v in cls.MANAGED.items():
            # A viewer built from an empty file list may not have a bag.
            bag = getattr(v, "bag", ())
            print(k, len(bag))
            retval |= set(map(fname2id, bag))
        return retval

In [None]:
class TQDMBytesReader:
    """File-object wrapper that advances a tqdm bar by the bytes it reads.

    Only ``read`` and ``readline`` are forwarded to the wrapped object.
    Keyword arguments are passed on to ``tqdm``. Used as a context manager,
    it enters and exits the progress bar; the wrapped file object is not
    closed by this class.
    """

    def __init__(self, fd, **kwargs):
        self.fd = fd
        self.tqdm = tqdm(**kwargs)

    def __enter__(self):
        self.tqdm.__enter__()
        return self

    def __exit__(self, *args, **kwargs):
        return self.tqdm.__exit__(*args, **kwargs)

    def _track(self, chunk):
        # Advance the bar by the size of what was just read, then pass it on.
        self.tqdm.update(len(chunk))
        return chunk

    def read(self, size=-1):
        return self._track(self.fd.read(size))

    def readline(self):
        return self._track(self.fd.readline())
def tqdm_load(fname):
    """Unpickle ``fname`` while showing a byte-level progress bar.

    The bar's total is the file size reported by ``stat``. As with any
    unpickling, this should only be used on files from a trusted source.

    Returns:
        The object stored in the pickle file.
    """
    with open(fname, "rb") as raw:
        size = Path(fname).stat().st_size
        with TQDMBytesReader(raw, total=size) as reader:
            return Unpickler(reader).load()