In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import cv2
import random

In [3]:
# Notebook configuration: dataset locations and derived experiment paths.
NUM_SAMPLES = 5000

# Root of the datasets tree, relative to the notebook's working directory.
DS = Path("../../datasets/")
BASE_DS_DIR = DS / "T004-taco-crops"
# NOTE(review): PROJECT_CODE is not defined in any visible cell, so this line
# raises NameError and the assignments below it never run on a fresh kernel.
# Later cells read TEST_BIG_IMG, which is assigned after this point.
EXP_BASE = BASE_DS_DIR / PROJECT_CODE
OUTS = BASE_DS_DIR / "synth"
LOG_BASE = EXP_BASE / "log"
# NOTE(review): machine-specific absolute path; the notebook is not portable
# as long as this is hard-coded.
TACO_BASE_DIR = Path("/Users/hariomnarang/Desktop/personal/TACO/data/")
ANN_FILE = TACO_BASE_DIR / "annotations.json"
TEST_BIG_IMG = BASE_DS_DIR / "14325.jpeg"

# Create the log directory (and any missing parents) if it is not there yet.
LOG_BASE.mkdir(parents=True, exist_ok=True)
# Cell output: a tuple of three booleans telling which inputs exist on disk.
DS.exists(), TACO_BASE_DIR.exists(), ANN_FILE.exists()

NameError: name 'PROJECT_CODE' is not defined

In [4]:
# I'm assuming the model is good; we only rename whatever is done

def _is_numeric_dir(d):
    try:
        int(d.name)
        return True
    except:
        return False

def _get_max_num_dir():
    """Return the largest integer used as a directory name under ``BASE_DS_DIR``.

    Only direct children that are directories and whose name passes
    ``_is_numeric_dir`` are considered.

    Returns:
        int: The largest such number, or 1 when no numeric directory exists.
    """
    numbered = [
        int(entry.name)
        for entry in BASE_DS_DIR.glob("*")
        if entry.is_dir() and _is_numeric_dir(entry)
    ]
    # NOTE(review): with the fallback of 1, the first generated name in an
    # empty dataset directory would be "002" -- confirm this is intended.
    return max(numbered, default=1)

def _num_to_dir(n):
    return f"{n:03d}"

def _next_dir_name():
    """Return the name for the next numbered directory.

    The name is one more than the value reported by ``_get_max_num_dir``,
    formatted by ``_num_to_dir``.
    """
    next_number = _get_max_num_dir() + 1
    return _num_to_dir(next_number)

_next_dir_name()


'006'

In [None]:
import shutil
import json
import csv
from typing import Optional

def _dict_to_markdown_table(d):
    keys = list(d.keys())
    vals = list(d.values())

    max_key = max(len(str(k)) for k in keys + ["Key"])
    max_val = max(len(str(v)) for v in vals + ["Value"])

    lines = []
    lines.append(f"| {'Key'.ljust(max_key)} | {'Value'.ljust(max_val)} |")
    lines.append(f"|{'-' * (max_key + 2)}|{'-' * (max_val + 2)}|")

    for k, v in d.items():
        lines.append(f"| {str(k).ljust(max_key)} | {str(v).ljust(max_val)} |")

    return "\n".join(lines)

class ParsedDir:
    """An experiment directory whose name encodes its parameters.

    The directory name is split on ``-`` and every part on ``=``, giving a
    ``params`` dict of strings (see ``parse``). The class also exposes the
    locations of the artifacts read elsewhere in the notebook and can write a
    README / params.json into the directory before moving it.
    """

    def __init__(
        self, orig_path, params
    ):
        # orig_path: directory as found on disk. It is NOT updated by dump(),
        # so after a move it no longer exists.
        self.orig_path = orig_path
        # params: dict of parameter name -> string value taken from the name.
        self.params = params

    @classmethod
    def parse(cls, dir_path) -> "ParsedDir":
        """Build a ``ParsedDir`` from a directory named ``K=V-K=V-...``.

        Args:
            dir_path: Path-like object with a ``name`` attribute.

        Raises:
            ValueError: when a ``-``-separated part does not contain exactly
                one ``=``; the message names the offending part.
        """
        res = {}
        for part in dir_path.name.split("-"):
            try:
                k, v = part.split("=")
            except ValueError as ex:
                # ValueError is still an Exception, so existing handlers keep
                # working, but callers can now catch the specific failure.
                raise ValueError(f"part: {part}") from ex
            res[k] = v
        return cls(dir_path, res)

    def get_img_size(self) -> Optional[int]:
        """Return ``params["FILE_SIZE"]`` as an int, or None if unavailable.

        A missing or non-integer value is reported with ``print`` and mapped
        to None rather than raised.
        """
        try:
            return int(self.params["FILE_SIZE"])
        except (KeyError, TypeError, ValueError) as ex:
            print(f"could not get image size for {self.orig_path} reason={ex}")
            return None

    @property
    def export_pkl(self):
        """Path of ``log/export.pkl`` inside the original directory."""
        return self.orig_path / "log" / "export.pkl"

    @property
    def history(self):
        """Path of ``log/history.csv`` inside the original directory."""
        return self.orig_path / "log" / "history.csv"

    @property
    def result_png(self):
        """Path of ``res.png`` inside the original directory."""
        return self.orig_path / "res.png"

    def dump(self, new_dir, dry_run=True):
        """Write README.md and params.json, then move the directory.

        Args:
            new_dir: Destination path for the directory; must not exist yet.
            dry_run: When True (default) only print what would be done.

        Raises:
            FileExistsError: when ``dry_run`` is False and ``new_dir`` already
                exists. Nothing is written or moved in that case.
        """
        new_dir = Path(new_dir)
        dest = self.orig_path / "README.md"

        # shutil.move() into an existing directory nests the source inside it
        # instead of renaming, so refuse up front, before touching any file.
        if new_dir.exists():
            problem = f"destination already exists: {new_dir}"
            if not dry_run:
                raise FileExistsError(problem)
            print(f"WARNING: {problem}")

        content = self._get_content_readme()
        print(content)
        print(f"writing to {dest}")
        if not dry_run:
            self._dump_params(self.orig_path / "params.json")
            # Append so that an already existing README is kept.
            with open(dest, "a") as f:
                f.write(content)

        print(f"Move: {self.orig_path} -> {new_dir}")
        if not dry_run:
            shutil.move(self.orig_path, new_dir)

    def _get_content_readme(self):
        """Build the README text: a params table plus, if present, the history."""
        content = "\n# Params\n\n"
        content += _dict_to_markdown_table(self.params)

        if self.history.exists():
            content += "\n\n---"
            content += "\n\n# History"
            content += "\n\n"
            content += csv_to_markdown(self.history)
            content += "\n\n---"
        return content

    def _dump_params(self, path):
        """Serialize ``params`` as JSON into ``path`` (overwriting it)."""
        with open(path, "w") as f:
            json.dump(self.params, f)

def csv_to_markdown(path):
    """Convert a CSV file into an aligned markdown table.

    The first non-blank row is used as the header and fixes the number of
    columns; shorter rows are padded with empty cells and extra cells in
    longer rows are dropped.

    Args:
        path: Location of a UTF-8 encoded CSV file.

    Returns:
        str: The table without a trailing newline, or "" when the file has no
        non-blank rows.
    """
    with open(path, newline="", encoding="utf-8") as f:
        # csv.reader yields [] for blank lines; they would otherwise become
        # empty table rows (or a zero-column header when the file starts with
        # a blank line).
        rows = [row for row in csv.reader(f) if row]

    if not rows:
        return ""

    cols = len(rows[0])
    # At least one character per column so the separator row always has a
    # hyphen, even when every cell of a column is empty.
    widths = [
        max([1] + [len(row[i]) for row in rows if i < len(row)])
        for i in range(cols)
    ]

    def fmt(row):
        return "| " + " | ".join(
            (row[i] if i < len(row) else "").ljust(widths[i])
            for i in range(cols)
        ) + " |"

    lines = [fmt(rows[0]), "| " + " | ".join("-" * w for w in widths) + " |"]
    # Extending (rather than joining a separate body string) avoids a trailing
    # newline when the CSV only has a header.
    lines.extend(fmt(r) for r in rows[1:])

    return "\n".join(lines)

In [44]:
# Collect every non-numeric "FINE_TUNE*" directory under BASE_DS_DIR, parse its
# name into a ParsedDir and keep only those that contain log/export.pkl.
drs = [
    parsed
    for parsed in (
        ParsedDir.parse(candidate)
        for candidate in BASE_DS_DIR.glob("*")
        if candidate.is_dir()
        and not _is_numeric_dir(candidate)
        and candidate.name.startswith("FINE_TUNE")
    )
    if parsed.export_pkl.exists()
]

In [45]:
len(drs)

99

In [46]:
# now create res png if it does not exist
from tqdm import tqdm
from mtrain.smallnet.predict import tile_image_and_predict
from fastai.vision.all import load_learner

def make_res_pngs(drs, dry_run=True):
    """Create ``res.png`` for every directory that does not have one yet.

    Args:
        drs: Iterable of ``ParsedDir`` objects.
        dry_run: When True (default) only print which directories would be
            processed; nothing is loaded or written.

    Directories whose size cannot be determined (``get_img_size`` returns a
    falsy value) are skipped.
    """
    for dr in tqdm(drs):
        if dr.result_png.exists():
            continue
        sz = dr.get_img_size()
        if dry_run:
            print(f"run inference: {dr.orig_path} size={sz}")
            continue
        if not sz:
            # No prediction is made without a size, so skip before paying for
            # loading the learner.
            continue
        # NOTE: load_learner unpickles the export file; only use it on
        # exports from a trusted source.
        learn = load_learner(dr.export_pkl)
        res = tile_image_and_predict(TEST_BIG_IMG, learn, sz)
        plt.imsave(dr.result_png, res)

In [47]:
make_res_pngs(drs, False)

100%|██████████| 99/99 [00:00<00:00, 54874.60it/s]


In [52]:
# Archive every experiment that has all three artifacts (result image, exported
# model, training history) under the next free numbered directory.
for dr in drs:
    # A missing source directory means it was already moved by an earlier run.
    if dr.orig_path.exists() and all(
        artifact.exists()
        for artifact in (dr.result_png, dr.export_pkl, dr.history)
    ):
        next_dir = BASE_DS_DIR / _next_dir_name()
        dr.dump(next_dir, False)

In [72]:
import json

class DumpedDir:
    """A directory together with the params recovered from its README.md."""

    def __init__(self, d: Path, params: dict):
        self.d, self.params = d, params

    def dump_params(self):
        """Write ``params`` as JSON to ``params.json`` inside the directory."""
        target = self.d / "params.json"
        with target.open("w") as fh:
            fh.write(json.dumps(self.params))

    @classmethod
    def parse_markdown(cls, d: Path):
        """Build a ``DumpedDir`` from ``d / "README.md"``.

        Returns:
            A ``DumpedDir`` holding the parsed params, or None when the
            directory has no README.md.
        """
        readme = d / "README.md"
        if readme.exists():
            with open(readme) as fh:
                text = fh.read()
            # Only the params table is kept; the history rows are discarded.
            parsed_params, _history = parse_markdown_params_and_history(text)
            return cls(d, parsed_params)
        return None
        
def parse_markdown_params_and_history(md):
    """Extract the params dict and the history rows from README markdown.

    Consecutive lines starting with ``|`` form a table. The first table is
    read as the params table (columns "Key" and "Value"); the second one, if
    present, as the history table.

    Args:
        md: Markdown text.

    Returns:
        tuple: ``(params, history_rows)`` where ``params`` maps each "Key"
        cell to its "Value" cell and ``history_rows`` is a list of dicts keyed
        by the history header (empty when there is no second table).

    Raises:
        IndexError: when ``md`` contains no table at all.
        KeyError: when the first table lacks a "Key" or "Value" column.
    """
    def parse_table(lines):
        header = [h.strip() for h in lines[0].strip("|").split("|")]
        rows = []
        for line in lines[2:]:  # skip header + separator
            vals = [v.strip() for v in line.strip("|").split("|")]
            rows.append(dict(zip(header, vals)))
        return rows

    tables = []
    current = []

    for raw_line in md.splitlines():
        line = raw_line.rstrip()
        if line.startswith("|"):
            current.append(line)
        elif current:
            # A non-table line closes the table collected so far.
            tables.append(current)
            current = []
    if current:
        tables.append(current)

    if not tables:
        raise IndexError("no markdown table found in README content")

    params_rows = parse_table(tables[0])
    # The history section is only written when a history file existed, so a
    # README with just the params table is valid input.
    history_rows = parse_table(tables[1]) if len(tables) > 1 else []

    params = {row["Key"]: row["Value"] for row in params_rows}

    return params, history_rows

In [73]:
# Re-read the params of already archived (numbered) directories from their
# README.md files.
dumps = BASE_DS_DIR.glob("*")
dumps = filter(Path.is_dir, dumps)
dumps = filter(_is_numeric_dir, dumps)
# NOTE(review): 6 presumably is the first directory number created by this
# notebook ("006" was the next free name shown above) -- confirm.
dumps = filter(lambda d: int(d.name) >= 6, dumps)
dumps = map(DumpedDir.parse_markdown, dumps)
# parse_markdown returns None for a directory without README.md; drop those so
# that later cells can call methods on every element.
dumps = [dumped for dumped in dumps if dumped is not None]

In [74]:
# Write params.json into every parsed directory.
# NOTE(review): DumpedDir.parse_markdown returns None when a directory has no
# README.md; if `dumps` contains such an entry this loop raises AttributeError.
for d in dumps:
    d.dump_params()