# Experiment Template Notebook

In [6]:
import os
from pathlib import Path
import random
from pprint import pprint
import json
import pandas as pd

from exp_utils import PointnetPath, JumpDir, PointCloudTable

path_obj = PointnetPath("template")
with JumpDir(path_obj.PN_REPO, path_obj.HERE):
    import train_partseg as train
    import test_partseg as test

_ = [
    print(att,": ", getattr(path_obj, att))
    for att in dir(path_obj)
    if not att.startswith("__")
    and att != "where_am_i"
]
print(f"train_partseg.ROOT_DIR: {train.ROOT_DIR}")
print(f"test_partseg.ROOT_DIR: {test.ROOT_DIR}")

DATA_ROOT :  /home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data
HERE :  /home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/pointnet-pytorch-jupyter/experiments
PN_REPO :  /home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/pointnet-pytorch-jupyter
QW_REPO :  /home/kuwaharah436/Documents/paprika-paper-2024/qwa-work
data_dir :  /home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/pointnet-pytorch-jupyter/data/template
log_dir :  template
log_root :  /home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data/pointnet-log
train_partseg.ROOT_DIR: /home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/pointnet-pytorch-jupyter
test_partseg.ROOT_DIR: /home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/pointnet-pytorch-jupyter


## Data Prep

- `qwa-work/`
  - `data/`
    - `split-data/`: from here
  - `pointnet-pytorch-jupyter/`
    - `data/`
      - `template/`: to here
        - `paprika/`
        - `train_test_split/`
        - `synsetoffset2category.txt`

In [2]:
RAW_PATH = path_obj.DATA_ROOT / "split-data"
assert RAW_PATH.exists()
RAW_PATH#, list(RAW_PATH.iterdir())

PosixPath('/home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data/split-data')

### txt file clumns

| 0 | 1 | 2 | 3 | 4 | 5 | 6 |
|---|---|---|---|---|---|---|
|`x`|`y`|`z`|`nx`|`ny`|`nz`|`label`: 0\|1|

label

- `0`: non-leaf
- `1`: leaf

In [3]:
# Only columns below are left
# Order sensitive; be carefull
export_cols = ["x", "y", "z", "nx", "ny", "nz", "label"]

In [4]:
with open(
    path_obj.DATA_ROOT / "complete-data" / "labels.json", 'r', encoding="utf-8"
) as js_f:
    labels = json.load(js_f)
label_map = {
    labels["marker"]: 0,
    labels["rod"]   : 0,
    labels["stem"]  : 0,
    labels["fruit"] : 0,
    labels["leaves"]: 1,
}
pprint(labels)
pprint(label_map)

{'fruit': 4, 'leaves': 2, 'leaves-fruit': 24, 'marker': 0, 'rod': 1, 'stem': 3}
{0: 0, 1: 0, 2: 1, 3: 0, 4: 0}


### Read and Write Data

### labels

In [15]:
with open(
    path_obj.DATA_ROOT / "complete-data" / "labels.json", "r", encoding="utf-8"
) as js_f:
    labels = json.load(js_f)
pprint(labels)

{'fruit': 4, 'leaves': 2, 'leaves-fruit': 24, 'marker': 0, 'rod': 1, 'stem': 3}


RAW_PATH / paprika id

In [18]:
raw_pprkid = list(RAW_PATH.iterdir())
pprint(raw_pprkid)

[PosixPath('/home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data/split-data/Nagano_0209_2'),
 PosixPath('/home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data/split-data/Nagano_0316_2'),
 PosixPath('/home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data/split-data/Artega_0309_1'),
 PosixPath('/home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data/split-data/Nagano_0309_2'),
 PosixPath('/home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data/split-data/Nesditt_0316_2'),
 PosixPath('/home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data/split-data/Nesditt_0209_2'),
 PosixPath('/home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data/split-data/Trirosso_0209_2'),
 PosixPath('/home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data/split-data/Nesditt_0316_1'),
 PosixPath('/home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data/split-data/Trirosso_0309_1'),
 PosixPath('/home/kuwaharah436/Documents/paprika-paper-2024/qwa-work/data/s

In [None]:
txt_list = [] # qwa-work/data/split-data/paprika/
for pprk_path in path_obj.data_dir:
    txt_list = txt_lit

In [5]:
raw_file_paths = [] # qwa-work/data/split-data/<paprika id>/_*.txt
for pprk_dir in RAW_PATH.iterdir():
    raw_file_paths = raw_file_paths + list(pprk_dir.iterdir())
#raw_file_paths[:10]

In [12]:
skip_if_exists = False
for f_path in raw_file_paths:
    # skip json
    if f_path.suffix != ".txt":
        continue

    # skip if already exists
    out_path = path_obj.data_dir / "paprika" / (
        f_path.parent.name + f_path.name
    )
    if skip_if_exists and out_path.exists():
        continue

    # read and prep data
    raw_df = pd.read_table(f_path, header=0, sep=" ")
    cleaned_df = PointCloudTable.prep(
        raw_df, export_cols, label_map
    )
    if cleaned_df is None:
        continue

    # write
    out_path.parent.mkdir(exist_ok=True, parents=True)
    cleaned_df.to_csv(
        out_path, sep=" ", header=False, index=False
    )
#cleaned_df

### Train Test Validation split

In [13]:
train_ratio = 0.7
test_ratio  = 0.2
val_ratio   = 0.1

ratio_sum = (train_ratio + test_ratio + val_ratio)
train_ratio = train_ratio / ratio_sum
test_ratio = test_ratio / ratio_sum
val_ratio = val_ratio / ratio_sum
train_ratio, test_ratio, val_ratio

(0.7000000000000001, 0.20000000000000004, 0.10000000000000002)

In [14]:
train_data = []
test_data = []
val_data = []

txt_ls = os.listdir(path_obj.data_dir / "paprika")
txt_ls = [
    "shape_data/paprika/" + fname.strip(".tx") for fname in txt_ls
]
random.shuffle(txt_ls)
ls_len = len(txt_ls)
train_test_idx = round(ls_len * train_ratio)
test_val_idx = train_test_idx + round(ls_len * test_ratio)
train_data = train_data + txt_ls[:train_test_idx - 1]
test_data = test_data + txt_ls[train_test_idx:test_val_idx - 1]
val_data = val_data + txt_ls[test_val_idx:]
len(train_data), len(test_data), len(val_data)

(3426, 978, 490)

In [15]:
train_data[:3], test_data[:3], val_data[:3]

(['shape_data/paprika/Nagano_0209_1_361',
  'shape_data/paprika/Artega_0309_1_240',
  'shape_data/paprika/Nagano_0309_2_302'],
 ['shape_data/paprika/Artega_0209_2_319',
  'shape_data/paprika/Nesditt_0309_3_157',
  'shape_data/paprika/Nesditt_0316_1_176'],
 ['shape_data/paprika/Nesditt_0316_2_477',
  'shape_data/paprika/Artega_0309_1_345',
  'shape_data/paprika/Artega_0309_1_380'])

In [16]:
split_dir = path_obj.data_dir / "train_test_split"
split_dir.mkdir(exist_ok=True, parents=False)
with open(split_dir / "shuffled_train_file_list.json", "w", encoding="utf-8") as f:
    json.dump(train_data, f, indent=4)
with open(split_dir / "shuffled_test_file_list.json", "w", encoding="utf-8") as f:
    json.dump(test_data, f, indent=4)
with open(split_dir / "shuffled_val_file_list.json", "w", encoding="utf-8") as f:
    json.dump(val_data, f, indent=4)

In [17]:
with open(path_obj.data_dir / "synsetoffset2category.txt", "w", encoding="utf-8") as f:
    f.write("paprika    paprika")

## Parameters

In [None]:
# check gpu usage to set gpu_idx
!nvidia-smi

In [2]:
gpu_idx = "6" # str

seg_classes = {
    "paprika": [0, 1],
    # 0: non-leaves
    # 1: leaves

    # padding for 2:49
    'Earphone'  : [16, 17, 18],
    'Motorbike' : [30, 31, 32, 33, 34, 35],
    'Rocket'    : [41, 42, 43],
    'Car'       : [8, 9, 10, 11],
    'Laptop'    : [28, 29],
    'Cap'       : [6, 7],
    'Skateboard': [44, 45, 46],
    'Mug'       : [36, 37],
    'Guitar'    : [19, 20, 21],
    'Bag'       : [2, 3, 4, 5],
    'Lamp'      : [24, 25, 26, 27],
    'Table'     : [47, 48, 49],
    'Pistol'    : [38, 39, 40],
    'Chair'     : [12, 13, 14, 15],
    'Knife'     : [22, 23]
}

train_args = {
    # model params
    "model"     : "pointnet2_part_seg_msg",
    # "model"    : "pointnet2_part_seg_ssg",

    # data params
    "normal"    : True,
    "log_root"  : path_obj.log_root,
    "log_dir"   : path_obj.log_dir,
    "data_dir"  : path_obj.data_dir,

    # training params
    "gpu"       : gpu_idx,
    # "npoint"    : 2048,
    # "batch_size": 16,
    # "decay_rate": 1e-4,
    # "step_size" : 20,
    # "lr_decay"  : 0.5,
    #"epoch"      : 500,
    "epoch"     : 1000,
    # "optimizer" : "Adam",
}
test_args = {
    # data params
    "normal"    : True,
    "log_root"  : path_obj.log_root,
    "log_dir"   : path_obj.log_dir,
    "data_dir"  : path_obj.data_dir,

    # testing params
    "gpu"       : gpu_idx,
    # "num_points": 2048,
    # "batch_size": 24,
    # "num_votes" : 3,
}

## Training

In [None]:
train.main(
    train.CommandLineArgs(**train_args),
    seg_classes
)

## Testing

In [None]:
(
    test_metrics,
    shape_ious,
    total_correct_class,
    total_seen_class
) = test.main(
    test.CommandLineArgs(**test_args),
    seg_classes
)

In [8]:
test_metrics, shape_ious["paprika"]

({'accuracy': np.float64(0.8492094636694786),
  'class_avg_accuracy': np.float64(nan),
  'class_avg_iou': np.float64(nan),
  'instance_avg_iou': np.float64(0.6821685047558276)},
 np.float64(0.6821685047558276))

In [9]:
seg_ids = [seg_id for seg_val_sublist in seg_classes.values() for seg_id in seg_val_sublist]
seg_correct = dict(zip(range(len(seg_ids)), total_correct_class))
seg_total = dict(zip(range(len(seg_ids)), total_seen_class))

seg_acc = {}
for id, correct_n in seg_correct.items():
    total_n = seg_total[id]
    if total_n == 0:
        seg_acc[id] = 0
    else:
        seg_acc[id] = correct_n / total_n
# print(seg_acc)

seg_class_acc = {}
for cat in seg_classes:
    seg_class_acc[cat] = {}
    for id in seg_classes[cat]:
        seg_class_acc[cat][id] = seg_acc[id]

pprint(seg_class_acc["paprika"])

{0: np.float64(0.6697862220942594), 1: np.float64(0.9504354508196722)}
