In [1]:
## Tests and utilities to verify/query the per-weld dataset

In [2]:
import numpy as np
import os
from utils import parse_labelfile, parse_dataset_file
import csv

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [3]:
## Test 1:
## Check that every sample in the set (train or test) has corresponding weld point clouds and label files
## Specifically:
## Count the point cloud and label files for each sample, check that the two counts match, and identify anomalies
## Check those counts against the expected count from the original label file and identify any discrepancies
## 
## 

In [4]:
#################
# DATASET SPLIT SELECTION
#################

# Which split to verify: "test" or "train". Every path below is derived from
# this value, so switching splits is a one-line change instead of swapping
# commented-out blocks.
set_name = "test"

# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
root_dir = "C:/Users/KZTYLF/Documents/playground/GNN UIs/GNN InstanceSegmentation/Recreating Dataset/"

# Text file listing the samples that belong to the selected split.
set_text = root_dir + "_data/" + set_name + ".txt"

# Root folder of the per-weld data for the selected split, and its sub-folders
# holding the per-weld point clouds and label files.
set_root = root_dir + "locationing_dataset/" + set_name + "/"
set_weld_pcs = set_root + "point_clouds/"
set_weld_labels = set_root + "labels/"

In [5]:
## Counting samples from the set list
set_samples = parse_dataset_file(set_text)
print("Original set samples count:", len(set_samples))
print()

## To verify point cloud and label file counts match
print("Checking counts of weld pointclouds and labels")

# set_weld_labels / set_weld_pcs start out as directory paths and are rebound
# to lists of file names below (later cells iterate over those lists).
# Remember the directories the first time through so that re-running this cell
# does not call os.listdir() on a list and fail.
if isinstance(set_weld_labels, (str, os.PathLike)):
    set_weld_labels_dir = set_weld_labels
if isinstance(set_weld_pcs, (str, os.PathLike)):
    set_weld_pcs_dir = set_weld_pcs

set_weld_labels = os.listdir(set_weld_labels_dir)
set_weld_pcs = os.listdir(set_weld_pcs_dir)
print("Labels: ", len(set_weld_labels))
print("Point Clouds: ", len(set_weld_pcs))
print()

Original set samples count: 121

Checking counts of weld pointclouds and labels
Labels:  526
Point Clouds:  526



In [6]:
print("Checking respective counts generated against the expected number as itemized in orginal label file, and identifying anomalies...")

label_and_pc_anomalies = []      # tags whose label count and point cloud count differ
vs_orig_anomalies = []           # tags whose label count differs from the original count
vs_orig_anomalies_expanded = []  # [tag, orig_count, label_count, pc_count] per such tag

## Looping through each sample
for sample in set_samples:
    tag = sample['name']

    ## Expected number of welds as itemized in the original label file
    orig_count = len(parse_labelfile(sample["label_path"])['welds'])

    ## Count the generated label files and point cloud files whose name
    ## contains this sample's tag.
    ## NOTE(review): this is a substring match; it would over-count if one
    ## sample's tag can appear inside another sample's file names -- confirm.
    label_count = sum(1 for label_filename in set_weld_labels if tag in label_filename)
    pc_count = sum(1 for pc_filename in set_weld_pcs if tag in pc_filename)

    ## Check label counts against point cloud counts
    ## Collecting anomalous pairings
    if label_count != pc_count:
        label_and_pc_anomalies.append(tag)

    ## Check label counts against original label counts
    ## Collecting anomalous pairings
    if label_count != orig_count:
        vs_orig_anomalies.append(tag)
        vs_orig_anomalies_expanded.append([tag, orig_count, label_count, pc_count])
        print(tag, orig_count, label_count, pc_count)

## Summary prints
print("label_and_pc_anomalies count: ", len(label_and_pc_anomalies))
print("vs_orig_anomalies count: ", len(vs_orig_anomalies))

Checking respective counts generated against the expected number as itemized in orginal label file, and identifying anomalies...
LH-3-231201605-Pass-2023_06_12-10-46-06-374 6 4 4
LH-2-231201596-Pass-2023_06_12-9-22-40-303 4 2 2
LH-6-231201595-Pass-2023_06_12-9-21-22-515 4 2 2
LH-3-231201600-Pass-2023_06_12-9-38-37-588 6 4 4
LH-3-231201597-Pass-2023_06_12-9-27-19-422 6 4 4
RH-7-231201596-Fail-2023_06_13-12-19-11-036 6 4 4
LH-6-231201598-Pass-2023_06_12-9-30-40-570 4 2 2
LH-2-231201599-Pass-2023_06_12-9-34-34-210 4 2 2
LH-3-231201596-Pass-2023_06_12-9-22-44-921 6 4 4
LH-6-231201602-Pass-2023_06_12-9-53-25-942 4 2 2
LH-2-231201595-Pass-2023_06_12-9-21-03-886 4 2 2
RH-9-231201591-Pass-2023_06_09-10-51-11-577 4 2 2
RH-2-231201593-Pass-2023_06_09-10-54-31-162 4 3 3
RH-4-231201590-Fail-2023_06_09-10-48-34-421 6 3 3
RH-7-231201616-Fail-2023_06_12-12-14-28-799 6 3 3
RH-2-231201588-Pass-2023_06_09-10-44-32-644 4 3 3
RH-8-231201592-Pass-2023_06_09-10-53-26-003 4 2 2
LH-6-231201603-Fail-2023_06_13

In [7]:
## Sorting anomalies by tag name (first column of each row) before writing to CSV
vs_orig_anomalies_expanded = sorted(vs_orig_anomalies_expanded, key=lambda row: row[0])

# Outputting to csv.
# newline="" hands line-ending control to the csv module; without it each row
# is followed by an extra blank line on Windows.
with open(set_root+"anomalies.csv", "w", newline="") as csvfile:
    _writer = csv.writer(csvfile)
    # Header row, then one row per anomalous sample.
    _writer.writerow(['tag', 'orig_count', 'label_count', 'pc_count'])
    _writer.writerows(vs_orig_anomalies_expanded)