# Let's check how many files we have

In [2]:
import os
from skelcast.data.prepare_data import get_missing_files, get_skeleton_files, filter_missing

# Directory handed to get_skeleton_files, taken from the environment.
# `.get` is used instead of `os.environ[...]` so that an unset variable reaches
# the explicit check below (and its descriptive ValueError) instead of failing
# earlier with a bare KeyError.
DATA_DIR = os.environ.get('NTU_RGBD_DATA_DIR')
MISSING_FILES_DIR = '../data/missing'

if not DATA_DIR:
    raise ValueError("NTU_RGBD_DATA_DIR environment variable is not set or empty.")

# Names listed as missing under MISSING_FILES_DIR.
missing_skel_files = get_missing_files(MISSING_FILES_DIR)
print(f'Missing skeleton files: {len(missing_skel_files)}')

# Every .skeleton file found under DATA_DIR.
skeleton_files = get_skeleton_files(DATA_DIR)

print(f"Found {len(skeleton_files)} .skeleton files.")

# Drop the entries that filter_missing matches against the missing-names list.
skeleton_files_filtered = filter_missing(skeleton_files=skeleton_files,
                                         missing_skeleton_names=missing_skel_files)

nturgb_d.txt
nturgb_d_120.txt
Missing skeleton files: 837
Found 114480 .skeleton files.
Skeleton files after filtering: 114478 files left.


In [3]:
import numpy as np

from skelcast.data.dataset import read_skeleton_file

# Parse one sample from the filtered list (index 884 looks like an arbitrary
# pick -- TODO confirm) and inspect what the reader returns.
sample_path = skeleton_files_filtered[884]
mat = read_skeleton_file(file_path=sample_path)

# Display the shape of the array stored under the 'skel_body0' key.
mat.get('skel_body0').shape

(23, 25, 3)

In [6]:
def should_blacklist(filename):
    """Decide whether a skeleton file contains nothing but '0' lines.

    The first line of the file is parsed as an integer count. The file is
    flagged when each of the next ``count`` lines, stripped of whitespace,
    equals ``'0'``. A count of zero (or less) therefore flags the file as well,
    and a file that ends before ``count`` lines were read is not flagged.
    Presumably the header is the number of frames and a ``'0'`` line means no
    body was tracked in that frame -- confirm against the dataset format.

    Args:
        filename: Path of the file to inspect.

    Returns:
        True if the file should be blacklisted, False otherwise. False is also
        returned (after printing a message) when the file cannot be opened or
        read, or when its first line is not an integer.
    """
    try:
        # Opening happens inside the try so that a missing or unreadable file
        # is reported and skipped instead of aborting the whole scan.
        with open(filename, 'r') as file:
            header = file.readline().strip()
            try:
                num_lines = int(header)
            except ValueError:
                # Handle the case where the first line is not a number
                print(f"Error: The file {filename} does not start with a number.")
                return False

            # The generator lets all() stop at the first line that is not '0',
            # so files with real data are rejected after a single extra read.
            # At end of file readline() yields '', which also counts as
            # "not '0'", so truncated files are rejected too.
            return all(file.readline().strip() == '0' for _ in range(num_lines))
    except (OSError, UnicodeDecodeError) as e:
        # File not found, permission problems, undecodable content, ...
        print(f"An error occurred: {e}")
        return False
        
# Walk over every discovered skeleton file (the unfiltered list) and collect
# the ones that should_blacklist flags, reporting each hit as it is found.
blacklisted_files = []
for fname in skeleton_files:
    if not should_blacklist(fname):
        continue
    print(f'Found file that needs to be blacklisted: {fname}')
    blacklisted_files.append(fname)


Found file that needs to be blacklisted: /home/kaseris/Documents/data_ntu_rbgd/nturgbd_skeletons_s018_to_s032/S031C003P082R002A067.skeleton
Found file that needs to be blacklisted: /home/kaseris/Documents/data_ntu_rbgd/nturgbd_skeletons_s018_to_s032/S027C003P080R002A061.skeleton
Found file that needs to be blacklisted: /home/kaseris/Documents/data_ntu_rbgd/nturgbd_skeletons_s018_to_s032/S020C002P041R001A063.skeleton
Found file that needs to be blacklisted: /home/kaseris/Documents/data_ntu_rbgd/nturgbd_skeletons_s018_to_s032/S031C002P067R001A067.skeleton
Found file that needs to be blacklisted: /home/kaseris/Documents/data_ntu_rbgd/nturgbd_skeletons_s018_to_s032/S026C003P069R002A075.skeleton
Found file that needs to be blacklisted: /home/kaseris/Documents/data_ntu_rbgd/nturgbd_skeletons_s018_to_s032/S022C003P061R002A061.skeleton
Found file that needs to be blacklisted: /home/kaseris/Documents/data_ntu_rbgd/nturgbd_skeletons_s018_to_s032/S020C002P044R001A084.skeleton
Found file that need

In [7]:
# Number of skeleton files that should_blacklist flagged during the scan.
len(blacklisted_files)

356

In [8]:
# Reduce every discovered path to its bare name: directory part and file
# extension removed. A set gives constant-time membership checks below.
skeleton_filenames = set(
    os.path.splitext(os.path.basename(path))[0] for path in skeleton_files
)

# Keep the entries of missing_skel_files that nevertheless exist on disk
# (assumes those entries are extension-less names -- TODO confirm).
contained_files = list(
    filter(lambda name: name in skeleton_filenames, missing_skel_files)
)

In [18]:
# Read the complete text of the first discovered skeleton file into `data`.
with open(skeleton_files[0], 'r') as f:
    data = f.read()

In [21]:
# Write the text held in `data` to a sample file. The target directory is
# created first because opening a file for writing does not create missing
# parent directories and would otherwise fail with FileNotFoundError.
sample_path = '../data/skeletons/sample.txt'
os.makedirs(os.path.dirname(sample_path), exist_ok=True)
with open(sample_path, 'w') as f:
    f.write(data)