In [2]:
import numpy as np
import numpy.ma as ma
import os
import glob
from multiprocessing import Pool

# Full path to a single .npz file. None of the functions defined in this cell
# read this global.
path = "H:\\Yiding\\muti_layer\\2018_true_wide_90-150w_abi_idx\\2018283185613_66324_CS_2B-CLDCLASS-LIDAR_GRANULE_P1_R05_E08_F03.npz"

# Directory prefix for the ABI .npz files. process_file() concatenates this with
# an ABI file name taken from the index file and passes the result to glob.glob().
abi_path = "H:\\Yiding\\muti_layer\\2018_feature_wide_90-150w\\"


def get_chip(data, x, y, size=128):
    """Cut a ``size`` x ``size`` window out of ``data`` centred on ``(x, y)``.

    The window covers rows ``x - size // 2`` up to (not including)
    ``x - size // 2 + size`` and the matching column range around ``y``.

    Parameters
    ----------
    data : numpy.ndarray
        Array to slice along its first two axes.
    x, y : int
        Centre of the window along axis 0 and axis 1.
    size : int, optional
        Edge length of the window. Defaults to 128.

    Returns
    -------
    numpy.ndarray or None
        The sliced window (a view of ``data``, not a copy) when its shape is
        exactly ``(size, size)``; ``None`` when the window does not fit inside
        ``data`` or the slice has any other shape.
    """
    half = size // 2
    x_start, y_start = x - half, y - half

    # A negative start index would make NumPy count from the END of the axis,
    # silently returning a window from the wrong place instead of failing.
    if x_start < 0 or y_start < 0:
        return None

    chip = data[x_start: x_start + size, y_start: y_start + size]

    # Slices that run past the far edge are truncated by NumPy; reject them.
    if chip.shape == (size, size):
        return chip
    return None


def process_file(data):
    """Build one multi-channel chip for a single work item.

    Parameters
    ----------
    data : sequence
        ``data[0]`` is ``[abi_name, x, y]``: the ABI file name (or glob
        pattern) relative to the module-level ``abi_path``, plus the centre
        coordinates of the chip. ``data[1]`` is an opaque payload that is
        returned unchanged next to the chip.

    Returns
    -------
    tuple or None
        ``(chip, secondary)`` where ``chip`` has shape ``(128, 128, 16)``
        (one 128 x 128 window per matched file, stacked on axis 2) and
        ``secondary`` is ``data[1]``. ``None`` is returned when no file
        matches, when ``(x, y)`` is closer than 64 pixels to an edge of the
        first matched file's ``data`` array, or when the stacked result does
        not have exactly the expected shape (e.g. a channel was masked out).
    """
    HALF = 64
    EXPECTED_SHAPE = 128, 128, 16

    primary, secondary = data[0], data[1]
    this_abi, x, y = primary

    # glob returns matches in arbitrary order; sort so that the channel order
    # of the stacked chip is the same on every run and platform.
    all_abi_files = sorted(glob.glob(abi_path + this_abi))
    if not all_abi_files:
        # Nothing matched: there is no chip to build.
        return None

    # Use the first file only to learn the image dimensions; the context
    # manager closes the underlying .npz archive again.
    with np.load(all_abi_files[0]) as first_file:
        image_shape = first_file['data'].shape
    max_x, max_y = image_shape[0] - HALF, image_shape[1] - HALF

    # The full 128 x 128 window must fit inside the image.
    if not (HALF <= x <= max_x and HALF <= y <= max_y):
        return None

    channels = []
    for file in all_abi_files:
        with np.load(file) as abi:
            # Archives with more than one entry carry a mask:
            # 0 == good data, 1 == skip this channel at (x, y).
            if len(abi) != 1 and abi['mask'][x, y]:
                continue
            c = get_chip(abi['data'], x, y)
            if c is not None:
                channels.append(c)

    if not channels:
        return None

    chip = np.stack(channels, axis=2)
    if chip.shape == EXPECTED_SHAPE:
        return chip, secondary
    return None


def process_directory(directory, output_dir="H:\\chips_valid\\", processes=32, max_files=None):
    """Generate chip archives for the ``.npz`` index files in ``directory``.

    Each index file must contain the arrays ``abi_file_name``, ``abi_x`` and
    ``abi_y``. Every entry is handed to ``process_file`` through a process
    pool; the non-``None`` results are written to
    ``<output_dir>/<index name without extension>_chips.npz`` with two keys:
    ``chips`` (the image chips) and ``data`` (one ``{"ID", "x", "y"}`` dict per
    chip, stored as a pickled object array, so it must be loaded with
    ``allow_pickle=True``).

    Earlier revisions only looked at the second directory entry (a leftover
    ``[1:2]`` slice plus a trailing ``break``), which also meant nothing was
    processed when that entry was not an ``.npz`` file. All ``.npz`` files are
    now processed in sorted order; pass ``max_files`` to limit a trial run.

    Parameters
    ----------
    directory : str
        Folder containing the ``.npz`` index files.
    output_dir : str, optional
        Folder the chip archives are written to. Created if it is missing.
    processes : int, optional
        Number of worker processes in the pool.
    max_files : int or None, optional
        If given, only the first ``max_files`` index files (in sorted order)
        are processed.

    Returns
    -------
    int
        Total number of chips written across all processed index files.
    """
    # Sort so the processing order does not depend on the file system.
    index_names = sorted(name for name in os.listdir(directory) if name.endswith(".npz"))
    if max_files is not None:
        index_names = index_names[:max_files]

    total_chips = 0
    if not index_names:
        return total_chips

    # Fail early (before any expensive work) if the output folder is unusable.
    os.makedirs(output_dir, exist_ok=True)

    # One pool of worker processes is shared by all index files.
    with Pool(processes=processes) as pool:
        for filename in index_names:
            index_path = os.path.join(directory, filename)

            # Read each array once while the archive is open.
            with np.load(index_path) as f:
                abi_files, xs, ys = f['abi_file_name'], f['abi_x'], f['abi_y']

            entries = len(abi_files)

            # (primary, secondary) per entry: primary drives the chip
            # extraction, secondary is carried through as metadata.
            parameters = [
                ([abi_files[idx], xs[idx], ys[idx]], {"ID": idx, "x": xs[idx], "y": ys[idx]})
                for idx in range(entries)
            ]

            results = pool.map(process_file, parameters)

            # Drop entries for which no valid chip could be built.
            kept = [result for result in results if result is not None]
            chips = [result[0] for result in kept]
            associate_data = [result[1] for result in kept]

            out_path = os.path.join(output_dir, os.path.splitext(filename)[0] + "_chips.npz")
            np.savez(out_path, chips=chips, data=associate_data)

            total_chips += len(chips)
            print(f"PROCESSED {filename}\nGENERATED {len(chips)} CHIPS, WITH {len(associate_data)} ASSOCIATE DATA. TOTAL CHIPS: {total_chips}")

    return total_chips


# Entry point: run process_directory() on the folder of .npz index files.
# The guard matters because process_directory() starts a multiprocessing Pool;
# on platforms that start workers by re-importing the main module, unguarded
# top-level code would be executed again in every worker.
if __name__ == '__main__':
    directory = "H:\\Yiding\\muti_layer\\2018_true_wide_90-150w_abi_idx\\"
    process_directory(directory=directory)

ModuleNotFoundError: No module named 'h5py'

In [3]:
!pip install h5py

Defaulting to user installation because normal site-packages is not writeable
Collecting h5py
  Downloading h5py-3.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: h5py
Successfully installed h5py-3.11.0


In [5]:
import numpy as np
import numpy.ma as ma
import os
import glob
from multiprocessing import Pool
import h5py

In [None]:
# Full path to a single .npz file (same value as in the first cell of this notebook).
path = "H:\\Yiding\\muti_layer\\2018_true_wide_90-150w_abi_idx\\2018283185613_66324_CS_2B-CLDCLASS-LIDAR_GRANULE_P1_R05_E08_F03.npz"

# Directory prefix for the ABI .npz files (same value as in the first cell, where
# process_file() concatenates it with an ABI file name to build a glob pattern).
abi_path = "H:\\Yiding\\muti_layer\\2018_feature_wide_90-150w\\"