# Data loader

Data loader will load .npy data for machine learning purposes

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import glob
import pandas as pd
import os


In [2]:
DATA_FOLDER = "d:/MMT_BD_data/500"

In [3]:
folders = list(glob.iglob(f"{DATA_FOLDER}/*"))

In [11]:
def load_multiArray(filename):
    with open(filename, 'rb') as f:
        fsz = os.fstat(f.fileno()).st_size
        out = np.load(f)
        while f.tell() < fsz:
            out = np.vstack((out, np.load(f)))
    return out
    

In [12]:
for folder in folders:
    folder_name = os.path.split(folder)[1]
    stats = []
    files = list(glob.iglob(f"{folder}/*.npy"))
    for p in tqdm(files, desc=f"Processing {folder_name}: "):
        name = os.path.split(p)[1].split('.')[0]
        data = load_multiArray(p)
        count = len(data)
        n_points = np.sum(data != 0, axis=1)
        if count > 0:
            N = len(data[0])
            n_points = n_points/ N
            stats.append([name, count, np.mean(n_points)*100, np.max(n_points)*100,
                          np.min(n_points)*100, np.sum(n_points > 0.8)/count * 100,
                          np.sum(n_points > 0.5)/count * 100,
                          np.sum(n_points > 0.2)/count * 100])
        else:
            stats.append([name, 0, np.nan, np.nan, np.nan,np.nan,np.nan,np.nan])
    df_stats = pd.DataFrame(stats, columns=["name", "count", "mean %", "max %", "min %", "100-80%", "100-50%", "100-20%"])
    df_stats.to_csv(f"{DATA_FOLDER}/stats_{folder_name}.csv", index=False)

Processing Fall_2021:   0%|          | 0/178 [00:00<?, ?it/s]

Processing Spring_2020:   0%|          | 0/158 [00:00<?, ?it/s]

Processing stats_Fall_2021.csv: : 0it [00:00, ?it/s]

Processing stats_Fall_2021.xlsx: : 0it [00:00, ?it/s]

Processing stats_Spring_2020.csv: : 0it [00:00, ?it/s]

Processing stats_Spring_2020.xlsx: : 0it [00:00, ?it/s]

In [6]:
files

['d:/MMT_BD_data/500\\Fall_2021\\AISAT.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\ANIK.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\APEX.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\AQUA.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\ARABSAT.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\ARASE.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\ARIANE.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\ASC.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\ASIASAT.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\ASTRA.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\ASTRID.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\ATLAS.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\AURORA.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\AVUM.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\AZERSPACE.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\BEIDOU.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\BLOCK.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\BRAZILSAT.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\BREEZE-M.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\CAPELLA-6.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\CFOSAT.npy',
 'd:/MMT_BD_data/500\\Fall_2021\\CHIN