In [1]:
import sys
sys.path.append("../")
sys.path.append("../../")

import numpy as np
from src.config import FilterConfig, Config, DataConfig, FourierDatasetConfig, PACKAGE_PATH
from src.nn.datasets.utils import split_object_data_to_test_validation
from src.data.data_load import load_data
from src.data.filters import filter_data
from src.experiments.constants import *
import torch
import numpy as np

In [2]:
# Data configuration for this notebook: five labels, each listed at the same
# position as a name regex (presumably `load_data` pairs them by index — confirm).
# BATCH_SIZE and MAX_EXAMPLES are not defined in this notebook; presumably they
# come from the star import of `src.experiments.constants` — confirm.
data_config = DataConfig(
    path=f"{PACKAGE_PATH}/resources/Fall_2021_R_B_globalstar.csv",
    labels=["cz_3", "falcon_9", "atlas", "h2a", "globalstar"],
    regexes=[
        r'CZ-3B.*',
        r'FALCON_9.*',
        # Previously `[5|V]`: inside a character class `|` is a literal pipe,
        # not alternation, so the old set also accepted "ATLAS_|_CENTAUR_R|B".
        r'ATLAS_[5V]_CENTAUR_R\|B$',
        r'H-2A.*',
        r'GLOBALSTAR.*',
    ],
    convert_to_mag=False,
    batch_size=BATCH_SIZE,
    number_of_training_examples_per_class=MAX_EXAMPLES,
    validation_split=0.1,
    dataset_class="FourierDataset",
    dataset_arguments={},
    # Thresholds handed to `filter_data` in the next cell via `data_config.filter`.
    filter=FilterConfig(
        n_bins=30,
        n_gaps=10,
        gap_size=5,
        rms_ratio=0.,
        non_zero_ratio=0.8,
    ),
)

In [3]:
# Load the dataset described by `data_config`; the four positional arguments
# are forwarded in the order path, labels, regexes, convert_to_mag.
data = load_data(
    data_config.path,
    data_config.labels,
    data_config.regexes,
    data_config.convert_to_mag,
)

# Run the loaded data through `filter_data` with the FilterConfig set above.
# The unfiltered result stays available as `data`.
filtered_data = filter_data(data, data_config.filter)

In [4]:
def split_data_to_test_validation(data, labels, k, split=0.1):
    """Build shuffled training and validation sets with integer class ids.

    Each label is split by
    ``split_object_data_to_test_validation(data, label, k, split)``. The class
    id of an example is the position of its label in ``labels``.

    Args:
        data: Forwarded unchanged to the per-label splitter.
        labels: Iterable of label names; must yield at least one label.
        k: Forwarded unchanged to the per-label splitter.
        split: Forwarded unchanged to the per-label splitter (presumably the
            validation fraction — confirm against the helper).

    Returns:
        ``(X_train, Y_train), (X_val, Y_val), train_objects, val_objects``.
        The ``X``/``Y`` pairs are the per-label splits concatenated and then
        shuffled with one shared permutation, so row ``n`` of ``X`` still
        belongs to class ``Y[n]``. ``train_objects`` and ``val_objects`` hold
        one ``(split_for_label, class_id)`` tuple per label, in ``labels``
        order and unshuffled.

    Raises:
        ValueError: If ``labels`` is empty.
    """
    train_objects, val_objects = [], []
    for class_id, label in enumerate(labels):
        obj_train, obj_val = split_object_data_to_test_validation(data, label, k, split)
        print(f"\n{label:15}: {len(obj_train):5} training examples, {len(obj_val):5} validation examples")

        # Record both splits for every label. Previously the train split was
        # stored only for the first label and the validation split only for
        # the remaining ones, so the two returned lists were incomplete.
        train_objects.append((obj_train, class_id))
        val_objects.append((obj_val, class_id))

    if not train_objects:
        raise ValueError("labels must contain at least one label")

    # Concatenate once after the loop instead of growing the arrays per label.
    X_train = np.concatenate([obj for obj, _ in train_objects])
    X_val = np.concatenate([obj for obj, _ in val_objects])
    # np.full keeps an integer dtype even for an empty split, whereas
    # np.array([i] * 0) is float64 and would turn the whole label vector float.
    Y_train = np.concatenate([np.full(len(obj), class_id) for obj, class_id in train_objects])
    Y_val = np.concatenate([np.full(len(obj), class_id) for obj, class_id in val_objects])

    # Uses the global NumPy RNG (train first, then validation), so results are
    # reproducible when the notebook calls np.random.seed beforehand.
    id_train = np.random.permutation(len(X_train))
    id_val = np.random.permutation(len(X_val))

    X_train, Y_train = X_train[id_train], Y_train[id_train]
    X_val, Y_val = X_val[id_val], Y_val[id_val]

    return (X_train, Y_train), (X_val, Y_val), train_objects, val_objects