In [1]:
from sklearn import preprocessing
import numpy as np
import pandas as pd

import manip

In [2]:
# Read everything under ./data/ and pass the loader's results straight through
# manip.drop_nan, so only the filtered arrays are ever bound to names here.
# NOTE(review): presumably drop_nan removes NaN-containing rows and keeps
# `basenames` aligned with `data` - confirm against the manip module.
data, basenames = manip.drop_nan(*manip.loader("./data/"))

In [3]:
# Keep only the first three columns of `data`.
# NOTE(review): presumably these are the orbital-element columns that get
# scaled column by column further down - confirm the order against manip.loader.
mee_data = data[:, 0:3]

In [4]:
# One scaler class per column of `mee_data`, listed in column order. The value
# range expected for each quantity is noted next to its scaler.
preprocessors = [
    preprocessing.RobustScaler,  # eccentricity:    (0, inf)
    preprocessing.MinMaxScaler,  # inclination:     [0, 2*pi)
    preprocessing.RobustScaler,  # semi-major axis: (~6.371e6, inf)
]

# Each column reshaped to (n_samples, 1), the 2-D layout the scalers are fed.
datasets = [np.reshape(mee_data[:, col], (-1, 1)) for col in range(3)]

# Data each scaler is fitted on: the two RobustScalers learn from the observed
# columns, while the MinMaxScaler is fitted on the fixed endpoints 0 and 2*pi
# so its mapping does not depend on the values present in this data set.
# NOTE(review): inclination is conventionally confined to [0, pi] - confirm
# that a 2*pi upper bound is intended.
scaling_sets = [
    datasets[0],
    np.array([[0.0], [2 * np.pi]]),
    datasets[2],
]

scalers = [
    scaler_cls().fit(fit_set)
    for scaler_cls, fit_set in zip(preprocessors, scaling_sets)
]

# Scale every column with its own fitted scaler and put the results back side
# by side as one 2-D array with one column per quantity.
transformed_datasets = np.column_stack(
    [fitted.transform(column) for fitted, column in zip(scalers, datasets)]
)

In [5]:
# Build pairs from the scaled columns and their basenames, then hand them to
# manip.shuffle_balance_data. The bare name on the last line is what the cell
# displays as its output.
# NOTE(review): judging by the helper's name the result is shuffled and
# class-balanced, and the trailing 1./0. column in the output looks like a
# label - confirm against the manip module.
pairs = manip.generate_pairs(transformed_datasets, basenames)
balanced_pairs = manip.shuffle_balance_data(pairs)
balanced_pairs

array([[-0.54300064,  0.14800441,  0.33895239, ...,  0.14771397,
         0.39723575,  1.        ],
       [-0.56580824,  0.14854096, -0.46839687, ...,  0.14732344,
        -0.5653497 ,  1.        ],
       [-0.09247816,  0.14781241, -1.06465638, ...,  0.14773022,
        -0.95827395,  1.        ],
       ...,
       [ 1.39647806,  0.14771749,  0.76291037, ...,  0.14739808,
        -0.99278694,  0.        ],
       [-0.79843915,  0.14809084,  0.33769614, ...,  0.14659168,
        -0.32969859,  0.        ],
       [-0.19812681,  0.14792947, -0.16265993, ...,  0.11987447,
         0.59077954,  0.        ]])