In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2 #deal with images
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm
np.random.seed(42)

# Read the light-curve CSV and the metadata CSV of the training set.
training = pd.read_csv('../plastic_data/plasticc_train_lightcurves.csv')
meta_training = pd.read_csv("../plastic_data/plasticc_train_metadata.csv")
# Join the two tables on their shared object_id column so that every
# light-curve row also carries the metadata columns of its object.
merged = training.merge(meta_training, on = "object_id")

In [8]:
###recurrent plot

def sigmoid(x):
    '''
    Logistic function: 1 / (1 + e^(-x)).

    Works on a scalar or on a NumPy array (applied element-wise).
    '''
    exp_of_negated = np.exp(-x)
    return 1 / (1 + exp_of_negated)

def R_matrix(signal, eps):
    '''
    Build the thresholded recurrence-plot matrix of a 1-D time series.

    Entry (i, j) is 1.0 when |signal[i] - signal[j]| <= eps and 0.0
    otherwise. The Heaviside step is evaluated with H(0) = 1, so a distance
    exactly equal to eps still counts as a recurrence.

    Parameters
    ----------
    signal : 1-D array-like of numbers
        The time series.
    eps : float
        Distance threshold.

    Returns
    -------
    numpy.ndarray
        float64 array of shape (len(signal), len(signal)).

    Raises
    ------
    ValueError
        If signal is not one-dimensional.
    '''
    signal = np.asarray(signal, dtype=np.float64)
    if signal.ndim != 1:
        raise ValueError("signal must be one-dimensional")
    # Broadcasting a column against a row yields every pairwise distance at
    # once, replacing the element-by-element Python double loop.
    distances = np.abs(signal[:, np.newaxis] - signal[np.newaxis, :])
    return np.heaviside(eps - distances, 1.0)

# Using a sigmoid rather than the Heaviside step because, in this dataset,
# the epsilon parameter needs to change from object to object and therefore
# should be learned as well.
def R_matrix_modified(signal):
    '''
    Build the modified (smooth) recurrence-plot matrix of a 1-D time series.

    Entry (i, j) is sigmoid(|signal[i] - signal[j]|). No epsilon threshold
    is involved: the sigmoid replaces the Heaviside step used by R_matrix.
    The diagonal is therefore sigmoid(0) = 0.5.

    Parameters
    ----------
    signal : 1-D array-like of numbers
        The time series.

    Returns
    -------
    numpy.ndarray
        float64 array of shape (len(signal), len(signal)).

    Raises
    ------
    ValueError
        If signal is not one-dimensional.
    '''
    signal = np.asarray(signal, dtype=np.float64)
    if signal.ndim != 1:
        raise ValueError("signal must be one-dimensional")
    # Broadcasting a column against a row yields every pairwise distance at
    # once, replacing the element-by-element Python double loop.
    distances = np.abs(signal[:, np.newaxis] - signal[np.newaxis, :])
    return sigmoid(distances)

def _stack_matrices(matrices):
    '''
    Pack a list of 2-D matrices into a single array.

    Equal-shaped matrices become one regular N-D array. Matrices of
    different shapes are placed in a 1-D object array, because NumPy >= 1.24
    raises ValueError when asked to build a ragged array implicitly.
    '''
    if len({np.shape(matrix) for matrix in matrices}) <= 1:
        return np.asarray(matrices)
    ragged = np.empty(len(matrices), dtype=object)
    # Assign element by element so NumPy never tries to broadcast the list.
    for idx, matrix in enumerate(matrices):
        ragged[idx] = matrix
    return ragged


def create_objects_dict(merged_dataset):
    '''
    Build, for every object, one modified recurrence matrix per passband.

    Parameters
    ----------
    merged_dataset : pandas.DataFrame
        Must provide the columns object_id, passband, flux and target.

    Returns
    -------
    dict
        Maps each object_id to a tuple (R_matrices, target):
        - R_matrices holds one R_matrix_modified result per passband,
          ordered like np.unique(merged_dataset.passband). It is a regular
          3-D array when all matrices have the same shape and a 1-D object
          array of 2-D matrices otherwise. A passband with no rows for the
          object yields an empty (0, 0) matrix.
        - target is the largest target value among the object's rows.
    '''
    objects = {}
    # The passband set is the same for every object, so compute it once.
    passbands = np.unique(merged_dataset.passband)
    # Group once up front; each object is then looked up in its own rows
    # instead of re-masking the whole frame per object and per passband.
    rows_by_object = merged_dataset.groupby('object_id')
    for obj in tqdm(np.unique(merged_dataset.object_id)):
        obj_rows = rows_by_object.get_group(obj)
        R_passbands = [
            R_matrix_modified(obj_rows[obj_rows.passband == passband].flux.values)
            for passband in passbands
        ]
        objects[obj] = (_stack_matrices(R_passbands), max(obj_rows.target))
    return objects

def get_minmax_shapes(obj_R_matrices):
    '''
    Return the smallest and largest side length among an object's R matrices.

    The minimum is what the matrices are cropped to so that all images of
    one object share the same size and can be concatenated.

    Parameters
    ----------
    obj_R_matrices : sequence of 2-D arrays
        One R matrix per passband of a single object.

    Returns
    -------
    tuple
        (min_length, max_length), where length is len() of a matrix, i.e.
        its number of rows. (0, 0) when the sequence is empty.
    '''
    # Measure the matrices actually passed in, rather than indexing them by
    # the passband values of a notebook-global DataFrame.
    lengths = [len(matrix) for matrix in obj_R_matrices]
    if not lengths:
        return (0, 0)
    return (min(lengths), max(lengths))

def crop_obj_plots(objects):
    '''
    Crop every object's R matrices to that object's smallest matrix size.

    Parameters
    ----------
    objects : dict
        Maps each object to a tuple whose first slot holds the object's
        R matrices (one per passband) and whose second slot holds the
        target value (object class).

    Returns
    -------
    dict
        The same dictionary that was passed in. The matrices are replaced
        in place by their top-left min_len x min_len corner, so the caller's
        dictionary is modified as well.
    '''
    for obj in tqdm(objects.keys()):
        matrices = objects[obj][0]
        min_len, _ = get_minmax_shapes(matrices)
        # Walk the positions of this object's own matrices instead of using
        # the passband values of a notebook-global DataFrame as indices.
        for idx in range(len(matrices)):
            matrices[idx] = matrices[idx][:min_len, :min_len]
    return objects

# Run the pipeline on a single object (object_id 615) rather than on the
# whole merged table.
objects = create_objects_dict(merged[merged['object_id']==615])
# crop_obj_plots crops in place and returns the dictionary it was given, so
# cropped_objects and objects refer to the same (now cropped) dictionary.
cropped_objects = crop_obj_plots(objects)

100%|██████████| 1/1 [00:00<00:00, 16.25it/s]
100%|██████████| 1/1 [00:00<00:00, 17.02it/s]


In [15]:
# Display the matrix stored at index 5 of object 615's per-passband R matrices.
cropped_objects[615][0][5]

array([[0.5       , 1.        , 1.        , ..., 0.99999989, 1.        ,
        1.        ],
       [1.        , 0.5       , 0.997301  , ..., 1.        , 1.        ,
        1.        ],
       [1.        , 0.997301  , 0.5       , ..., 1.        , 1.        ,
        1.        ],
       ...,
       [0.99999989, 1.        , 1.        , ..., 0.5       , 1.        ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 0.5       ,
        1.        ],
       [1.        , 1.        , 1.        , ..., 1.        , 1.        ,
        0.5       ]])