# Creating a data set for the comparison with Huppenkothen+2017

Main classificatin experiments described in paper Orwat-Kapola+2021 were not directly comparable with the work of Daniela Huppenkothen, because the former classified whole light curves instead of 1024 s segments, and the cadence of the data was 1s/4s instead of 0.125 s.

Here we prepare a data set of 1024 s overlapping segments which are be further segmented into 16 s segments with cadence of 0.125s to make a direct comparison with Huppenkothen+2017.

In order to reduce the amount of generated data, the 1024 s segments created with a stride of 256 s and the 16 s segments are created with a stride of 16 s. The fact that light curve features are observed in only one phase shift position within those 16 s segments can affect the result. 

In [22]:
%load_ext autoreload
%autoreload 2

import sys; sys.path.insert(0, '../../') # add parent folder path where /src folder is
from src import data_preprocessing

from IPython.display import clear_output
import os
import fnmatch
import numpy as np
# import pickle
# import matplotlib.pyplot as plt

raw_data_dir = "../../../data_GRS1915/std1/" # directory path to where lightcurve files are located
raw_file_name_suffix = "_std1_lc.txt" # light curves were saved as txt files which are directly interpretable by numpy

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
# load light curves from text files

lcs=[] # light curves (time stamps, count rate, uncertainty)
lc_ids=[] # observation ids

for root, dirnames, filenames in os.walk(raw_data_dir): #Std1_PCU2
    for filename in fnmatch.filter(filenames, "*{}".format(raw_file_name_suffix)):
        lc = os.path.join(root, filename)
        lc_ids.append(filename.split("_")[0])
        f=np.loadtxt(lc)
        f=np.transpose(f)
        lcs.append(f)
        print("Loaded {} lightcurves".format(len(lcs)))
        clear_output(wait=True)

Loaded 1776 lightcurves


In [63]:
# segmentation of light curves

segments_counts=[]
segments_times = []
segments_errors=[]
seg_ids=[]

for lc_index, lc in enumerate(lcs):
    segments = data_preprocessing.segmentation(time_series = lc, 
                                       segment_length_sec = 1024, 
                                       stride_sec = 256, 
                                       keep_time_stamps = True, 
                                       input_cadence_sec = 0.125)
    if len(segments) > 0:
        segments_times.append(segments[:,0,:])
        segments_counts.append(segments[:,1,:])
        segments_errors.append(segments[:,2,:])
        for seg_index, seg in enumerate(segments):
            seg_ids.append(lc_ids[lc_index]+"_{}".format(seg_index))
            
    clear_output(wait=True)
    print("Processed {}/{} light curves.".format(lc_index+1, len(lcs)))
print("Successfully segmented {} light curves.".format(len(segments_times)))

segments_counts=np.vstack(segments_counts)
segments_errors=np.vstack(segments_errors)
segments_counts = np.expand_dims(segments_counts, axis=-1)
segments_errors = np.expand_dims(segments_errors, axis=-1)

print("Prepared {} segments.".format(len(segments_counts)))

Processed 1776/1776 light curves.
Successfully segmented 1427 light curves.
Prepared 10614 segments.


# segmentation of 1024 second segments

In [70]:
len(lcs[0][0])

11879

In [None]:
for gap in gaps

In [71]:
lcs[1][0][679]

90601720.25343166

In [73]:
lcs[1][0][680]

90601722.37843166

In [78]:
potential_lcs = 0

for lc in lcs:
    dts = lc[0][1:]-lc[0][:-1]
    gaps = np.where(dts>0.125)[0]
    if len(gaps) == 0 and len(lc[0])>=1024/0.125:
        potential_lcs+=1
        continue
    else:
        for gap_ind, gap in enumerate(gaps):
            if gap_ind == 0:
                gti_len =  gap/0.125
                if gti_len>=1024:
                    potential_lcs+=1
                    break
            else:
                gti_len = (gap-(gaps[gap_ind-1]+1))/0.125
                if gti_len>=1024:
                    potential_lcs+=1
                    break

In [79]:
potential_lcs

1484

In [76]:
len(lcs)

1776