# Blazars features extractions

In [3]:
import glob

import numpy as np

import feets
from feets import preprocess

import pandas as pd

In [4]:
def sort_lc(time, magnitude, error):
    order = np.argsort(time)
    return time[order], magnitude[order], error[order]

In [5]:
data = {}
for fname in glob.glob("blazars/*.dat"):
    df = pd.read_table(fname, skiprows=9, sep=",", names=["time", "magnitude", "error"])
    print(fname)
    print("FULL_SIZE", df.shape)
    print(df.sample(5))
    
    
    as_dict = {
        "time": df.time.values,
        "magnitude": df.magnitude.values,
        "error": df.error.values}
    clean_dict = dict(zip(as_dict.keys(), sort_lc(*preprocess.remove_noise(**as_dict))))
    
    print("CLEANED", clean_dict["time"].shape)
    
    data[fname] = clean_dict
    print("****************")

blazars/bla1717.dat
FULL_SIZE (64, 3)
         time  magnitude     error
7   55842.043  14.569349  0.029619
30  56159.018  14.711135  0.029506
40  56486.036  14.569436  0.027647
51  56514.039  14.493076  0.029762
29  56146.128  14.802676  0.033964
CLEANED (63,)
****************
blazars/bla1802.dat
FULL_SIZE (49, 3)
         time  magnitude     error
2   55374.346  14.865431  0.024841
34  56191.067  13.555287  0.006420
28  56153.001  13.861944  0.008583
24  56137.126  13.880656  0.007595
40  56381.356  14.786199  0.018592
CLEANED (49,)
****************
blazars/bla1823.dat
FULL_SIZE (44, 3)
         time  magnitude     error
3   56008.390  13.077231  0.004562
20  56378.361  12.853883  0.004052
23  56369.411  12.843698  0.003832
35  56134.149  13.125030  0.004709
7   56191.057  13.048705  0.004435
CLEANED (44,)
****************
blazars/bla1731.dat
FULL_SIZE (50, 3)
         time  magnitude     error
33  56364.296  13.323221  0.021055
47  56202.014  13.342775  0.018582
48  55829.032  13.19

  This is separate from the ipykernel package so we can avoid doing imports until


In [6]:
fs = feets.FeatureSpace(data=["time", "magnitude", "magnitude_error"], exclude=["AndersonDarling"])

In [8]:
feats = {}
for k, v in data.items():
    features, values = fs.extract(**v)
    fdict = dict(zip(features, values))
    fdict = {k: v for k, v in fdict.items() if "_dt_" not in k and "Signature_" not in k}
    feats[k] = fdict


In [9]:
pd.DataFrame(feats).to_csv("blazars/bla_features.csv")

In [12]:
fdict.keys()

dict_keys(['Amplitude', 'Autocor_length', 'Con', 'Eta_e', 'FluxPercentileRatioMid20', 'FluxPercentileRatioMid35', 'FluxPercentileRatioMid50', 'FluxPercentileRatioMid65', 'FluxPercentileRatioMid80', 'Freq1_harmonics_amplitude_0', 'Freq1_harmonics_amplitude_1', 'Freq1_harmonics_amplitude_2', 'Freq1_harmonics_amplitude_3', 'Freq1_harmonics_rel_phase_0', 'Freq1_harmonics_rel_phase_1', 'Freq1_harmonics_rel_phase_2', 'Freq1_harmonics_rel_phase_3', 'Freq2_harmonics_amplitude_0', 'Freq2_harmonics_amplitude_1', 'Freq2_harmonics_amplitude_2', 'Freq2_harmonics_amplitude_3', 'Freq2_harmonics_rel_phase_0', 'Freq2_harmonics_rel_phase_1', 'Freq2_harmonics_rel_phase_2', 'Freq2_harmonics_rel_phase_3', 'Freq3_harmonics_amplitude_0', 'Freq3_harmonics_amplitude_1', 'Freq3_harmonics_amplitude_2', 'Freq3_harmonics_amplitude_3', 'Freq3_harmonics_rel_phase_0', 'Freq3_harmonics_rel_phase_1', 'Freq3_harmonics_rel_phase_2', 'Freq3_harmonics_rel_phase_3', 'Gskew', 'LinearTrend', 'MaxSlope', 'Mean', 'Meanvariance'