# Blazar feature extraction

In [39]:
import glob
import pickle

import numpy as np

from lib import feets_patch

from PyAstronomy import pyasl

import feets
from feets import preprocess

import pandas as pd

In [2]:
def sort_lc(time, magnitude, error):
    """Return a light curve reordered by ascending observation time.

    Parameters
    ----------
    time, magnitude, error : array-like
        Parallel sequences holding one entry per observation. Any
        array-like (list, tuple, ndarray, ...) is accepted; each one is
        converted with ``np.asarray`` before being indexed.

    Returns
    -------
    tuple of numpy.ndarray
        ``(time, magnitude, error)``, all three permuted with the same
        ordering so that ``time`` is non-decreasing.
    """
    # Coerce up front so that fancy indexing below works for plain sequences too.
    time, magnitude, error = (np.asarray(column) for column in (time, magnitude, error))
    # Mergesort is stable: observations sharing a timestamp keep their input order,
    # which makes the result deterministic.
    order = np.argsort(time, kind="mergesort")
    return time[order], magnitude[order], error[order]

In [41]:
# Load every blazar light curve, convert its times with pyasl.helio_jd,
# clean it with feets' remove_noise and store the time-sorted result in
# `data`, keyed by file name.
data = {}
# Sorted so the processing order (and the key order of `data`) does not
# depend on the order in which the filesystem lists the files.
for fname in sorted(glob.glob("blazars/*.dat")):

    # The 2nd and 3rd whitespace-separated tokens of the file's third line are
    # parsed as floats and used as the coordinates passed to helio_jd.
    # The context manager guarantees the handle is closed after reading.
    with open(fname) as fp:
        header_lines = fp.read().splitlines()
    ra, dec = map(float, header_lines[2].split()[1:3])

    # The first 9 lines are skipped; the rest is comma-separated
    # time, magnitude, error.
    df = pd.read_csv(fname, skiprows=9, sep=",", names=["time", "magnitude", "error"])

    # NOTE(review): the value is named `mjd` here, but pyasl.helio_jd appears to
    # expect a reduced Julian date (JD - 2400000) -- confirm the input time
    # system so that no 0.5-day offset is introduced.
    df["time"] = df.time.apply(lambda mjd: pyasl.helio_jd(mjd, ra, dec))

    print(fname)
    print("FULL_SIZE", df.shape)
    print(df.sample(5))

    as_dict = {
        "time": df.time.values,
        "magnitude": df.magnitude.values,
        "error": df.error.values}
    # remove_noise receives the three columns by keyword; its result is then
    # ordered by time. The three outputs are named explicitly instead of being
    # paired with the input dict's key order.
    clean_time, clean_magnitude, clean_error = sort_lc(*preprocess.remove_noise(**as_dict))
    clean_dict = {
        "time": clean_time,
        "magnitude": clean_magnitude,
        "error": clean_error}

    print("CLEANED", clean_dict["time"].shape)

    data[fname] = clean_dict
    print("****************")

blazars/bla1823.dat
FULL_SIZE (44, 3)
            time  magnitude     error
41  56363.394673  12.846973  0.004008
17  56114.132684  13.124581  0.004680
6   56091.121596  13.137490  0.005736
42  56161.118378  13.128125  0.004547
15  56229.013159  13.015234  0.005331
CLEANED (44,)
****************
blazars/bla1802.dat
FULL_SIZE (49, 3)
            time  magnitude     error
39  56371.394876  14.951365  0.021535
17  56104.112624  14.869695  0.024889
13  56020.403231  14.411769  0.014018
32  56171.166103  13.926595  0.011284
9   55797.193845  15.859229  0.056659
CLEANED (49,)
****************
blazars/bla1731.dat
FULL_SIZE (50, 3)
            time  magnitude     error
19  56155.990905  13.309289  0.022411
13  56152.137226  13.388330  0.020749
33  56364.295834  13.323221  0.021055
18  55688.347556  15.295814  0.037652
40  55803.128797  13.387571  0.020284
CLEANED (50,)
****************
blazars/bla1717.dat
FULL_SIZE (64, 3)
            time  magnitude     error
16  56120.117073  14.696898  0.02

  after removing the cwd from sys.path.


In [42]:
# Serialize the `data` dict of cleaned light curves to lc/lc.pkl.
with open("lc/lc.pkl", "wb") as pickle_file:
    pickle.dump(data, pickle_file)

In [43]:
# Feature extractor fed with the three light-curve columns; the
# AndersonDarling and StetsonK features are explicitly excluded.
fs = feets.FeatureSpace(
    data=["time", "magnitude", "error"],
    exclude=["AndersonDarling", "StetsonK"],
)

In [44]:
# Extract the features of every light curve and keep them per file name.
feats = {}
for fname, light_curve in data.items():
    feature_names, feature_values = fs.extract(**light_curve)
    # Keep every feature except those whose name contains "_dt_" or "Signature_".
    feats[fname] = {
        name: value
        for name, value in zip(feature_names, feature_values)
        if "_dt_" not in name and "Signature_" not in name
    }




In [45]:
# Export the feature table as CSV: one column per light-curve file,
# one row per feature name.
pd.DataFrame(feats).to_csv(path_or_buf="blazars/bla_features.csv")

In [47]:
# Display the extracted feature dict of a single light curve.
feats["blazars/bla1802.dat"]

{'Amplitude': 1.8001639999999997,
 'Autocor_length': 7.0,
 'Beyond1Std': 0.32653061224489793,
 'CAR_mean': 2278.572226838228,
 'CAR_sigma': 15.37745563259751,
 'CAR_tau': 0.006534193116117852,
 'Con': 0.0,
 'Eta_e': 4988604.270370119,
 'FluxPercentileRatioMid20': 0.15257878875122935,
 'FluxPercentileRatioMid35': 0.3470049710134725,
 'FluxPercentileRatioMid50': 0.4466070600826943,
 'FluxPercentileRatioMid65': 0.4751106020292483,
 'FluxPercentileRatioMid80': 0.5977872816433191,
 'Freq1_harmonics_amplitude_0': 1.1754918692083474,
 'Freq1_harmonics_amplitude_1': 0.5517090351552694,
 'Freq1_harmonics_amplitude_2': 0.5052430096899357,
 'Freq1_harmonics_amplitude_3': 0.2817840232910497,
 'Freq1_harmonics_rel_phase_0': 0.0,
 'Freq1_harmonics_rel_phase_1': -1.4220826507147462,
 'Freq1_harmonics_rel_phase_2': -2.1765044381132137,
 'Freq1_harmonics_rel_phase_3': 0.5466743455512325,
 'Freq2_harmonics_amplitude_0': 0.5841432899785339,
 'Freq2_harmonics_amplitude_1': 0.17401119580703514,
 'Freq2_har