In [10]:
import pandas as pd
from tsfresh import extract_features
from astropy.io import fits as pf
import numpy as np

In [2]:
def to_timeseries(id_, lightcurve):
    """Convert one light curve into a DataFrame tagged with its object id.

    Parameters
    ----------
    id_
        Identifier written into the ``'id'`` column of every row.
    lightcurve
        Object exposing ``to_pandas()`` that returns a ``pandas.DataFrame``
        (presumably an astropy ``Table`` -- confirm against the pickle).

    Returns
    -------
    pandas.DataFrame
        The light-curve rows with a fresh ``0..n-1`` index and an extra
        ``'id'`` column holding ``id_``.
    """
    # reset_index() returns a new frame rather than modifying in place, so the
    # result has to be kept; calling it without rebinding has no effect.
    this_dataframe = lightcurve.to_pandas().reset_index(drop=True)
    this_dataframe['id'] = id_
    return this_dataframe

def drop_single_value_columns(dataframe):
    """Return only the columns of ``dataframe`` that hold more than one distinct value.

    Distinct values are counted with ``DataFrame.nunique()`` using its default
    settings, so columns with zero or one distinct value are removed.
    """
    distinct_counts = dataframe.nunique()
    informative_columns = dataframe.columns[distinct_counts > 1.0]
    return dataframe[informative_columns]

In [3]:
# Load the pickled light curves. Later cells iterate it with .items() as
# (object id, light curve) pairs.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
lc_pickle_path = '/home/miranda/ztf-rapid/data/raw/rcf_marshallc_sncosmo_200114_2018classupdate_addedcv.pkl'
lc_data = pd.read_pickle(lc_pickle_path)

In [4]:
# Stack one frame per object into a single long table, then keep only the
# first 300 rows. The cut is by row count, not by object, so the last object
# kept may have its light curve cut short.
timeseries = pd.concat(
    [to_timeseries(object_id, curve) for object_id, curve in lc_data.items()]
).iloc[:300]
timeseries

Unnamed: 0,mjd,band,flux,fluxerr,zp,zpsys,id
0,58676.1823,p48g,0.000000,22.752546,25.0,ab,ZTF19abjrkqn
1,58676.2043,p48r,0.000000,22.752546,25.0,ab,ZTF19abjrkqn
2,58683.2129,p48r,0.000000,24.718949,25.0,ab,ZTF19abjrkqn
3,58683.2411,p48r,0.000000,25.646612,25.0,ab,ZTF19abjrkqn
4,58683.2693,p48g,0.000000,35.402179,25.0,ab,ZTF19abjrkqn
...,...,...,...,...,...,...,...
194,58710.2694,p48r,436.515832,40.204594,25.0,ab,ZTF19abpidqn
195,58710.2698,p48r,416.869383,34.555580,25.0,ab,ZTF19abpidqn
196,58710.2856,p48r,432.513831,39.835996,25.0,ab,ZTF19abpidqn
197,58710.3016,p48r,413.047502,34.238773,25.0,ab,ZTF19abpidqn


In [5]:
# Target labels: the 'classification' metadata entry of every light curve,
# indexed by object id.
y = pd.Series(
    {object_id: curve.meta['classification'] for object_id, curve in lc_data.items()}
)
y

ZTF19abjrkqn     SN Ia
ZTF18acdxhus     SN Ia
ZTF19aayjhpg     SN II
ZTF18acbwxgn     SN Ia
ZTF19acaqqng    SN II?
                 ...  
ZTF18accnmri     SN II
ZTF18acbuwcq      None
ZTF18acbvuli      None
ZTF18accpnbj      None
ZTF18aceynvm     SN Ia
Length: 4578, dtype: object

In [6]:
# Run tsfresh on the long-format table.
features = extract_features(
    timeseries,
    column_id="id",        # column identifying the object each row belongs to
    column_sort="mjd",     # column used to order the rows
    column_value='flux',   # column the features are computed from
    column_kind='band',    # band name; it prefixes the feature names in the output
    n_jobs=4,
)
features

Feature Extraction: 100%|██████████| 19/19 [00:00<00:00, 37.34it/s]


Unnamed: 0,p48g__variance_larger_than_standard_deviation,p48g__has_duplicate_max,p48g__has_duplicate_min,p48g__has_duplicate,p48g__sum_values,p48g__abs_energy,p48g__mean_abs_change,p48g__mean_change,p48g__mean_second_derivative_central,p48g__median,...,uvw2__fourier_entropy__bins_2,uvw2__fourier_entropy__bins_3,uvw2__fourier_entropy__bins_5,uvw2__fourier_entropy__bins_10,uvw2__fourier_entropy__bins_100,uvw2__permutation_entropy__dimension_3__tau_1,uvw2__permutation_entropy__dimension_4__tau_1,uvw2__permutation_entropy__dimension_5__tau_1,uvw2__permutation_entropy__dimension_6__tau_1,uvw2__permutation_entropy__dimension_7__tau_1
ZTF18acbwxgn,1.0,0.0,1.0,1.0,2918.030299,1079801.0,126.34758,29.768537,-7.236901,359.886673,...,,,,,,,,,,
ZTF18acdxhus,1.0,0.0,0.0,0.0,978.17404,293359.1,106.953444,76.265344,18.109998,231.206479,...,,,,,,,,,,
ZTF19aayjhpg,1.0,0.0,0.0,0.0,1926.227275,534822.2,16.590071,-14.200668,0.546065,267.916832,...,,,,,,,,,,
ZTF19abjrkqn,1.0,0.0,1.0,1.0,1818.853705,690120.8,48.186749,18.969533,-2.307968,0.0,...,,,,,,,,,,
ZTF19abpidqn,1.0,0.0,1.0,1.0,696.741118,242770.4,23.545545,0.0,-6.089365,0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,,,,,
ZTF19acaqqng,1.0,0.0,0.0,0.0,618.936643,197289.3,107.219466,-107.219466,,309.468322,...,,,,,,,,,,


In [7]:
# Discard features that take at most one distinct value across the objects.
features = features.pipe(drop_single_value_columns)
features

Unnamed: 0,p48g__has_duplicate_min,p48g__has_duplicate,p48g__sum_values,p48g__abs_energy,p48g__mean_abs_change,p48g__mean_change,p48g__mean_second_derivative_central,p48g__median,p48g__mean,p48g__length,...,p48r__fourier_entropy__bins_2,p48r__fourier_entropy__bins_3,p48r__fourier_entropy__bins_5,p48r__fourier_entropy__bins_10,p48r__fourier_entropy__bins_100,p48r__permutation_entropy__dimension_3__tau_1,p48r__permutation_entropy__dimension_4__tau_1,p48r__permutation_entropy__dimension_5__tau_1,p48r__permutation_entropy__dimension_6__tau_1,p48r__permutation_entropy__dimension_7__tau_1
ZTF18acbwxgn,1.0,1.0,2918.030299,1079801.0,126.34758,29.768537,-7.236901,359.886673,291.80303,10.0,...,0.37677,0.735622,1.073543,1.494175,2.079442,1.660947,2.271869,2.302585,2.197225,2.079442
ZTF18acdxhus,0.0,0.0,978.17404,293359.1,106.953444,76.265344,18.109998,231.206479,195.634808,5.0,...,0.693147,0.693147,0.693147,0.693147,0.693147,-0.0,,,,
ZTF19aayjhpg,0.0,0.0,1926.227275,534822.2,16.590071,-14.200668,0.546065,267.916832,275.175325,7.0,...,0.682908,0.9557,1.549826,1.94591,1.94591,1.695743,2.043192,2.079442,1.94591,1.791759
ZTF19abjrkqn,1.0,1.0,1818.853705,690120.8,48.186749,18.969533,-2.307968,0.0,129.918122,14.0,...,0.37677,0.735622,0.735622,0.735622,1.667462,0.79373,1.098612,1.159589,1.227529,1.303092
ZTF19abpidqn,1.0,1.0,696.741118,242770.4,23.545545,0.0,-6.089365,0.0,22.47552,31.0,...,0.118064,0.118064,0.118064,0.135612,0.36975,0.467841,0.661501,0.785749,0.856339,0.870236
ZTF19acaqqng,0.0,0.0,618.936643,197289.3,107.219466,-107.219466,,309.468322,309.468322,2.0,...,0.562335,0.562335,0.900256,1.494175,1.906155,1.626021,2.094729,2.271869,2.302585,2.197225


In [11]:
# Build the FITS column definitions: object id, one column per feature, target.
# Feature columns are named by their position in features.columns rather than
# by the tsfresh feature name; the position -> name mapping is written to a
# separate CSV in a later cell. (An earlier variant that shortened names longer
# than 35 characters was abandoned in favour of positional names.)
coldefs = [pf.Column(name='ztfid', format='12A', array=np.array(features.index))]
coldefs.extend(
    pf.Column(name=str(position), format='F', array=features[feature_name])
    for position, feature_name in enumerate(features.columns)
)
# Labels are looked up by object id so they line up with the feature rows.
coldefs.append(pf.Column(name='target', format='16A', array=y[features.index]))

In [12]:
# Assemble the binary table and write it to disk, replacing any existing file
# and storing checksums.
fits_output_path = '/home/miranda/ztf-rapid/data/interim/rcf_tsfresh_features.fits'
tbhdu = pf.BinTableHDU.from_columns(coldefs)
tbhdu.writeto(fits_output_path, overwrite=True, checksum=True)

In [13]:
# Save the feature names. The CSV's row index is each name's position in
# features.columns, which is the integer used as that feature's column name in
# the FITS table.
feature_names_path = '/home/miranda/ztf-rapid/data/interim/rcf_tsfresh_features_names.csv'
feature_names = pd.DataFrame(features.columns)
feature_names.to_csv(feature_names_path)