In [None]:
# This version uses only the power values and the FAP as predictor features; the earlier version used the power values, the FAP and the frequencies.

In [1]:
import lightkurve as lk
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from astropy.timeseries import LombScargle
from sklearn.preprocessing import StandardScaler

In [3]:
# Build one feature row per (TIC target, TESS sector): the strongest
# periodogram powers (p0..p9, in descending order), the False Alarm
# Probability (FAP) of the highest peak in percent, and the class label
# taken from the input list.
N_TOP_POWERS = 10                      # number of strongest power values kept as features
DOWNLOAD_DIR = 'E:/lightkurve_fits/'   # local directory where downloaded FITS files are stored
power_cols = [f'p{k}' for k in range(N_TOP_POWERS)]

file = pd.read_csv('TIC_List_Test.csv', sep=';')

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; concatenating inside the loop re-copies the whole table on every pass.
records = []
for i in range(len(file)):
    tic_id = file.iloc[i, 0]   # first CSV column: TIC identifier
    label = file.iloc[i, 1]    # second CSV column: class label
    search = lk.search_lightcurve(f'TIC {tic_id}', author='SPOC', exptime=120)
    for j in range(len(search)):
        # Drop the first 12 characters of the mission string, keeping the sector tag
        sector = search.mission[j][12:]
        lc = search[j].download(download_dir=DOWNLOAD_DIR).normalize().remove_outliers(sigma=3.5)
        pg = lc.to_periodogram(maximum_frequency=50)

        # Strongest periodogram power values, largest first
        top_powers = pg.to_table().to_pandas()['power'].nlargest(N_TOP_POWERS).to_numpy()

        # False Alarm Probability (FAP) of the highest peak, expressed in percent.
        # NOTE(review): pg.max_power comes from lightkurve's periodogram, while the
        # FAP is evaluated by a separately constructed astropy LombScargle object;
        # confirm that both use the same power normalization.
        fap = LombScargle(lc.time, lc.flux).false_alarm_probability(pg.max_power) * 100

        # One record per light curve, keyed in the final column order
        record = {'TIC_Sect': f'{tic_id}_{sector}'}
        record.update(zip(power_cols, top_powers))
        record['fap'] = float(fap.value)
        record['Class'] = label
        records.append(record)

# Passing the column list keeps the expected layout even when no light curve was found
data_table = pd.DataFrame(records, columns=['TIC_Sect', *power_cols, 'fap', 'Class'])
data_table

Unnamed: 0,TIC_Sect,p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,fap,Class
0,118269334_17,0.00694,0.006893,0.006052,0.005924,0.00442,0.004254,0.003893,0.003862,0.003854,0.003734,2.314996e-12,Modul
1,118269334_58,0.005123,0.004966,0.004728,0.00434,0.00375,0.003334,0.002378,0.002246,0.002179,0.002168,4.062431e-14,Modul
2,50787573_06,0.015671,0.015464,0.01379,0.013156,0.010231,0.009232,0.005765,0.004725,0.004668,0.004634,1.857649e-43,Modul
3,50787573_32,0.015457,0.014439,0.014363,0.011431,0.011376,0.007103,0.007063,0.005167,0.004586,0.004565,4.709684e-51,Modul
4,16046443_20,0.002514,0.002499,0.002488,0.002476,0.002474,0.002455,0.002437,0.002436,0.002436,0.002429,0.0185604,Noisy
5,16046443_47,0.002292,0.002105,0.002097,0.002045,0.002035,0.002017,0.002001,0.001988,0.001986,0.001977,0.1415987,Noisy
6,16046443_60,0.002581,0.00252,0.00242,0.002389,0.002387,0.002288,0.002271,0.002264,0.002261,0.002202,0.8331588,Noisy
7,165547989_25,0.001382,0.001319,0.001296,0.001106,0.001086,0.000803,0.000779,0.000519,0.000407,0.000337,64.88566,Modul
8,165547989_26,0.001672,0.001648,0.001517,0.001451,0.001203,0.001125,0.000781,0.000731,0.000554,0.000552,11.06366,Modul
9,165547989_52,0.001246,0.001211,0.001133,0.001116,0.001101,0.001043,0.000994,0.000924,0.000909,0.0009,100.0,Modul


In [4]:
# Sanity check: (number of rows, number of columns) of the assembled feature table
data_table.shape

(18, 13)

# Splitting the table into predictors and class labels

In [5]:
# Predictor matrix: every column from 'p0' through 'fap'
# (label-based slices include both end points).
x_param = data_table.loc[:, 'p0':'fap'].to_numpy()
x_param

array([[6.94034347e-03, 6.89329752e-03, 6.05225580e-03, 5.92381497e-03,
        4.42029079e-03, 4.25380594e-03, 3.89306702e-03, 3.86229861e-03,
        3.85400785e-03, 3.73445888e-03, 2.31499595e-12],
       [5.12316017e-03, 4.96628399e-03, 4.72783391e-03, 4.34006272e-03,
        3.75047843e-03, 3.33434633e-03, 2.37787001e-03, 2.24598373e-03,
        2.17881752e-03, 2.16766605e-03, 4.06243145e-14],
       [1.56710955e-02, 1.54636061e-02, 1.37897980e-02, 1.31558309e-02,
        1.02307707e-02, 9.23152114e-03, 5.76541548e-03, 4.72519616e-03,
        4.66836218e-03, 4.63364880e-03, 1.85764887e-43],
       [1.54574825e-02, 1.44394407e-02, 1.43628732e-02, 1.14305886e-02,
        1.13761797e-02, 7.10330909e-03, 7.06277916e-03, 5.16748409e-03,
        4.58606829e-03, 4.56468751e-03, 4.70968386e-51],
       [2.51403113e-03, 2.49858853e-03, 2.48789428e-03, 2.47609202e-03,
        2.47411097e-03, 2.45458030e-03, 2.43710344e-03, 2.43606168e-03,
        2.43555865e-03, 2.42868648e-03, 1.85604038e-

In [6]:
# Target vector: the class label of every row, selected by column name
y_param = data_table['Class'].to_numpy()
y_param

array(['Modul', 'Modul', 'Modul', 'Modul', 'Noisy', 'Noisy', 'Noisy',
       'Modul', 'Modul', 'Modul', 'Modul', 'Modul', 'Noisy', 'Noisy',
       'Noisy', 'Noisy', 'Noisy', 'Noisy'], dtype=object)

### -- Attribute scaling

In [7]:
# Standardize every predictor column with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split, so the test rows contribute to the scaling statistics — confirm
# that this is intended.
scaler = StandardScaler()
scaler.fit(x_param)
x_param = scaler.transform(x_param)
x_param

array([[ 0.62876835,  0.66938696,  0.55652512,  0.6790977 ,  0.41661889,
         0.68015862,  0.84594849,  1.16338243,  1.29521299,  1.2646958 ,
        -0.6660685 ],
       [ 0.22176571,  0.2220671 ,  0.22863971,  0.23225127,  0.19622272,
         0.27445108, -0.00842756,  0.04091435,  0.06390454,  0.08786752,
        -0.6660685 ],
       [ 2.58423382,  2.65882251,  2.4720984 ,  2.71956869,  2.32850881,
         2.87655377,  1.90171197,  1.76263136,  1.89378459,  1.9400832 ,
        -0.6660685 ],
       [ 2.53638995,  2.42108182,  2.61397387,  2.23280167,  2.70539607,
         1.93748947,  2.63325806,  2.0697832 ,  1.83329644,  1.88828593,
        -0.6660685 ],
       [-0.36261256, -0.35076187, -0.32589923, -0.29365716, -0.22375533,
        -0.11374185,  0.02497247,  0.17291613,  0.25261594,  0.28392166,
        -0.66562062],
       [-0.41232122, -0.44219599, -0.4226831 , -0.41523135, -0.36811607,
        -0.30680276, -0.22104984, -0.13832697, -0.07751733, -0.05525573,
        -0.662

### -- Splitting into training and test samples

In [8]:
# Hold out 15 % of the rows as the test sample.
# random_state pins the shuffle so that the split (and therefore the saved
# data) is identical on every run; stratify=y_param keeps the class
# proportions in both subsets, which matters with so few test rows.
x_train, x_test, y_train, y_test = train_test_split(
    x_param, y_param, test_size=0.15, random_state=42, stratify=y_param
)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(15, 11) (15,)
(3, 11) (3,)


# Saving data

In [9]:
# Persist the split as one pickled list, ordered
# [x_train, y_train, x_test, y_test]; readers must unpack it in that order.
# The handle is deliberately not named `file`, so the TIC list DataFrame that
# the notebook keeps in `file` is not replaced by a closed file object.
with open('data.pkl', 'wb') as pkl_out:
    pickle.dump([x_train, y_train, x_test, y_test], pkl_out)