In [8]:
import lightkurve as lk
import pandas as pd
import numpy as np
import pickle
import warnings
from sklearn.model_selection import train_test_split
from astropy.timeseries import LombScargle
from sklearn.preprocessing import StandardScaler

In [4]:
# --- Feature extraction: one row per (TIC target, observed sector) -------------
# For every target in the TIC list, each 120 s SPOC light curve is downloaded,
# normalised and sigma-clipped, turned into a periodogram, and summarised by its
# strongest peaks plus a false-alarm probability.

TIC_LIST_CSV = 'TIC_List_Test.csv'    # ';'-separated; column 0 = TIC id, column 1 = class label
DOWNLOAD_DIR = 'E:/lightkurve_fits/'  # local cache for downloaded FITS files; adjust per machine
N_PEAKS = 5                           # strongest peaks kept; later cells slice columns 1..11, i.e. assume 5
FEATURE_COLUMNS = (
    ['TIC_Sect']
    + [f'{kind}{rank}' for rank in range(N_PEAKS) for kind in ('f', 'p')]
    + ['fap', 'Class']
)


def _strongest_peaks(periodogram_table, n_peaks=N_PEAKS):
    """Return the ``n_peaks`` highest-power rows as ``(frequency, power)`` pairs.

    Pairs are ordered by decreasing power. ``nlargest`` selects exactly one row
    per rank, so tied power values cannot produce a multi-element selection the
    way an equality lookup on the power column does, and the column is not
    re-sorted for every peak.
    """
    strongest = periodogram_table.nlargest(n_peaks, 'power')
    return list(zip(strongest['frequency'].astype(float),
                    strongest['power'].astype(float)))


file = pd.read_csv(TIC_LIST_CSV, sep=';')

# Rows are collected as plain dicts and turned into a DataFrame once at the end;
# this avoids repeated pd.concat calls starting from an empty frame.
records = []
for tic_id, label in zip(file.iloc[:, 0], file.iloc[:, 1]):
    search = lk.search_lightcurve(f'TIC {tic_id}', author='SPOC', exptime=120)
    for j in range(len(search)):
        # Drops the first 12 characters of the mission string
        # (presumably the 'TESS Sector ' prefix - confirm against search.mission).
        sector = search.mission[j][12:]
        lc = (
            search[j]
            .download(download_dir=DOWNLOAD_DIR)
            .normalize()
            .remove_outliers(sigma=3.5)
        )
        pg = lc.to_periodogram(maximum_frequency=50)
        peaks = _strongest_peaks(pg.to_table().to_pandas())

        # False-alarm probability of the highest peak, expressed in percent.
        # NOTE(review): pg.max_power comes from lightkurve's periodogram, while the
        # FAP is evaluated on a separately built astropy LombScargle with default
        # settings - confirm both use the same power normalization.
        fap = LombScargle(lc.time, lc.flux).false_alarm_probability(pg.max_power) * 100

        record = {'TIC_Sect': f'{tic_id}_{sector}', 'fap': fap.value, 'Class': label}
        for rank, (frequency, power) in enumerate(peaks):
            record[f'f{rank}'] = frequency
            record[f'p{rank}'] = power
        records.append(record)

# Passing `columns` fixes the column order and keeps the frame well-formed
# (all columns present) even when no light curve was found.
data_table = pd.DataFrame(records, columns=FEATURE_COLUMNS)
data_table

  f = float(table.loc[table['power'] == sorted(table.power, reverse=True)[k], 'frequency'])


Unnamed: 0,TIC_Sect,f0,p0,f1,p1,f2,p2,f3,p3,f4,p4,fap,Class
0,118269334_17,0.321811,0.00694,0.330508,0.006893,0.313113,0.006052,0.339206,0.005924,0.304416,0.00442,2.314996e-12,Modul
1,118269334_58,0.310274,0.005123,0.317489,0.004966,0.303058,0.004728,0.324705,0.00434,0.295842,0.00375,4.062431e-14,Modul
2,50787573_06,0.652221,0.015671,0.643034,0.015464,0.661407,0.01379,0.633848,0.013156,0.670593,0.010231,1.857649e-43,Modul
3,50787573_32,0.648604,0.015457,0.656325,0.014439,0.640882,0.014363,0.664047,0.011431,0.633161,0.011376,4.709684e-51,Modul
4,16046443_20,14.849047,0.002514,44.843514,0.002499,49.897054,0.002488,31.970287,0.002476,23.854226,0.002474,0.0185604,Noisy
5,16046443_47,0.213697,0.002292,0.206065,0.002105,0.221329,0.002097,11.867807,0.002045,0.992164,0.002035,0.1415987,Noisy
6,16046443_60,0.312399,0.002581,1.046959,0.00252,0.320842,0.00242,0.042216,0.002389,0.033773,0.002387,0.8331588,Noisy
7,165547989_25,1.005016,0.001382,1.012806,0.001319,0.997225,0.001296,1.020597,0.001106,0.989434,0.001086,64.88566,Modul
8,165547989_26,0.997337,0.001672,0.989294,0.001648,1.00538,0.001517,0.981251,0.001451,1.013423,0.001203,11.06366,Modul
9,165547989_52,0.286409,0.001246,0.294592,0.001211,0.065465,0.001133,0.073648,0.001116,0.278226,0.001101,100.0,Modul


In [5]:
# Sanity check: (rows, columns) of the assembled feature table.
data_table.shape

(18, 13)

# Separating predictors from the class label

In [6]:
# Predictor matrix: the eleven columns at positions 1..11 of data_table,
# i.e. everything between the first column and the last one.
predictor_columns = data_table.columns[1:12]
x_param = data_table[predictor_columns].to_numpy()
x_param

array([[3.21810723e-01, 6.94034347e-03, 3.30508311e-01, 6.89329752e-03,
        3.13113136e-01, 6.05225580e-03, 3.39205898e-01, 5.92381497e-03,
        3.04415549e-01, 4.42029079e-03, 2.31499595e-12],
       [3.10273538e-01, 5.12316017e-03, 3.17489201e-01, 4.96628399e-03,
        3.03057874e-01, 4.72783391e-03, 3.24704865e-01, 4.34006272e-03,
        2.95842210e-01, 3.75047843e-03, 4.06243145e-14],
       [6.52220571e-01, 1.56710955e-02, 6.43034366e-01, 1.54636061e-02,
        6.61406777e-01, 1.37897980e-02, 6.33848161e-01, 1.31558309e-02,
        6.70592982e-01, 1.02307707e-02, 1.85764887e-43],
       [6.48603963e-01, 1.54574825e-02, 6.56325438e-01, 1.44394407e-02,
        6.40882487e-01, 1.43628732e-02, 6.64046914e-01, 1.14305886e-02,
        6.33161011e-01, 1.13761797e-02, 4.70968386e-51],
       [1.48490470e+01, 2.51403113e-03, 4.48435140e+01, 2.49858853e-03,
        4.98970536e+01, 2.48789428e-03, 3.19702870e+01, 2.47609202e-03,
        2.38542265e+01, 2.47411097e-03, 1.85604038e-

In [7]:
# Target vector: the column at position 12 of data_table, as a NumPy array.
class_column = data_table.columns[12]
y_param = data_table[class_column].to_numpy()
y_param

array(['Modul', 'Modul', 'Modul', 'Modul', 'Noisy', 'Noisy', 'Noisy',
       'Modul', 'Modul', 'Modul', 'Modul', 'Modul', 'Noisy', 'Noisy',
       'Noisy', 'Noisy', 'Noisy', 'Noisy'], dtype=object)

### Attribute scaling

In [9]:
# Standardise every predictor column with scikit-learn's StandardScaler;
# x_param is rebound to the scaled array.
# NOTE(review): the scaler is fitted on all rows, before the train/test split done
# in a later cell, so test rows contribute to the scaling statistics. Consider
# fitting on the training rows only. The fitted scaler is also not part of what
# the final cell pickles.
scaler = StandardScaler()
scaler.fit(x_param)
x_param = scaler.transform(x_param)
x_param

array([[-0.55051802,  0.62876835, -0.45496294,  0.66938696, -0.35105462,
         0.55652512, -0.38389474,  0.6790977 , -0.4636062 ,  0.41661889,
        -0.6660685 ],
       [-0.55245518,  0.22176571, -0.45615243,  0.2220671 , -0.35191933,
         0.22863971, -0.38582586,  0.23225127, -0.46457841,  0.19622272,
        -0.6660685 ],
       [-0.49504055,  2.58423382, -0.42640912,  2.65882251, -0.32110312,
         2.4720984 , -0.34465693,  2.71956869, -0.42208168,  2.32850881,
        -0.6660685 ],
       [-0.4956478 ,  2.53638995, -0.42519478,  2.42108182, -0.32286811,
         2.61397387, -0.34063533,  2.23280167, -0.42632646,  2.70539607,
        -0.6660685 ],
       [ 1.88867804, -0.36261256,  3.61195149, -0.35076187,  3.91291509,
        -0.32589923,  3.82844918, -0.29365716,  2.20694222, -0.22375533,
        -0.66562062],
       [-0.56867089, -0.41232122, -0.46633268, -0.44219599, -0.35894761,
        -0.4226831 ,  1.15138104, -0.41523135, -0.38561554, -0.36811607,
        -0.662

### Splitting into training and test samples

In [11]:
# Hold out 25 % of the rows for testing.
# - random_state pins the shuffle so Restart & Run All reproduces the same split.
# - stratify keeps the class proportions of y_param in both parts, which matters
#   for a sample this small.
SPLIT_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    x_param,
    y_param,
    test_size=0.25,
    random_state=SPLIT_SEED,
    stratify=y_param,
)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(13, 11) (13,)
(5, 11) (5,)


# Saving data

In [12]:
# Persist the split as a single pickled list, in the order
# [x_train, y_train, x_test, y_test].
# The handle gets its own name so the global `file` (the TIC list DataFrame read
# in the loading cell) is not rebound to a closed file object.
with open('data.pkl', 'wb') as pkl_file:
    pickle.dump([x_train, y_train, x_test, y_test], pkl_file)