In [14]:
import lightkurve as lk
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split

In [9]:
# Build the feature table: for every TESS light curve of every listed TIC,
# keep the N_PEAKS strongest bins of its periodogram as (frequency, power)
# pairs, labelled with the class given in the input CSV.

# Tunable parameters (previously inline magic numbers).
SEARCH_AUTHOR = 'SPOC'
SEARCH_EXPTIME = 120
OUTLIER_SIGMA = 3.5
MAX_FREQUENCY = 50
N_PEAKS = 5
# Number of leading characters dropped from the 'mission' string to obtain the
# sector label; assumes values shaped like 'TESS Sector 17' — TODO confirm.
SECTOR_PREFIX_LEN = 12

file = pd.read_csv('TIC_List_Test.csv', sep=';')
id_list = file.TIC
class_list = file.Class

# Column order of the final table: f0, p0, f1, p1, ... (frequency/power by rank).
feature_columns = [f'{kind}{rank}' for rank in range(N_PEAKS) for kind in ('f', 'p')]

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; concatenating onto a DataFrame inside the loop copies the whole table
# on every iteration and starts from an empty frame, which pandas deprecates.
rows = []
for tic_id, label in zip(id_list, class_list):
    search = lk.search_lightcurve(target=f'TIC {tic_id}', author=SEARCH_AUTHOR, exptime=SEARCH_EXPTIME)
    for j in range(len(search)):
        sector = search.table['mission'][j][SECTOR_PREFIX_LEN:]
        pg = (
            search[j]
            .download()
            .normalize()
            .remove_outliers(sigma=OUTLIER_SIGMA)
            .to_periodogram(maximum_frequency=MAX_FREQUENCY)
        )

        frequency = np.asarray(pg.frequency, dtype=float)
        power = np.asarray(pg.power, dtype=float)

        # One stable descending sort instead of re-sorting the full spectrum
        # for every rank. Ties keep their original order, so two bins with
        # equal power yield two distinct frequencies; NaN powers sort last.
        strongest = np.argsort(-power, kind='stable')[:N_PEAKS]
        # NOTE(review): the strongest bins are frequently neighbours of one
        # peak (see the recorded output) — consider real peak detection if
        # distinct frequencies are intended.

        row = {'TIC_Sect': f'{tic_id}_{sector}'}
        for rank, idx in enumerate(strongest):
            row[f'f{rank}'] = frequency[idx]
            row[f'p{rank}'] = power[idx]
        row['Class'] = label
        rows.append(row)

# Explicit column list fixes the layout even when no light curve was found.
data_table = pd.DataFrame(rows, columns=['TIC_Sect', *feature_columns, 'Class'])
data_table

Unnamed: 0,TIC_Sect,f0,p0,f1,p1,f2,p2,f3,p3,f4,p4,Class
0,118269334_17,0.321811,0.00694,0.330508,0.006893,0.313113,0.006052,0.339206,0.005924,0.304416,0.00442,Modul
1,118269334_58,0.310274,0.005123,0.317489,0.004966,0.303058,0.004728,0.324705,0.00434,0.295842,0.00375,Modul
2,50787573_06,0.652221,0.015671,0.643034,0.015464,0.661407,0.01379,0.633848,0.013156,0.670593,0.010231,Modul
3,50787573_32,0.648604,0.015457,0.656325,0.014439,0.640882,0.014363,0.664047,0.011431,0.633161,0.011376,Modul
4,16046443_20,14.849047,0.002514,44.843514,0.002499,49.897054,0.002488,31.970287,0.002476,23.854226,0.002474,Noisy
5,16046443_47,0.213697,0.002292,0.206065,0.002105,0.221329,0.002097,11.867807,0.002045,0.992164,0.002035,Noisy
6,16046443_60,0.312399,0.002581,1.046959,0.00252,0.320842,0.00242,0.042216,0.002389,0.033773,0.002387,Noisy
7,165547989_25,1.005016,0.001382,1.012806,0.001319,0.997225,0.001296,1.020597,0.001106,0.989434,0.001086,Modul
8,165547989_26,0.997337,0.001672,0.989294,0.001648,1.00538,0.001517,0.981251,0.001451,1.013423,0.001203,Modul
9,165547989_52,0.286409,0.001246,0.294592,0.001211,0.065465,0.001133,0.073648,0.001116,0.278226,0.001101,Modul


In [10]:
# Sanity check of the assembled feature table: (number of rows, number of columns).
data_table.shape

(18, 12)

# Divisão entre Previsores e Classe

In [12]:
# Predictor matrix: the ten frequency/power features, selected by column label
# (inclusive slice 'f0'..'p4') rather than by position, so a change in the
# table layout fails loudly instead of silently picking the wrong columns.
# dtype=float guards against object-typed columns coming out of the table build.
x_param = data_table.loc[:, 'f0':'p4'].to_numpy(dtype=float)
x_param

array([[3.21810723e-01, 6.94034347e-03, 3.30508311e-01, 6.89329752e-03,
        3.13113136e-01, 6.05225580e-03, 3.39205898e-01, 5.92381497e-03,
        3.04415549e-01, 4.42029079e-03],
       [3.10273538e-01, 5.12316017e-03, 3.17489201e-01, 4.96628399e-03,
        3.03057874e-01, 4.72783391e-03, 3.24704865e-01, 4.34006272e-03,
        2.95842210e-01, 3.75047843e-03],
       [6.52220571e-01, 1.56710955e-02, 6.43034366e-01, 1.54636061e-02,
        6.61406777e-01, 1.37897980e-02, 6.33848161e-01, 1.31558309e-02,
        6.70592982e-01, 1.02307707e-02],
       [6.48603963e-01, 1.54574825e-02, 6.56325438e-01, 1.44394407e-02,
        6.40882487e-01, 1.43628732e-02, 6.64046914e-01, 1.14305886e-02,
        6.33161011e-01, 1.13761797e-02],
       [1.48490470e+01, 2.51403113e-03, 4.48435140e+01, 2.49858853e-03,
        4.98970536e+01, 2.48789428e-03, 3.19702870e+01, 2.47609202e-03,
        2.38542265e+01, 2.47411097e-03],
       [2.13696846e-01, 2.29209213e-03, 2.06064816e-01, 2.10469862e-03,
   

In [13]:
# Target vector: the class label of every row, selected by column name instead
# of the positional index 11 so it keeps working if columns are added or moved.
y_param = data_table['Class'].to_numpy()
y_param

array(['Modul', 'Modul', 'Modul', 'Modul', 'Noisy', 'Noisy', 'Noisy',
       'Modul', 'Modul', 'Modul', 'Modul', 'Modul', 'Noisy', 'Noisy',
       'Noisy', 'Noisy', 'Noisy', 'Noisy'], dtype=object)

In [16]:
# Hold out 25% of the rows for testing. A fixed seed makes the split (and the
# pickle written from it) identical on every Restart & Run All; without it
# each run shuffles differently.
SPLIT_SEED = 42
x_treino, x_teste, y_treino, y_teste = train_test_split(
    x_param, y_param, test_size=0.25, random_state=SPLIT_SEED
)
# NOTE(review): rows from the same TIC (different sectors) can land on both
# sides of the split — consider a grouped split if that leakage matters.
print(x_treino.shape, y_treino.shape)
print(x_teste.shape, y_teste.shape)

(13, 10) (13,)
(5, 10) (5,)


# Salvando dados

In [17]:
# Persist the split for later notebooks. The handle is named `pkl_file` so it
# does not overwrite the `file` DataFrame created when the TIC list was read.
# Stored order is [x_treino, y_treino, x_teste, y_teste] — note this differs
# from the order train_test_split returns; unpack in this order when loading.
# Only load this pickle from a trusted location: unpickling executes code.
with open('data.pkl', 'wb') as pkl_file:
    pickle.dump([x_treino, y_treino, x_teste, y_teste], pkl_file)