In [58]:
import lightkurve as lk
import matplotlib.pyplot as plt
import numpy as np
from io import BytesIO
import os.path
import tensorflow as tf
import base64

def find_tce(kepid, tce_plnt_num, filenames):
    """Return the first stored example matching a Kepler ID and TCE number.

    Each entry of ``filenames`` is read as a TFRecord file, record by record
    and in the order given. The scan stops at the first record whose "kepid"
    and "tce_plnt_num" int64 features (first value of each) equal the
    requested values.

    Args:
        kepid: Kepler ID to look for.
        tce_plnt_num: TCE number to look for.
        filenames: Iterable of TFRecord file paths to scan.

    Returns:
        The matching ``tf.train.Example``.

    Raises:
        ValueError: If no record in any of the files matches.
    """
    for path in filenames:
        for serialized in tf.compat.v1.python_io.tf_record_iterator(path):
            example = tf.train.Example.FromString(serialized)
            features = example.features.feature
            # Only inspect the TCE number once the Kepler ID already matches.
            if features["kepid"].int64_list.value[0] == kepid:
                if features["tce_plnt_num"].int64_list.value[0] == tce_plnt_num:
                    print("Found {}_{} in file {}".format(kepid, tce_plnt_num, path))
                    return example

    raise ValueError("{}_{} not found in files: {}".format(kepid, tce_plnt_num, filenames))

def getLocalView(kepid, dir, tce_plnt_num=1):
    """Load the stored "local_view" feature of one TCE from a TFRecord directory.

    Args:
        kepid: Kepler ID of the target, forwarded to ``find_tce``.
        dir: Directory whose entries (glob ``dir/*``) are scanned as TFRecord
            files. The name shadows the ``dir`` builtin but is kept so that
            existing keyword callers continue to work.
        tce_plnt_num: TCE number to look up. Defaults to 1, the value that
            used to be hard-coded.

    Returns:
        numpy.ndarray built from the record's "local_view" float list.

    Raises:
        AssertionError: If the directory glob matches no files.
        ValueError: Propagated from ``find_tce`` when no record matches.
    """
    filenames = tf.io.gfile.glob(os.path.join(dir, "*"))
    # Raised explicitly (rather than via `assert`) so the check still runs
    # when Python is started with -O; the exception type is unchanged.
    if not filenames:
        raise AssertionError("No files found in {}".format(dir))

    example = find_tce(kepid, tce_plnt_num, filenames)
    return np.array(example.features.feature["local_view"].float_list.value)

def downloadLC(target):
    """Look up a Kepler target and return its stored local view as a 1-row matrix.

    The target is resolved through a lightkurve search (author "Kepler",
    long cadence); the numeric Kepler ID is parsed from the first result's
    target name and used to fetch the "local_view" feature from the TFRecords
    under ``EDM/Kepler/TFRecords``.

    Args:
        target: Target identifier accepted by ``lk.search_lightcurve``
            (e.g. ``"KIC 3733346"``).

    Returns:
        numpy.ndarray: the local view with a leading axis of length 1, or the
        string ``"Invalid Target"`` when the search raises or finds nothing.

    Raises:
        ValueError: If the target name is not numeric after removing the
            "kplr" prefix, or if ``getLocalView`` finds no matching record.
    """
    # search for the target light curve
    try:
        search_result = lk.search_lightcurve(target, author='Kepler', cadence='long')
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        return "Invalid Target"

    # An unknown target may come back as an empty result instead of an
    # exception; report it the same way rather than failing on `[0]` below.
    if len(search_result) == 0:
        return "Invalid Target"

    # np.asarray copes with target_name being either a plain ndarray or a
    # table column. NOTE(review): names are assumed to look like
    # "kplr003733346" - confirm against the lightkurve version in use.
    target_name = str(np.asarray(search_result.target_name)[0])
    prefix = "kplr"
    # str.lstrip("kplr") strips a *set of characters*, not a prefix, so the
    # prefix is removed explicitly. int() already ignores leading zeros.
    if target_name.startswith(prefix):
        target_name = target_name[len(prefix):]
    kep_id = int(target_name)

    local_view = getLocalView(kep_id, "EDM/Kepler/TFRecords")
    return np.expand_dims(local_view, axis=0)

def downloadLC_kaggle(target):
    """Download a Kepler quarter-3 light curve and return its phase-folded flux.

    Pipeline: search and download (author "Kepler", long cadence, quarter 3),
    flatten and remove outliers, run a BLS periodogram over 10000 trial
    periods spaced linearly from 1 to 20, fold at the period and transit time
    of maximum power, and finally drop NaN flux values.

    Args:
        target: Target identifier accepted by ``lk.search_lightcurve``.

    Returns:
        numpy.ndarray: folded flux values with NaNs removed.
    """
    trial_periods = np.linspace(1, 20, 10000)

    raw_lc = lk.search_lightcurve(target, author='Kepler', cadence='long', quarter=3).download()
    clean_lc = raw_lc.flatten().remove_outliers()

    periodogram = clean_lc.to_periodogram(method='bls', period=trial_periods, frequency_factor=500)
    folded_lc = clean_lc.fold(
        period=periodogram.period_at_max_power,
        epoch_time=periodogram.transit_time_at_max_power,
    )

    folded_flux = np.array(folded_lc.flux)
    return folded_flux[~np.isnan(folded_flux)]

def getTPFimg(target):
    """Render the target pixel file of ``target`` as a base64-encoded PNG.

    Args:
        target: Target identifier accepted by ``lk.search_targetpixelfile``.

    Returns:
        str: base64 text of the PNG image (plain ASCII, without the ``b'...'``
        wrapper that ``str()`` on bytes would add), or ``"Invalid Target"``
        when the search raises or there is nothing to download.

    Note:
        The figure is closed after rendering, so it is not left open for an
        interactive backend to display.
    """
    try:
        search_result = lk.search_targetpixelfile(target, author="Kepler", cadence="long")
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        return "Invalid Target"

    tpf = search_result.download()
    # lightkurve is expected to return None when the search result is empty;
    # report that as an invalid target instead of failing on `None.plot()`.
    if tpf is None:
        return "Invalid Target"

    fig = tpf.plot().figure
    tmpfile = BytesIO()
    try:
        fig.savefig(tmpfile, format='png')
    finally:
        # Every call creates a new figure; close it so they do not accumulate.
        plt.close(fig)

    # decode() instead of str(): str() on bytes yields the repr "b'...'",
    # which is not valid base64 text.
    return base64.b64encode(tmpfile.getvalue()).decode("ascii")

def getLCimg(target):
    """Render the light curve of ``target`` as a base64-encoded PNG.

    Args:
        target: Target identifier accepted by ``lk.search_lightcurve``.

    Returns:
        str: base64 text of the PNG image (plain ASCII, without the ``b'...'``
        wrapper that ``str()`` on bytes would add), or ``"Invalid Target"``
        when the search raises or there is nothing to download.

    Note:
        The figure is closed after rendering, so it is not left open for an
        interactive backend to display.
    """
    try:
        search_result = lk.search_lightcurve(target, author='Kepler', cadence='long')
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        return "Invalid Target"

    lc = search_result.download()
    # lightkurve is expected to return None when the search result is empty;
    # report that as an invalid target instead of failing on `None.plot()`.
    if lc is None:
        return "Invalid Target"

    fig = lc.plot().figure
    tmpfile = BytesIO()
    try:
        fig.savefig(tmpfile, format='png')
    finally:
        # Every call creates a new figure; close it so they do not accumulate.
        plt.close(fig)

    # decode() instead of str(): str() on bytes yields the repr "b'...'",
    # which is not valid base64 text.
    return base64.b64encode(tmpfile.getvalue()).decode("ascii")
    


In [59]:
import numpy as np
from tensorflow import keras
import pickle
from numpy import exp
import lightkurve
from sklearn import preprocessing
import pandas as pd
import os
#import inputlightcurve
from tsfresh import extract_features
from tsfresh.feature_extraction import MinimalFCParameters, EfficientFCParameters
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

def tobool(prob, threshold=.5):
    """Turn a probability into the classification label "true" or "false".

    Args:
        prob: Probability to classify; a scalar or a single-element array.
        threshold: Cut-off below which the result is "false". Defaults to
            .5 (**might be .625/.75 instead of .5**), so existing callers
            are unaffected.

    Returns:
        str: ``"false"`` if ``prob < threshold``, otherwise ``"true"``.
    """
    return "false" if prob < threshold else "true"

def contains(modelsList, model):
    """Tell whether any element of ``modelsList`` compares equal to ``model``.

    Elements are compared one by one with ``==``; iterating a string therefore
    compares its individual characters, not substrings.

    Args:
        modelsList: Iterable of model identifiers.
        model: Identifier to look for.

    Returns:
        bool: True if an equal element exists, False otherwise.
    """
    return any(candidate == model for candidate in modelsList)

def logistic_layer(y):
    """Apply the logistic function 1 / (1 + e^-y) elementwise and flatten.

    Args:
        y: Array-like of raw scores.

    Returns:
        numpy.ndarray: 1-D array of the transformed values.
    """
    scores = np.array(y)
    return (1 / (1 + np.exp(-scores))).ravel()

def normalize(data):
    """Standardise ``data`` along axis 0 to zero mean and unit variance.

    Columns whose standard deviation is zero (constant columns, or any column
    of a single-row matrix) are only centred, so they come out as zeros
    instead of NaN/inf from a division by zero.

    Args:
        data: Array-like of numeric values.

    Returns:
        numpy.ndarray: ``(data - mean) / std`` computed along axis 0.
    """
    data = np.asarray(data)
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # Divide by 1 wherever std is 0; ones_like keeps the dtype of std.
    safe_std = np.where(std == 0, np.ones_like(std), std)
    return (data - mean) / safe_std

def light_curve_to_matrix_kaggle_lightkurve(lc, row_length=3197):
    """Fetch a light curve and shape it into normalised rows of fixed width.

    The first row returned by ``downloadLC`` is padded at the end with its own
    median until its length is a multiple of ``row_length``, reshaped to
    ``(n_rows, row_length)`` and passed through ``normalize``.

    Args:
        lc: Target identifier forwarded to ``downloadLC``.
        row_length: Number of values per row. Defaults to 3197, the width
            that used to be hard-coded.

    Returns:
        numpy.ndarray: the normalised ``(n_rows, row_length)`` matrix.

    Raises:
        ValueError: If ``downloadLC`` reports the target as invalid.
    """
    inputLC = downloadLC(lc)
    # downloadLC signals failure with a string; fail with a clear message
    # instead of an obscure numeric error further down.
    if isinstance(inputLC, str):
        raise ValueError("Could not load a light curve for {!r}: {}".format(lc, inputLC))

    flux = np.array(inputLC[0])
    # (-n) % row_length is 0 when the flux already fills whole rows, so no
    # extra all-median row is appended in that case.
    amt_pad = (-len(flux)) % row_length
    median_padding = np.full(shape=amt_pad, fill_value=np.median(flux))
    padded_flux = np.append(flux, median_padding)
    final_matrix = np.reshape(padded_flux, (len(padded_flux) // row_length, row_length))
    # NOTE(review): normalize() works along axis 0 (across rows); with a
    # single row every column has zero spread - confirm this matches the
    # preprocessing used when the models were trained.
    return normalize(final_matrix)

def light_curve_to_matrix_kepler_tsfresh(lc):
    """Build a max-normalised tsfresh feature matrix for a Kepler target.

    The first row returned by ``downloadLC`` is treated as one time series
    (id 0, time = sample index), features are extracted with tsfresh's
    ``EfficientFCParameters``, columns containing NaN are dropped and the
    remainder is scaled with ``preprocessing.normalize(norm='max', axis=0)``.

    NOTE(review): with a single series the frame has one row, so a
    column-wise max normalisation presumably maps every non-zero feature to
    +/-1, and the set of dropped NaN columns depends on the input - confirm
    both against the preprocessing used for training.

    Args:
        lc: Target identifier forwarded to ``downloadLC``.

    Returns:
        numpy.ndarray: the normalised feature matrix.
    """
    flux = downloadLC(lc)[0]
    n_points = len(flux)

    series_frame = pd.DataFrame({
        "id": np.zeros(n_points, dtype=int),
        "time": list(range(n_points)),
        "flux": flux,
    })

    features = extract_features(
        series_frame,
        column_id="id",
        column_sort="time",
        column_value="flux",
        default_fc_parameters=EfficientFCParameters(),
    )
    # Drop every feature column that contains a NaN.
    features = features.dropna(axis=1)
    return preprocessing.normalize(features, norm='max', axis=0)

def light_curve_to_matrix_kaggle_tsfresh(lc):
    """Build a max-normalised tsfresh feature matrix from folded flux.

    The flux returned by ``downloadLC_kaggle`` is treated as one time series
    (id 0, time = sample index), features are extracted with tsfresh's
    ``EfficientFCParameters``, columns containing NaN are dropped and the
    remainder is scaled with ``preprocessing.normalize(norm='max', axis=0)``.

    NOTE(review): with a single series the frame has one row, so a
    column-wise max normalisation presumably maps every non-zero feature to
    +/-1, and the set of dropped NaN columns depends on the input - confirm
    both against the preprocessing used for training.

    Args:
        lc: Target identifier forwarded to ``downloadLC_kaggle``.

    Returns:
        numpy.ndarray: the normalised feature matrix.
    """
    flux = np.asarray(downloadLC_kaggle(lc))
    # pandas needs native byte order. Converting the dtype (instead of
    # byteswap().newbyteorder()) keeps the values intact whether the input is
    # big-endian or already native, and avoids ndarray.newbyteorder, which
    # NumPy 2.0 removed.
    native_flux = flux.astype(flux.dtype.newbyteorder("="), copy=False)
    n_points = len(native_flux)

    series_frame = pd.DataFrame({
        "id": np.zeros(n_points, dtype=int),
        "time": list(range(n_points)),
        "flux": native_flux,
    })

    features = extract_features(
        series_frame,
        column_id="id",
        column_sort="time",
        column_value="flux",
        default_fc_parameters=EfficientFCParameters(),
    )
    # Drop every feature column that contains a NaN.
    features = features.dropna(axis=1)
    return preprocessing.normalize(features, norm='max', axis=0)

# TODO: add a dedicated preprocessing function for Kepler data + lightkurve; predict() currently feeds the downloadLC() output to the models directly.

def _select_pipeline(input, training_data, processing):
    """Return ``(feature_matrix, model_dir)`` for a supported combination, else None."""
    if training_data == "kaggle" and processing == "tsfresh":
        return light_curve_to_matrix_kaggle_tsfresh(input), "EDM/Kaggle/TSFresh"
    if training_data == "kaggle" and processing == "lightkurve":
        return light_curve_to_matrix_kaggle_lightkurve(input), "EDM/Kaggle/Regular"
    if training_data == "kepler" and processing == "tsfresh":
        return light_curve_to_matrix_kepler_tsfresh(input), "EDM/Kepler/TSFresh"
    if training_data == "kepler" and processing == "lightkurve":
        # No dedicated preprocessing for this pair: the downloadLC output is
        # used as the feature matrix directly.
        return downloadLC(input), "EDM/Kepler/Regular"
    return None


def _result_entry(probability):
    """Package a model output together with its "true"/"false" classification."""
    return {"Probability" : probability, "Classification" : tobool(probability)}


def predict(input, training_data, processing, models):
    """Run the requested saved models on one target and collect their outputs.

    Args:
        input: Target identifier passed to the preprocessing function. The
            name shadows the ``input`` builtin but is kept so that existing
            keyword callers continue to work.
        training_data: ``"kaggle"`` or ``"kepler"``.
        processing: ``"tsfresh"`` or ``"lightkurve"``.
        models: Iterable of model keys. ``"rnn"``, ``"lstm"`` and ``"gru"``
            select the Keras models, ``"lr"`` the logistic regression, and
            ``"rf"`` selects both the decision tree and the random forest.

    Returns:
        dict: maps each model label that ran ("RNN", "LSTM", "GRU", "LR",
        "DT", "RF") to ``{"Probability": ..., "Classification": ...}``.
        For "DT" and "RF" the "Probability" value is whatever the estimator's
        ``predict`` returns. The dict is empty when the
        ``training_data``/``processing`` pair is not one of the four
        supported combinations.

    Raises:
        ValueError: If preprocessing reports the target as invalid.
    """
    results = {}

    pipeline = _select_pipeline(input, training_data, processing)
    if pipeline is None:
        return results
    features, model_dir = pipeline

    # downloadLC signals failure with a string; stop here with a clear
    # message instead of feeding it to the models.
    if isinstance(features, str):
        raise ValueError("Could not load a light curve for {!r}: {}".format(input, features))

    # The Keras models take the matrix with one extra trailing axis.
    keras_input = np.expand_dims(features, -1)

    # RNN, LSTM, GRU
    for key, label in (("rnn", "RNN"), ("lstm", "LSTM"), ("gru", "GRU")):
        if contains(models, key):
            reconstructed_model = keras.models.load_model(model_dir + "/keras-models/Keras" + label)
            results[label] = _result_entry(reconstructed_model.predict(keras_input))

    # Logistic Regression: weights and bias are stored back to back in one file.
    if contains(models, "lr"):
        with open(model_dir + '/tf-models/multi-lr.npy', 'rb') as f:
            W = np.load(f)
            b = np.load(f)
        predicted_y = logistic_layer(W * features + b)
        results["LR"] = _result_entry(np.max(predicted_y))

    # Decision Tree & Random Forest (both selected by the "rf" key)
    if contains(models, "rf"):
        for label, filename in (("DT", "decision-tree.pkl"), ("RF", "random-forest.pkl")):
            # SECURITY: pickle.load executes arbitrary code from the file;
            # only load model files from a trusted location.
            with open(model_dir + '/tf-models/' + filename, 'rb') as f:
                estimator = pickle.load(f)
            results[label] = _result_entry(estimator.predict(features))

    return results
            


In [56]:
# Smoke test: run the three Keras models (RNN, LSTM, GRU) on KIC 3733346
# using the Kaggle-trained models with TSFresh preprocessing.
list_of_models = ["rnn","lstm","gru"]
dict_data = {
    # Image payloads are currently disabled; only the predictions are collected.
    #"targetpixelfile" : getTPFimg("KIC 3733346"),
    #"lightcurve" : getLCimg("KIC 3733346"),
    "results" : predict("KIC 3733346", "kaggle", "tsfresh", list_of_models)
    }
print(dict_data)


Feature Extraction: 100%|██████████| 1/1 [00:01<00:00,  1.52s/it]


{'results': {'RNN': {'Probability': array([[0.50915194]], dtype=float32), 'Classification': 'true'}, 'LSTM': {'Probability': array([[0.29465556]], dtype=float32), 'Classification': 'false'}, 'GRU': {'Probability': array([[0.48944402]], dtype=float32), 'Classification': 'false'}}}


In [60]:
# Sanity check of the Kaggle-style download: print the shape of the folded
# flux and the number of NaN values left in it.
test_arr = test = downloadLC_kaggle("KIC 3733346")
print(test_arr.shape)
print(np.isnan(np.array(test_arr)).sum())

(4134,)
0
