# Importing Module

In [1]:
#kplr used to import koi data easier
import kplr
client = kplr.API()

#used to open .fits files
import pyfits
# import astropy

#Used for Plotting
#Experimenting with this instead of using matplot.lib
from bokeh.plotting import figure, show, output_file,output_notebook
#Allows for viewing of plots on jupyter notebooks
output_notebook()

#For Maximum Likelihood
from scipy import optimize

#Used for scientific computing
import numpy as np

#For making copies of data
import copy

#Needed to correct median with 'nan' data points
import math

#For Periodogram
from gatspy.periodic import LombScargleFast

#Importing and notebook setup
%matplotlib inline

#For Plotting
import matplotlib.pyplot as plt

# Making useful functions

In [2]:
#adjusting light curve data to be a horizontal line
# keeps the first point of the data and makes rest of data horizontal with respect to that point

def h_data (x, y):
    #Using point-slope to make a model linear fit of data
    slope_g = (f[-1]-f[0])/(t[-1]-t[0])
    pt_x = x[20]
    pt_y = y[20]
    print 'slope: ',slope_g
    x = np.linspace(min(t),max(t),len(f))

    #Rough linear fit of data
    new_y = slope_g*(x-pt_x)+pt_y
    
    #Making correction to original data to make horizontal
    first_pt = new_y[0]
    correction = new_y-first_pt
    new_data = y - correction
    
    return new_data

In [3]:
def periodogram(datax, datay, min_, max_, nyquist):
    #finding periodogram
    model = LombScargleFast().fit(datax, datay)
    period, power = model.periodogram_auto(nyquist_factor=nyquist) # Default 50

    #Plotting
    plt.figure
    plt.plot(period,power)
    plt.ylabel('Power')
    plt.xlabel('Period')# days
    plt.xscale('log')
    #used bottom line to zoom in periodogram
#     plt.xlim(min_-1,max_+10)

    # set range and find period
    model.optimizer.period_range=(min_, max_)
    period = model.best_period
    print("period = {0}".format(period))
    return period

In [4]:
#Making loop to get Median Smooth
 
#jump - The number of pixels you are taking the median
### ex. jump = 7, median over 7 points, replaces values inbetween with median
### Before: [1,2,3,4,5,6,7]
### After:  [1,4,4,4,4,4,7]


def median_smooth(flux,jump,peak_min):
    #n - determines section of data we are looking at
    n = 1
    
    #Data that is being changed and smoothed
    smooth_flux = copy.copy(flux)

    #median smoothing loop
    while n <= len(smooth_flux)-jump-1:    
        #delta of data from one point to another
        #used in for loop to leave troughs unchanged
        max_change = 0
        #Checks the max delta of data points for each jump
        for i in range (n-1,n+jump-1):
            delta = abs( flux[i]-flux[i+1])
            if delta > max_change:
                max_change = delta

        #Makes range of points equal to median
        if max_change < peak_min:
            median = np.median(flux[n-1:n+jump])
            
            #This line corrects errors with data labeled at 'nan'
            if math.isnan(median)==False:
                smooth_flux[n:n+jump-2] = median

#             #Leaves points unchaged when median is 'nan'
#             else:
#                 print 'Median: ', median
#                 print 'Data Numbers Reserved: ', n-1, ' to ', n+jump-1
#                 print 'Original Data: ', flux[n-1:n+jump]
#                 print 'Smooth Data: ', smooth_flux[n-1:n+jump]
#                 print 'Max Change: ', max_change
#                 print ''
#                 print ''
# 
#         #Leaves troughs unchaged
#         else:
#             print 'Data Numbers Reserved: ', n-1, ' to ', n+jump-1
#             print 'Original Data: ', flux[n-1:n+jump]
#             print 'Smooth Data: ', smooth_flux[n-1:n+jump]
#             print 'Max Change: ', max_change
#             print ''
#             print ''
        n=n+jump
    print 'First 100 Points'
    print 'Before: '
    print ''
    print flux[0:99]
    print ''
    print 'After: '
    print ''
    print smooth_flux[0:99]
    return smooth_flux


Loop that finds period of troughs in light curve

In [5]:
### Step 1: Make a cut_off and only look at that data
### Step 2: Find min flux of each trough
### Step 3: Find the avg. time diff between each local min
###Still need to makes this plot into a function and then will place it up above

def find_period(flux,cut_off):
    # cut_off is the max the point can be to pass the filter
    # f_change - change in flux from cutoff
#     cut_off = 69200
    min_f = np.array([])
    min_t = np.array([])

    #Step 1: filters data to just the points below the cut_off
    for i in range(0,len(flux)):
        f_change = flux[i] - cut_off

        #checks if it's lower than cut off
        if f_change < 0:
            min_f = np.append(min_f,[flux[i]])
            min_t = np.append(min_t,[t[i]])

    # print min_f
    # print min_t

    #local_min - lowest point of troughs place holder
    #check - checks to see if new point is lower than previous local_min
    local_min = min_f[0]
    troughs_f = np.array([])
    troughs_t = np.array([])

    #Step 2: filters to just show only the lowest point of each trough
    for i in range(0,len(min_f)-1):
        t_change = min_t[i+1] - min_t[i]
        check = min_f[i]
        # checks if it's lower than previous local_min
        if check < local_min:
            local_min = check
            ndata = i

        #documents lowest point and resets check for next trough
        if t_change > 1 or i > len(min_f)-3:
            troughs_f = np.append(troughs_f, [local_min])
            troughs_t = np.append(troughs_t, [min_t[ndata]])
            local_min = cut_off

    # print 'The min of each flux trough: ', troughs_f
    # print 'The corresponding time for each trough: ' ,troughs_t

    all_periods = np.array([])

    #Step 3: finding average change in time from each trough
    for i in range(0,len(troughs_t)-1):
        period = troughs_t[i+1]-troughs_t[i]
        all_periods = np.append(all_periods,period)

    avg_period = np.mean(all_periods)
    # print 'All periods: ', all_periods
#     print 'Avg. period: ', avg_period, ' days.'

    return avg_period

# Importing Data

In [6]:
# Find a KOI.
koi = client.koi(17.01)

#period, period error (postive & negative)
print 'Period w/ errors: ',(koi.koi_period, koi.koi_period_err1, koi.koi_period_err2)

# This KOI has an associated star.
star = koi.star
print "Associated Star Temperature: ",(star.kic_teff)

#Download the lightcurves for this KOI.
lightcurves = koi.get_light_curves()
# for lc in lightcurves:
#     print (lc.filename)

Period w/ errors:  (0.00078, -0.00078, None)
Associated Star Temperature:  None


Getting Like Curve Data

In [7]:
# Loop over the datasets and read in the data.
time, flux, ferr, quality = [], [], [], []
for lc in lightcurves:
    with lc.open() as f:
        # The lightcurve data are in the first FITS HDU.
        hdu_data = f[1].data
        time.append(hdu_data["time"])
        flux.append(hdu_data["sap_flux"])
        ferr.append(hdu_data["sap_flux_err"])
        quality.append(hdu_data["sap_quality"])

Plotting Data

In [8]:
#Time (BJD - 2454833)
#Flux (e-/sec) + ____e+4
#Which quarter do you want to look at?
quarter =1
t = time[quarter]
f = flux[quarter]
#Creating new plot with title and axis labels
#Plot is an object
lc_plot_17 = figure(
    title='KOI-17: Quarter %d '% quarter, 
    x_axis_label='Time (BJD - 2454833)',
    y_axis_label='Flux (e-/sec) e+4',
    tools = 'hover,crosshair,pan,wheel_zoom,box_zoom,reset,tap,save,box_select')

#adding x and y data for plot
legend = 'KOI: Quarter %d' % quarter
lc_plot_17.line(t, f,legend=legend)

#showing results
show(lc_plot_17)

Fixing Data to Make it Horizontal (Optional)

In [9]:
#Horizontalizing Data
h_flux = h_data(t,f)

#Plot is an object
lc_h_17 = figure(
    title='KOI-17: Horizontal, Quarter %d '% quarter, 
    x_axis_label='Time (BJD - 2454833)',
    y_axis_label='Flux (e-/sec) e+4',
    tools = 'hover,crosshair,pan,wheel_zoom,box_zoom,reset,tap,save,box_select')

#adding x and y data for plot
lc_h_17.line(t, h_flux,line_width=1,legend='Horizontal KOI-17',color='green')


#showing results
show(lc_h_17)

slope:  -32.3512869707


In [10]:
smooth_flux = median_smooth(h_flux,5,300)

First 100 Points
Before: 

[ 69638.625       69649.78607677  69654.95496604  69668.80354281
  69656.65993208  69654.62569635  69653.02114812  69662.04159989
  69665.83548916  69659.62156593  69663.8295177   69645.70153197
  69668.72979625  69656.55493552  69657.00507479  69669.87708906
  69668.65535333  69651.7851801   69664.94625687  69681.56827114
  69645.95591041  69681.92167468  69659.05150145  69660.42351572
  69692.66271749  69668.43316926  69678.96924603  69674.0365728
  69669.79139957  69663.63997634  69681.48855311  69665.28244238
  69665.95133165  69664.22959592  69660.6250477   69668.01268697
  69673.56438874  69666.49890301  69669.44122978  69685.44605655
  69677.31807082  69685.90883509  69664.83553686  69674.46536363
  69670.4936279   69684.76407967  69673.93296894  69679.79717071
  69682.38793498  69693.12713675  69688.36633852  69689.16022779
  69692.00880456  69688.31831883  69688.1981456   69684.90609737
  69677.02811165  69677.21262592  69683.13932769  69684.23009196



Median Smoothing of Data

In [11]:
#Plot is an object
lc_smooth_17 = figure(
    title='KOI-17', 
    x_axis_label='Time (BJD - 2454833)',
    y_axis_label='Flux (e-/sec) e+4',
    tools = 'hover,crosshair,pan,wheel_zoom,box_zoom,reset,tap,save,box_select')

#adding x and y data for plot
lc_smooth_17.line(t, smooth_flux,legend='Median Smooth Data',color='orange')
# lc_smooth_17.line(t, h_flux,legend='H Data',color='red')

print smooth_flux[180:200]
show(lc_smooth_17)

[ 69700.30631868  69702.66810707  69702.66810707  69702.66810707
  69698.62250076  69692.02576503  69696.55437748  69696.55437748
  69696.55437748  69699.53725962  69689.44833639  69697.82908368
  69697.82908368  69697.82908368  69684.67858097  69712.11309524
  69712.67651576  69712.67651576  69712.67651576  69786.81990232]


Finding Period of Transit

In [12]:
### Still need to work on Periodogram
# period = periodogram(t, f, 1,30,.5)

In [13]:
period = find_period(smooth_flux,69200)  
print period

3.23316453419


# Ignore Everything Below Here

In [14]:
# #using pyfits to open a quarter of data as .fits file
# pyfits.open('c:/Users/rscsa/.kplr/data/lightcurves/010874614/kplr010874614-2009131105131_llc.fits')
# print pyfits.hdu.image.PrimaryHDU()