In [None]:
import numpy as np
import matplotlib.pyplot as plt
from astropy.timeseries import LombScargle
from scipy.optimize import curve_fit

In [None]:
def clean_data(datarray):
    '''Return a copy of a 2D array with every NaN-containing row removed.

    A row is dropped as soon as any one of its columns is NaN; the order of
    the surviving rows is preserved.

    Eg: If datarray=[1325.29 nan
                     1325.43 139501.2434
                     nan     nan
                     nan     139232.213
                     1350.12 134435.223]
        the function returns [1325.43 139501.2434
                              1350.12 134435.223]

    Parameters: datarray - 2D numpy array of floats.
    Returns:    2D numpy array holding only the rows without NaN.'''

    # One boolean per row: True when at least one column of that row is NaN.
    has_nan = np.isnan(datarray).any(axis=1)
    # Boolean-mask indexing keeps exactly the rows flagged as complete.
    complete_rows = np.logical_not(has_nan)
    return datarray[complete_rows]

In [None]:
def plot_lcurve(data,title):
    '''Convenience helper that draws one lightcurve in its own figure.

    Parameters:
    data  - 2D array; column 0 is plotted on the X axis (labelled as time in days)
            and column 1 on the Y axis (labelled as flux in electrons/s).
    title - text used as the plot title.

    The figure is shown immediately and a blank line is printed afterwards to
    separate consecutive outputs. Nothing is returned.'''

    plt.figure()
    time_axis, flux_axis = data[:,0], data[:,1]
    plt.plot(time_axis, flux_axis)
    plt.xlabel('Time(days)')
    plt.ylabel('Flux (electrons/s)')
    plt.title(title)
    plt.show()
    print('\n')

In [None]:
def data_smoothing(lcdata,wsize):
    '''Smooth a lightcurve with a centred moving average.

    Every row is replaced by the mean of the rows inside a sliding window
    centred on it; wsize//2 rows are taken on each side of the centre.
    Eg: If arr=[1,1.2,2.5,3.2,2.4,3.0] and the sliding window size is 5,
    arr[2]= (1+1.2+2.5+3.2+2.4)/5= 2.06.
    Near the edges the window is simply clipped to the rows that exist
    (so for arr[0] the average of arr[0],arr[1],arr[2] is used).
    The columns are averaged independently of each other, so both the time
    column and the flux column of the lightcurve are smoothed.

    Parameters:
    lcdata - cleaned lightcurve array (as returned by clean_data).
    wsize  - size of the sliding window, in rows.
    Returns: float array of the same shape as lcdata with the smoothed values.'''

    half_width = wsize//2  # number of neighbours used on each side of the centre
    n_rows = len(lcdata)
    averaged = np.zeros_like(lcdata, dtype=float)  # output buffer, always float

    for centre in range(n_rows):
        # Clip the window so it never reaches outside the array.
        first = max(0, centre - half_width)
        last = min(n_rows, centre + half_width + 1)  # exclusive upper bound
        # axis=0 averages down the rows, i.e. one mean per column.
        averaged[centre] = np.mean(lcdata[first:last], axis=0)

    return averaged

In [None]:
def eclipse_extract(smoothlc,ti,tf):
    '''Extract the part of a lightcurve that lies between two times.

    We use this to cut out a stretch of the lightcurve that contains an eclipse
    corresponding to a transit of the exoplanet.

    Parameters:
    smoothlc - 2D lightcurve array with time in column 0. It can be the output of
               clean_data (unsmoothed lightcurve) or of data_smoothing (smoothed
               lightcurve).
    ti, tf   - approximate initial and final times in days, given by the user.

    Returns: the rows starting at the first row whose time is greater than ti and
    stopping just before the first following row whose time is greater than tf.
    If no time exceeds tf, the slice runs to the end of the data (previously an
    empty array was returned in that case). If no time exceeds ti, an empty
    array is returned.
    Eg: eclipse_extract(smoothlc,1330,1340) returns the subset of the lightcurve
    lying between days 1330 and 1340.'''

    times = np.asarray(smoothlc)[:, 0]

    # Index of the first sample strictly after ti.
    after_ti = np.flatnonzero(times > ti)
    if after_ti.size == 0:
        return smoothlc[0:0]  # nothing lies after ti: empty subset
    start = after_ti[0]

    # First sample, searched from start onwards, that is strictly after tf.
    # When there is none, the requested interval extends past the data, so
    # keep everything up to the last row instead of collapsing to an empty slice.
    after_tf = np.flatnonzero(times[start:] > tf)
    end = start + after_tf[0] if after_tf.size else len(times)

    return smoothlc[start:end]

In [None]:
def calc_period(totdata, fupper, minimum_frequency=0.1, maximum_frequency=3):
    '''Estimate the orbital period of the exoplanet with astropy's Lomb Scargle periodogram.

    The periodogram is plotted, and the frequency with the highest power among
    all frequencies below fupper is converted to a period.
    The Lomb Scargle periodogram has many harmonics, so fupper should be chosen
    from a rough estimate of the orbital period such that only the wanted peak
    lies below it (in our data, the first significant peak).

    Parameters:
    totdata           - 2D lightcurve array, time (days) in column 0 and flux in
                        column 1, e.g. the output of data_smoothing.
    fupper            - upper limit (1/days) of the frequencies searched for the peak.
    minimum_frequency - lowest frequency (1/days) of the periodogram grid.
    maximum_frequency - highest frequency (1/days) of the periodogram grid.
                        The defaults 0.1 and 3 were chosen for a rough period
                        estimate of 3.3 days.

    Returns: the orbital period in days (reciprocal of the peak frequency).
    Raises:  ValueError if no periodogram sample with a valid power lies below fupper.'''
    t=totdata[:,0] #time is the first column of data array
    y=totdata[:,1] #flux is second column
    frequency, power = LombScargle(t,y).autopower(minimum_frequency=minimum_frequency,
                                                  maximum_frequency=maximum_frequency)

    plt.plot(frequency,power)
    plt.xlabel('Frequency (1/days)')
    plt.ylabel('Power')
    plt.title('Lomb Scargle Periodogram')
    plt.show()

    # Only samples below fupper (and with a defined power) compete for the peak.
    candidates = np.flatnonzero((frequency < fupper) & ~np.isnan(power))
    if candidates.size == 0:
        raise ValueError("no periodogram frequency lies below fupper=%r" % (fupper,))

    # argmax returns the first occurrence of the maximum, i.e. the lowest such frequency.
    peak_idx = candidates[np.argmax(power[candidates])]
    reqfreq = frequency[peak_idx]
    return 1/reqfreq #the orbital period in days is the reciprocal of frequency

In [None]:
def calc_function(tint,dttot,dtin,tcent,d,fmed):
    '''This function calculates the functional form (a trapezoid) of the light curve flux when there is an eclipse.
    It takes the following parameters:
    1. tint- the time values being modelled, e.g. the first column of the eclipse_extract function's return value
    2. dttot- total duration of the eclipse
    3. dtin- ingress (and egress) time
    4. tcent- time corresponding to midpoint of eclipse
    5. d- maximum depth of eclipse
    6. fmed- flux level when the star is not eclipsed by the exoplanet
    The function returns a float array with the model flux for every time in tint:
    fmed outside the eclipse, fmed-d at maximum depth, and a straight line between
    the two levels during ingress and egress.
    The time values are always treated as floats, so integer time arrays do not
    truncate the flux. If dtin is 0 the eclipse is box shaped (no ramps are computed).
    An example usage of this function:
    model=calc_function(eclipse[:,0],0.2,0.02,1345.23,460,135880)
    model contains the required functional form for this eclipse and can be plotted using plt.plot(eclipse[:,0], model).
    '''
    # Force a float array: np.zeros_like on an integer array would yield an
    # integer output buffer and silently truncate the ramp values.
    tint = np.asarray(tint, dtype=float)

    maxd_dur=dttot-2*dtin  #duration when the eclipse is at maximum depth (not in ingress or egress)

    #times where ingress and egress start and end, using the fact that ingress and egress
    #have the same duration (dtin), and tcent is the midpoint of the eclipse
    ig_end=tcent-maxd_dur/2
    ig_start=ig_end-dtin
    eg_start=tcent+maxd_dur/2
    eg_end=eg_start+dtin

    flux_values=np.zeros_like(tint)

    #boolean masks selecting the samples that are non-eclipsed, fully eclipsed, in ingress and in egress
    in_norm= (tint<=ig_start) | (tint>=eg_end)
    in_eclipse = (tint>=ig_end) & (tint<=eg_start)
    in_ingress = (tint>=ig_start) & (tint<=ig_end)
    in_egress = (tint>=eg_start) & (tint<=eg_end)

    flux_values[in_norm]=fmed #when not eclipsed, the flux is constant, given by the parameter fmed
    flux_values[in_eclipse]=fmed-d #when fully eclipsed, the flux is less by depth d

    #during ingress and egress, the flux follows the line joining (ig_start,fmed) and (ig_end,fmed-d)
    #and the corresponding line for egress. The ramps are assigned last so they define the
    #values at the shared boundary samples.
    if dtin != 0: #a zero-length ramp has no slope; skipping it avoids a 0/0 at the boundaries
        flux_values[in_ingress]=fmed-d*(tint[in_ingress]-ig_start)/dtin
        flux_values[in_egress]=fmed+d*(tint[in_egress]-eg_start-dtin)/dtin

    return flux_values
    

In [None]:
def fitting(data,dttot,dtin,tcent,d,fmed):
    '''Fit the eclipse model built by calc_function to one eclipse using scipy.optimize.curve_fit.

    Parameters:
    1. data- array containing the eclipse data; the first column has time and the second has flux values
    2. dttot- initial guess for the total duration of the eclipse
    3. dtin- initial guess for the ingress (and egress) time
    4. tcent- initial guess for the time corresponding to the midpoint of the eclipse
    5. d- initial guess for the maximum depth of the eclipse
    6. fmed- initial guess for the flux level when the star is not eclipsed by the exoplanet

    The function prints the best-fitting values of the five parameters and plots
    the fitted model (in red) on top of the data. Nothing is returned.
    Example execution:
    eclipse=eclipse_extract(cleandata,1328.2,1329.4)
    fitting(eclipse,0.2,0.02,1328.72,450,135910)
    This will fit the function for the first eclipse in the data, between 1328.2 and 1329.4 days.
    '''
    times = data[:,0]
    flux = data[:,1]
    initial_guess = [dttot,dtin,tcent,d,fmed]

    # curve_fit also returns the covariance matrix; it is not used here.
    params, _covariance = curve_fit(calc_function, times, flux, p0=initial_guess)
    best_dttot, best_dtin, best_tcent, best_d, best_fmed = params

    print("Best fitting parameters:")
    print("1. Total eclipse time=", best_dttot, "days")
    print("2. Ingress time=", best_dtin, "days")
    print("3. Time corresponding to midpoint of eclipse=", best_tcent, "days")
    print("4. Maximum depth of eclipse=", best_d, "electrons/s")
    print("5. Flux when star is not eclipsed=", best_fmed, "electrons/s")

    plt.figure()
    plt.plot(times, flux) #plotting lightcurve
    fitted_model = calc_function(times, *params)
    plt.plot(times, fitted_model, "r") #plotting function fitted to lightcurve
    plt.xlabel('Time(days)',fontsize=14)
    plt.ylabel('Flux(electrons/s)',fontsize=14)
    plt.title('Cleaned light curve + Model')
    plt.show()
    print('\n')

In [None]:
# ---- Analysis of the first dataset ----
# Raw string: the Windows path contains backslashes (\S, \P, \d) that are invalid
# escape sequences in a normal string literal and trigger a SyntaxWarning on
# recent Python versions. The resulting path is unchanged.
data1=np.genfromtxt(r'C:\Sem1\Period1\Prog4AA\da_tess_lc1.dat') #reading data into numpy array
print("Results for first dataset\n")

#remove rows with missing values and show the full lightcurve
cleandata=clean_data(data1)
plot_lcurve(cleandata,"Cleaned lightcurve")

#smooth with a 5-point window and show one eclipse of the smoothed lightcurve
smoothlc=data_smoothing(cleandata,5)
plot_lcurve(smoothlc,"Smooth lightcurve")
smooth_eclipse=eclipse_extract(smoothlc,1344.6,1345.8)
plot_lcurve(smooth_eclipse,"Eclipse- Smooth lightcurve")

#orbital period from the periodogram peak below 0.4 (1/days)
orb_period=calc_period(smoothlc, 0.4)
print('Orbital period of the exoplanet is',orb_period,'days')

#each eclipse below is cut from the cleaned (unsmoothed) data and fitted with
#initial guesses (dttot, dtin, tcent, d, fmed)
print("\nEclipse plot and curve fit for the eclipse 1:\n")
eclipse1=eclipse_extract(cleandata,1328.2,1329.4)
plot_lcurve(eclipse1,"Eclipse 1")
fitting(eclipse1,0.2,0.02,1328.72,450,135910)

print("Eclipse plot and curve fit for the eclipse 2:\n")
eclipse2=eclipse_extract(cleandata,1331.4,1332.6)
plot_lcurve(eclipse2,"Eclipse 2")
fitting(eclipse2,0.2,0.02,1331.96,420,135900)

print("Eclipse plot and curve fit for the eclipse 3:\n")
eclipse3=eclipse_extract(cleandata,1334.7,1335.9)
plot_lcurve(eclipse3,"Eclipse 3")
fitting(eclipse3,0.2,0.03,1335.3,450,135900)

print("Eclipse plot and curve fit for the eclipse 4:\n")
eclipse4=eclipse_extract(cleandata,1341.3,1342.5)
plot_lcurve(eclipse4,"Eclipse 4")
fitting(eclipse4,0.2,0.02,1341.92,200,135900)

print("Eclipse plot and curve fit for the eclipse 5:\n")
eclipse5=eclipse_extract(cleandata,1344.6,1345.8)
plot_lcurve(eclipse5,"Eclipse 5")
fitting(eclipse5,0.2,0.02,1345.23,460,135880)


print("Eclipse plot and curve fit for the eclipse 6:\n")
eclipse6=eclipse_extract(cleandata,1348,1349.2)
plot_lcurve(eclipse6,"Eclipse 6")
fitting(eclipse6,0.2,0.02,1348.52,470,135900)

print("Eclipse plot and curve fit for the eclipse 7:\n")
eclipse7=eclipse_extract(cleandata,1351.3,1352.5)
plot_lcurve(eclipse7,"Eclipse 7")
fitting(eclipse7,0.2,0.02,1351.82,470,135940)

#repeating the same procedure for the second dataset
# Raw string: the Windows path contains backslashes (\S, \P, \d) that are invalid
# escape sequences in a normal string literal and trigger a SyntaxWarning on
# recent Python versions. The resulting path is unchanged.
data2=np.genfromtxt(r'C:\Sem1\Period1\Prog4AA\da_tess_lc2.dat')
print("Results for second dataset\n")

#remove rows with missing values and show the full lightcurve
cleandata=clean_data(data2)
plot_lcurve(cleandata,"Cleaned lightcurve")

#smooth with a 21-point window and show one eclipse of the smoothed lightcurve
smoothlc=data_smoothing(cleandata,21)
plot_lcurve(smoothlc,"Smooth lightcurve")
smooth_eclipse=eclipse_extract(smoothlc,1334,1335.2)
plot_lcurve(smooth_eclipse,"Eclipse- Smooth lightcurve")

#orbital period from the periodogram peak below 1.2 (1/days)
orb_period=calc_period(smoothlc,1.2)
print('Orbital period of the exoplanet is',orb_period,'days')

#fit the same eclipse twice with identical initial guesses: once on the
#unsmoothed data and once on the smoothed data
print("\nEclipse plot and curve fit for an eclipse using cleaned but unsmoothed data:\n")
eclipse_data2=eclipse_extract(cleandata,1334,1335.2)
plot_lcurve(eclipse_data2,"Eclipse")
fitting(eclipse_data2,0.02,0.002,1334.6,70,178210)
print("\nEclipse plot and curve fit for an eclipse using smoothed data:\n")
seclipse_data2=eclipse_extract(smoothlc,1334,1335.2)
plot_lcurve(seclipse_data2,"Eclipse- Smooth lightcurve")
fitting(seclipse_data2,0.02,0.002,1334.6,70,178210)

#The fits obtained are not good for unsmoothed data, so I have also fit for smoothed data which seems to be better. 