In [17]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib
import os
from glob import glob
from matplotlib.colors import LogNorm

from scipy.optimize import curve_fit

from astropy.table import Table
import astropy.io.fits as fits
from astropy.stats import LombScargle, BoxLeastSquares
import exoplanet as xo
from stuff import FINDflare, EasyE

# global plot styling: larger serif text for every figure made below
matplotlib.rcParams.update({'font.size': 18, 'font.family': 'serif'})

# file extension string for saved figures
ftype = '.pdf'

In [4]:
# tess_dir = '/data/epyc/data/tess/'
tess_dir = '/Users/james/Desktop/tess/'

# sector directory names: 'sector001' ... 'sector006'
sectors = ['sector{:03d}'.format(num) for num in range(1, 7)]

# glob one Sector directory at a time (rather than everything at once) so the
# combined list is guaranteed to be grouped in Sector order
per_sector = [glob(tess_dir + name + '/*.fits', recursive=True) for name in sectors]
sect1, sect2, sect3, sect4, sect5, sect6 = per_sector

# flatten into one long list of light curve files
files = [path for found in per_sector for path in found]
# number of files found in each Sector, for looping later!
s_lens = [len(found) for found in per_sector]
print(s_lens, len(files))

[15889, 15990, 15991, 19996, 19990, 19981] 107837


In [18]:
def MultiSector(TICs, tess_dir = '/Users/james/Desktop/tess/', run_dir = '/Users/james/Desktop/helloTESS/'):
    '''
    Run the basic set of tools on one light curve stitched together from
    MULTI-SECTOR data, and save a single diagnostic plot for it.

    Every file in `TICs` is read as a FITS table, has its own median
    PDCSAP_FLUX subtracted, and is appended to one combined table. The
    combined light curve is then quality-cut, smoothed with a 128-point
    running median, and (if more than 1000 good points remain) searched with
    a Lomb-Scargle periodogram, an autocorrelation function, and the EasyE
    eclipse finder. The results are overplotted and the figure is saved to
    `run_dir + 'figures/longerP/'`, named after the first file in `TICs`.

    Parameters
    ----------
    TICs : sequence of str
        Paths of the FITS light curve files to combine. The driver cell in
        this notebook passes all files belonging to one object ID. The plot
        title is the third '-'-separated field of the first path.
    tess_dir : str
        Not used inside this function; kept so existing calls keep working.
    run_dir : str
        Directory under which 'figures/longerP/' is created for the output.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `TICs` is empty.
    '''

    # fail early (and before touching the disk) when there is nothing to combine
    if len(TICs) == 0:
        raise ValueError('MultiSector needs at least one light curve file in TICs')

    os.makedirs(run_dir + 'figures/longerP', exist_ok=True)

    # read every file, removing each file's own median flux so they line up
    frames = []
    for fname in TICs:
        tbl = Table.read(fname, format='fits')
        tbl['PDCSAP_FLUX'] = tbl['PDCSAP_FLUX'] - np.nanmedian(tbl['PDCSAP_FLUX'])
        frames.append(tbl.to_pandas())

    # concatenate once, instead of re-copying the growing table for every file
    df_tbl = pd.concat(frames, ignore_index=True, sort=False)

    # put the combined light curve back on a positive flux scale
    df_tbl['PDCSAP_FLUX'] = df_tbl['PDCSAP_FLUX'] + np.nanmedian(df_tbl['SAP_FLUX'])

    # make harsh quality cuts, and chop out a known bad window of time (might add more later)
    AOK = (df_tbl['QUALITY'] == 0) & ((df_tbl['TIME'] < 1347) | (df_tbl['TIME'] > 1350))

    # the good-quality columns are used many times below, so select them once
    time_ok = df_tbl['TIME'][AOK]
    flux_ok = df_tbl['PDCSAP_FLUX'][AOK]
    ferr_ok = df_tbl['PDCSAP_FLUX_ERR'][AOK]

    # do a running median for a basic smooth
    smo = flux_ok.rolling(128, center=True).median().values
    med = np.nanmedian(smo)

    # make an output plot for every file
    plt.close() # just in case anything is open...
    plt.figure(figsize=(14,6))
    # NOTE(review): linestyle=None selects matplotlib's default line style, so the
    # points are joined by a line; use linestyle='none' if only error bars were
    # intended -- left unchanged here, confirm the intended look.
    plt.errorbar(time_ok, flux_ok/med, yerr=ferr_ok/med,
                 linestyle=None, alpha=0.25, label='PDC_FLUX')
    plt.plot(time_ok, smo/med, label='128pt MED', c='orange')

#     Smed = np.nanmedian(df_tbl['SAP_FLUX'][AOK])
#     plt.errorbar(df_tbl['TIME'][AOK], df_tbl['SAP_FLUX'][AOK]/Smed, yerr=df_tbl['SAP_FLUX_ERR'][AOK]/Smed,
#                  linestyle=None, alpha=0.25, label='SAP_FLUX')


    # require at least 1000 good datapoints for analysis
    if AOK.sum() > 1000:
        # find OK points in the smoothed LC (the rolling median leaves NaNs at the edges)
        SOK = np.isfinite(smo)

        time_s = time_ok[SOK]
        ferr_s = ferr_ok[SOK]
        smo_s = smo[SOK]/med


        # Lomb Scargle, searching periods from 0.1 to 40 (in units of TIME)
        LS = LombScargle(time_s, smo_s, dy=ferr_s/med)
        frequency, power = LS.autopower(minimum_frequency=1./40.,
                                        maximum_frequency=1./0.1,
                                        samples_per_peak=7)
        best_frequency = frequency[np.argmax(power)]

        # per_out, per_med and EclFlg are not used further in this function; they
        # match the columns of the commented-out per-sector output below it
        per_out = 1./best_frequency
        per_amp = np.nanmax(power)
        per_med = np.nanmedian(power)
        per_std = np.nanstd(smo_s)

        # only overplot the L-S model when the peak power is reasonably strong
        if per_amp > 0.2:
            LSmodel = LS.model(time_s, best_frequency)
            plt.plot(time_s, LSmodel,
                     label='L-S P='+format(per_out, '6.3f')+'d, pk='+format(per_amp, '6.3f'),
                     c='green')


        # ACF w/ Exoplanet package
        acf = xo.autocorr_estimator(time_s.values, smo_s,
                                    yerr=ferr_s.values/med,
                                    min_period=0.1, max_period=40, max_peaks=2)
        if len(acf['peaks']) > 0:
            ACF_1dt = acf['peaks'][0]['period']
            # ACF value at the lag that equals the first peak's period
            ACF_1pk = acf['autocorr'][1][np.where((acf['autocorr'][0] == acf['peaks'][0]['period']))[0]][0]

#         if ACF_1dt > 0:
            # sinusoid at the ACF period, scaled by the LC scatter and the ACF peak height
            plt.plot(time_s,
                     per_std * ACF_1pk * np.sin(time_s / ACF_1dt * 2 * np.pi) + 1,
                     label = 'ACF=' + format(ACF_1dt, '6.3f') + 'd, pk=' + format(ACF_1pk, '6.3f'), lw=2,
                     alpha=0.7, c='FireBrick')


        # here is where a simple Eclipse (EB) finder goes
        # presumably EE[0] / EE[1] hold start / stop indices of each candidate event -- verify against EasyE
        EE = EasyE(smo_s, ferr_s/med, N1=5, N2=3, N3=2)
        if np.size(EE) > 0:
            for j in range(len(EE[0])):
                plt.scatter(time_s[(EE[0][j]):(EE[1][j]+1)],
                            smo[SOK] [(EE[0][j]):(EE[1][j]+1)] / med,
                            color='k', marker='s', s=5, alpha=0.75, label='_nolegend_')
            # empty scatter so the legend gets exactly one 'Ecl?' entry
            plt.scatter([],[], color='k', marker='s', s=5, alpha=0.75, label='Ecl?')
            EclFlg = 1


        # add BLS
#         bls = BoxLeastSquares(df_tbl['TIME'][AOK][SOK], smo[SOK]/med, dy=df_tbl['PDCSAP_FLUX_ERR'][AOK][SOK]/med)
#         blsP = bls.autopower(0.1, method='fast', objective='snr')
#         blsPer = blsP['period'][np.argmax(blsP['power'])]
#         if ((4*np.nanstd(blsP['power']) + np.nanmedian(blsP['power']) < np.nanmax(blsP['power'])) &
#             (np.nanmax(blsP['power']) > 50.) &
#             (blsPer < 0.95 * np.nanmax(blsP['period']))
#            ):
#             blsPeriod = blsPer
#             blsAmpl = np.nanmax(blsP['power'])
#             plt.plot([],[], ' ', label='BLS='+format(blsPer, '6.3f')+'d')


    # title comes from the function's own argument, not from a notebook-level variable
    plt.title(TICs[0].split('-')[2], fontsize=12)
    plt.ylabel('Flux')
    plt.xlabel('BJD - 2457000 (days)')
    plt.legend(fontsize=10)

    plt.savefig(run_dir + 'figures/longerP/' + TICs[0].split('/')[-1] + '.jpeg',
                bbox_inches='tight', pad_inches=0.25, dpi=100)
    plt.close()



#     # write per-sector output files
#     ALL_TIC = pd.Series(files_i).str.split('-', expand=True).iloc[:,-3].astype('int')

#     flare_out = pd.DataFrame(data={'TIC':ALL_TIC[FL_id], 'i0':FL_t0, 'i1':FL_t1, 'med':FL_f0, 'peak':FL_f1})
#     flare_out.to_csv(run_dir + sector + '_flare_out.csv')

#     rot_out = pd.DataFrame(data={'TIC':ALL_TIC,
#                                  'per':per_out, 'Pamp':per_amp, 'Pmed':per_med, 'StdLC':per_std,
#                                  'acf_pk':ACF_1pk, 'acf_per':ACF_1dt,
#                                  'bls_period':blsPeriod, 'bls_ampl':blsAmpl, 'ecl_flg':EclFlg})
#     rot_out.to_csv(run_dir + sector + '_rot_out.csv')



In [11]:
# split every file name on '-' once; column 2 of the result is the object ID
name_parts = pd.Series(files).str.split('-', expand=True)
per_object = name_parts.groupby(by=2).count()

# get the unique object IDs (NOT the simplest way, but matches the next step)
obj = per_object.index

# get the count of files for each unique object ID
Nobj = per_object[0]

# how many objects have at least 1, 2, 3, ... files
for k in range(max(Nobj)):
    print(k+1, (Nobj > k).sum())
obj[0] # example Object ID (TIC #)

1 63696
2 19129
3 9806
4 6682
5 5417
6 3107


'0000000000589445'

In [None]:
o5 = np.where((Nobj > 3))[0] # was named "o5" because originally wanted Over 5 observations. Now pick other N

# split the file names ONCE and bucket the files by object ID, instead of
# re-splitting every file name on every pass through the loop
files_ser = pd.Series(files)
files_by_obj = files_ser.groupby(files_ser.str.split('-', expand=True)[2])

for k, obj_k in enumerate(obj[o5]):
    print(k, obj_k)
    # all files for this object, in the same order they appear in `files`
    # (`files_k` stays a module-level name, as in the original cell)
    files_k = files_by_obj.get_group(obj_k).values

    MultiSector(files_k)

0 0000000025063296
1 0000000025063396
2 0000000025063986
3 0000000025063999
4 0000000025064377
5 0000000025064731
6 0000000025064848
7 0000000025065290
8 0000000025065357
9 0000000025065358
10 0000000025065471
11 0000000025077654
12 0000000025077712
13 0000000025078674
14 0000000025078884
15 0000000025078924
16 0000000025079777
17 0000000025080362
18 0000000025080867
19 0000000025081005
20 0000000025081091
21 0000000025081173
22 0000000025081407
23 0000000025081575
24 0000000025081629
25 0000000025081729
26 0000000025115881
27 0000000025115973
28 0000000025115993
29 0000000025116013
30 0000000025116119
31 0000000025116423
32 0000000025116563
33 0000000025116994
34 0000000025117096
35 0000000025117242
36 0000000025117273
37 0000000025117741
38 0000000025117756
39 0000000025118084
40 0000000025118098
41 0000000025118795
42 0000000025118964
43 0000000025131718
44 0000000025132222
45 0000000025132265
46 0000000025132314
47 0000000025132694
48 0000000025132696
49 0000000025132720
50 0000000

396 0000000032072097
397 0000000032072196
398 0000000032072229
399 0000000032088889
400 0000000032088907
401 0000000032089054
402 0000000032089522
403 0000000032089857
404 0000000032089919
405 0000000032090046
406 0000000032090064
407 0000000032090208
408 0000000032090394
409 0000000032090407
410 0000000032090440
411 0000000032090581
412 0000000032090583
413 0000000032091178
414 0000000032091606
415 0000000032091784
416 0000000032092043
417 0000000032092280
418 0000000032093131
419 0000000032150013
420 0000000032150270
421 0000000032150333
422 0000000032150630
423 0000000032151066
424 0000000032151781
425 0000000032151799
426 0000000032152015
427 0000000032152605
428 0000000032153165
429 0000000032153214
430 0000000032153309
431 0000000032153316
432 0000000032153468
433 0000000032154131
434 0000000032154146
435 0000000032154622
436 0000000032154687
437 0000000033714406
438 0000000033714920
439 0000000033714944
440 0000000033715210
441 0000000033715956
442 0000000033716034
443 000000003

787 0000000038844604
788 0000000038845575
789 0000000038846471
790 0000000038846515
791 0000000038875458
792 0000000038877461
793 0000000038877496
794 0000000038877648
795 0000000038878468
796 0000000038905525
797 0000000038905722
798 0000000038907043
799 0000000038907305
800 0000000038907321
801 0000000038907328
802 0000000038907513
803 0000000038908257
804 0000000038908399
805 0000000038908752
806 0000000038908780
807 0000000038908923
808 0000000038909083
809 0000000038909096
810 0000000038909361
811 0000000038909487
812 0000000038909490
813 0000000038909532
814 0000000038909679
815 0000000038909740
816 0000000038939497
817 0000000038939536
818 0000000038939558
819 0000000038939585
820 0000000038941401
821 0000000038941653
822 0000000038942330
823 0000000038942483
824 0000000038942595
825 0000000038942727
826 0000000038942878
827 0000000038943708
828 0000000038943958
829 0000000038943971
830 0000000038954437
831 0000000038954454
832 0000000038954456
833 0000000040183989
834 000000004