# Consolidate Output
This program reads in the data from runs on the BlueBEAR cluster via RDS and appends them on to the existing `malatium` and `copper` dataframes, saving them as `atium` and `bronze` respectively.

In [1]:
import numpy as np
import lightkurve as lk
import pandas as pd
import fnmatch as fnm
import seaborn as sns
import astropy.units as u
from tqdm import tqdm
import glob
import matplotlib.pyplot as plt

### Add the mode results

Read in the existing dataframe

In [2]:
cols = ['KIC','Teff','age','dnu','eTeff','ednu','efeh','enumax','feh','loage','lomodlogg','lomodmass','lomodrad','modlogg','modmass','modrad','numax','source','upage','upmodlogg', 'upmodmass','upmodrad','G','bprp']
mal = pd.read_csv('../../data/malatium.csv', usecols=cols)
cop = pd.read_csv('../../data/copper.csv', index_col=0)

Parse all the data, including some flags:

`empty   : run not yet done`

`rhat_1  : has at least one rhat > 1.1. Major concern, exclude from sample`

`nosum   : there is no summary available for some reason`


In [3]:
cop['f'] = np.nan
cop['f_e'] = np.nan
cop['g'] = np.nan
cop['g_e'] = np.nan
cop['A'] = np.nan
cop['A_e'] = np.nan
cop['H'] = np.nan
cop['H_e'] = np.nan
cop['flag'] = ''

In [4]:
for idx in range(95):
    kic = str(mal.loc[idx].KIC)
    files = glob.glob('/home/oliver/PhD/mnt/RDS/malatium/peakbag/{}/*chains.csv'.format(str(kic)))

    try:
        chains = pd.read_csv(files[0],index_col=0)
        lis = list(chains)
    except IndexError:
        print('Star {} has not completed yet | idx {}'.format(str(kic), idx))
        cop.loc[cop.KIC == kic, 'flag'] = 'empty'
        continue

    # Lets do each mode in turn
    for mode in [0,1,2]:
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'f'] = np.median(chains[fnm.filter(lis, 'f{}_*'.format(mode))], axis=0)
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'f_e'] = np.std(chains[fnm.filter(lis, 'f{}_*'.format(mode))], axis=0).values
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'g'] = np.median(chains[fnm.filter(lis, 'g{}_*'.format(mode))], axis=0)
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'g_e'] = np.std(chains[fnm.filter(lis, 'g{}_*'.format(mode))], axis=0).values
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'A'] = np.median(chains[fnm.filter(lis, 'a{}_*'.format(mode))], axis=0)
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'A_e'] = np.std(chains[fnm.filter(lis, 'a{}_*'.format(mode))], axis=0).values
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'H'] = np.median(chains[fnm.filter(lis, 'h{}_*'.format(mode))], axis=0)
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'H_e'] = np.std(chains[fnm.filter(lis, 'h{}_*'.format(mode))], axis=0).values    
    
    #Lets copy over the Rhats for each mode and flag if needed
    try:
        summary = pd.read_csv(files[0].replace('chains','summary'), index_col=0)
        rhats = summary.loc[fnm.filter(lis, '*__*')].Rhat.values
        
        if any(np.abs(rhats - 1.) >= 0.1):
            print('Rhats greater than 1.1 for KIC {}'.format(str(kic)))
            cop.loc[cop.KIC==kic, 'flag'] = 'rhat_1'

        
    except FileNotFoundError:
        print('No summary file for KIC {}'.format(str(kic)))
        mal.loc[idx, 'flag'] = 'nosum'
        pass

Star 1435467 has not completed yet | idx 0
Star 2837475 has not completed yet | idx 1
Star 3427720 has not completed yet | idx 2
Star 3456181 has not completed yet | idx 3
Star 3632418 has not completed yet | idx 4
Star 3656476 has not completed yet | idx 5
Star 3735871 has not completed yet | idx 6
Star 4914923 has not completed yet | idx 7
Star 5184732 has not completed yet | idx 8
Star 5773345 has not completed yet | idx 9
Star 5950854 has not completed yet | idx 10
Star 6106415 has not completed yet | idx 11
Star 6116048 has not completed yet | idx 12
Star 6225718 has not completed yet | idx 13
Star 6508366 has not completed yet | idx 14
Star 6603624 has not completed yet | idx 15
Star 6679371 has not completed yet | idx 16
Star 6933899 has not completed yet | idx 17
Star 7103006 has not completed yet | idx 18
Star 7106245 has not completed yet | idx 19
Star 7206837 has not completed yet | idx 20
Star 7296438 has not completed yet | idx 21
Star 7510397 has not completed yet | idx 2

In [5]:
cop.head(5)

Unnamed: 0,KIC,n,l,Freq,e_Freq,E_Freq,Amp,e_Amp,E_Amp,Width,...,source,f,f_e,g,g_e,A,A_e,H,H_e,flag
0,1435467,11,1,889.25,0.47,0.53,,,,,...,L,889.771948,0.320578,4.267716,1.218315,2.203952,0.170429,0.728456,0.18234,
1,1435467,11,2,920.04,1.82,1.99,,,,,...,L,921.220902,1.643444,3.067566,1.534018,1.334852,0.324376,0.380058,0.227212,
2,1435467,12,0,928.22,0.94,0.71,2.39,0.14,0.13,4.79,...,L,927.920894,0.616308,3.702034,1.526683,1.383523,0.3511,0.332163,0.14788,
3,1435467,12,1,957.04,0.5,0.46,,,,,...,L,957.393253,0.239691,2.10908,0.636603,2.062365,0.138292,1.282096,0.321852,
4,1435467,12,2,989.14,1.24,1.3,,,,,...,L,989.204494,0.391108,1.597421,0.952271,1.723303,0.221747,1.161414,0.55736,


In [6]:
cop.to_csv('../../data/bronze.csv')

### Add the parameters results

Read in the existing dataframes

In [5]:
#Add the necessary columns to malatium
mal['nus*'] = np.nan
mal['l_nus*'] = np.nan
mal['u_nus*'] = np.nan

mal['cosi'] = np.nan
mal['l_cosi'] = np.nan
mal['u_cosi'] = np.nan

mal['i'] = np.nan
mal['l_i'] = np.nan
mal['u_i'] = np.nan

mal['nus'] = np.nan
mal['l_nus'] = np.nan
mal['u_nus'] = np.nan

mal['P'] = np.nan
mal['l_P'] = np.nan
mal['u_P'] = np.nan

mal['vsini'] = np.nan
mal['l_vsini'] = np.nan
mal['u_vsini'] = np.nan

mal['flag'] = ''
mal['chainflag'] = np.nan

Parse all the data, including some flags:

`empty   : run not yet done`

`rhat_01 : has at least one rhat > 1.01. Minor concern`

`rhat_1  : has at least one rhat > 1.1. Major concern, exclude from sample`

`nosum   : there is no summary available for some reason`

In [6]:
for idx in range(95):
    kic = mal.loc[idx].KIC
    files = glob.glob('/home/oliver/PhD/mnt/RDS/malatium/peakbag/{}/*chains.csv'.format(str(kic)))

    try:
        chains = pd.read_csv(files[0],index_col=0)
    except IndexError:
        print('Star {} has not completed yet | idx {}'.format(str(kic), idx))
        mal.loc[idx, 'flag'] = 'empty'
        continue

    #Before we go ahead, lets check the Rhats to see if we need to flag anything
    try:
        summary = pd.read_csv(files[0].replace('chains','summary'), index_col=0)
        rhats = np.array([summary.loc['xsplit'].Rhat, summary.loc['cosi'].Rhat,
                         summary.loc['i'].Rhat,summary.loc['split'].Rhat, 
                         summary.loc['b'].Rhat])
        
        if any(np.abs(rhats - 1.) >= 0.01):
            mal.loc[idx, 'flag'] = 'rhat_01'  
        
        if any(np.abs(rhats - 1.) >= 0.1):
            print('Rhats greater than 1.1 for KIC {}'.format(str(kic)))
            mal.loc[idx, 'flag'] = 'rhat_1'

        
    except FileNotFoundError:
        print('No summary file for KIC {}'.format(str(kic)))
        mal.loc[idx, 'flag'] = 'nosum'
        pass
    l, m, up = np.percentile(chains['xsplit'].values,[15.9, 50, 84.1])
    mal.loc[idx, 'l_nus*'], mal.loc[idx, 'nus*'], mal.loc[idx, 'u_nus*'] = m-l, m, up-m
    
    l, m, up = np.percentile(chains['cosi'].values, [15.9, 50, 84.1])
    mal.loc[idx, 'l_cosi'], mal.loc[idx, 'cosi'], mal.loc[idx, 'u_cosi'] = m-l, m, up-m
    
    l, m, up = np.percentile(chains['split'].values,[15.9, 50, 84.1])
    mal.loc[idx, 'l_nus'], mal.loc[idx, 'nus'], mal.loc[idx, 'u_nus'] = m-l, m, up-m
    
    l, m, up = np.percentile(chains['i'].values, [15.9, 50, 84.1])    
    mal.loc[idx, 'l_i'], mal.loc[idx, 'i'], mal.loc[idx, 'u_i'] =  m-l, m, up-m

    nus = u.Quantity(chains['split'].values, u.microhertz)
    Pchain = 1./nus.to(1./u.day).value
    l, m, up = np.percentile(Pchain, [15.9, 50, 84.1])  
    mal.loc[idx, 'l_P'], mal.loc[idx, 'P'], mal.loc[idx, 'u_P'] = m-l, m, up-m
    
    Rkm = u.Quantity(mal.loc[idx, 'modrad'], u.solRad).to(u.km)
    vsinichain = np.sin(chains['i'].values) * 2 * np.pi * Rkm / (1./nus.to(u.Hertz).value)
    l, m, up = np.percentile(vsinichain, [15.9, 50, 84.1])  
    mal.loc[idx, 'l_vsini'], mal.loc[idx, 'vsini'], mal.loc[idx, 'u_vsini'] = m-l, m, up-m


Rhats greater than 1.1 for KIC 5950854
Rhats greater than 1.1 for KIC 6603624
Star 7510397 has not completed yet | idx 22
Star 7970740 has not completed yet | idx 27
No summary file for KIC 8006161
No summary file for KIC 8379927
Star 8394589 has not completed yet | idx 33
Star 8760414 has not completed yet | idx 36
Rhats greater than 1.1 for KIC 8938364
Star 9139163 has not completed yet | idx 41
Star 9206432 has not completed yet | idx 42
Star 9414417 has not completed yet | idx 45
Star 10516096 has not completed yet | idx 53
Rhats greater than 1.1 for KIC 10730618
Star 11253226 has not completed yet | idx 58
Star 12069424 has not completed yet | idx 62
Star 12069449 has not completed yet | idx 63
Rhats greater than 1.1 for KIC 6521045
Star 8478994 has not completed yet | idx 81
No summary file for KIC 8866102


I'm going to add flags based on by-eye inspection of the corner plots as well.

`-1 : no data`

`0   : no issues`

`1 : poorly constrained rotational parameters`

`2  : bimodal distributions`

`3   : poorly sampled`

In [7]:
import corner
choice = input('Are we going to investigate the corners? (y/n) ')

if choice == 'y':
    for idx in range(95):
        kic = mal.loc[idx].KIC
        files = glob.glob('/home/oliver/PhD/mnt/RDS/malatium/peakbag/{}/*chains.csv'.format(str(kic)))

        try:
            chains = pd.read_csv(files[0],index_col=0)
        except IndexError:
            print('Star {} has not completed yet | idx {}'.format(str(kic), idx))
            mal.loc[idx, 'flagch'] = -1
            continue
            
        labels=['xsplit','cosi','b','i','split']
        chain = np.array([chains[label] for label in labels])
        verbose = [r'$\delta\nu_s^*$',r'$\cos(i)$',r'$b$',r'$i$',r'$\delta\nu_{\rm s}$']
        

        corner.corner(chain.T, labels=verbose, quantiles=[0.16, 0.5, 0.84]
                      ,show_titles=True)
        plt.show()
        
        print('KIC {}\n\
              0   : no issues \n\
              1 : poorly constrained rotational parameters\n\
              2  : bimodal distributions\n\
              3   : not converged'.format(str(kic)))
        flag = int(input('Flag: '))
        mal.loc[idx, 'chainflag'] = flag
        
else:
    mal['chainflag'] = pd.read_csv('../../data/atium.csv',usecols=['chainflag'])

Are we going to investigate the corners? (y/n) n


### Finally I'm going to calculate the BP-RP errors.
SQL is broken and I can't add this script to get_data.ipynb, so I'm doing it here.

In [30]:
from astropy.table import Table
gkf = Table.read('../../data/kepler_dr2_1arcsec.fits', format='fits').to_pandas()

In [32]:
gkf = Table.read('../../data/kepler_dr2_1arcsec.fits', format='fits').to_pandas().rename(columns={
                    'kepid':'KIC'})[['KIC','phot_bp_mean_flux',
                                     'phot_bp_mean_flux_error',
                                     'phot_rp_mean_flux',
                                     'phot_rp_mean_flux_error']]

In [35]:
ebp = gkf['phot_bp_mean_flux_error'] / (gkf['phot_bp_mean_flux']) * np.log(10)
erp = gkf['phot_rp_mean_flux_error'] / (gkf['phot_rp_mean_flux']) * np.log(10)
gkf['ebprp'] = np.sqrt(ebp**2 + erp**2)
mal = pd.merge(mal, gkf[['KIC','ebprp']], on='KIC', how='left')

In [37]:
mal.to_csv('../../data/atium.csv')