# Consolidate Output
This program reads in the data from runs on the BlueBEAR cluster via RDS and appends them on to the existing `malatium` and `copper` dataframes, saving them as `atium` and `bronze` respectively.

In [1]:
import numpy as np
import lightkurve as lk
import pandas as pd
import fnmatch as fnm
import seaborn as sns
import astropy.units as u
from tqdm import tqdm
import glob
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)


  import pandas.util.testing as tm


Read in the existing dataframe

In [2]:
cols = ['KIC','Teff','age','dnu','eTeff','ednu','efeh','enumax','feh','loage','lomodlogg','lomodmass','lomodrad','modlogg','modmass','modrad','numax','source','upage','upmodlogg', 'upmodmass','upmodrad','G','bprp']
mal = pd.read_csv('../../data/malatium.csv', usecols=cols)
cop = pd.read_csv('../../data/copper.csv', index_col=0)

## Remove lines from `copper` for modes we don't use.

In [4]:
alt = [7970740, 8006161, 12069424, 12069449] # remove lnk<6
alt2 = 8478994 # Remove n <= 21 & n >= 27

for a in alt:
    ids = cop[(cop.KIC == str(a)) & (cop.l != 3) & (cop.lnK < 6)].index.values
    cop.drop(ids, inplace=True)

lo = cop[(cop.KIC == str(alt2)) & (cop.l != 3) & (cop.n <= 21)].index.values
hi = cop[(cop.KIC == str(alt2)) & (cop.l != 3) & (cop.n >= 27)].index.values
cop.drop(lo, inplace=True)
cop.drop(hi, inplace=True)

In [5]:
cop.reset_index(inplace=True, drop=True)

## Add the classification

We use the classification from Garcia+2014

In [3]:
mal['hrclass'] = ''
s = np.where((mal.modlogg > 4.0) & (mal.Teff < 6250))[0]
mal.loc[s, 'hrclass'] = 'MS'
s = np.where((mal.modlogg < 4.0) & (mal.Teff < 6250))[0]
mal.loc[s, 'hrclass'] = 'SG'
s = np.where((mal.Teff > 6250))[0]
mal.loc[s, 'hrclass'] = 'H'

## Add the mode results

Parse all the data, including some flags:

`empty   : run not yet done`

`rhat_1  : has at least one rhat > 1.1. Major concern, exclude from sample`

`nosum   : there is no summary available for some reason`


In [7]:
cop['f'] = np.nan
cop['f_e'] = np.nan
cop['g'] = np.nan
cop['g_e'] = np.nan
cop['A'] = np.nan
cop['A_e'] = np.nan
cop['H'] = np.nan
cop['H_e'] = np.nan
cop['flag'] = ''

In [10]:
incomplete = 0
for idx in tqdm(range(95)):
    kic = str(mal.loc[idx].KIC)
    files = glob.glob('/home/oliver/PhD/mnt/RDS/malatium/peakbag/{}/*chains.csv'.format(str(kic)))

    try:
        chains = pd.read_csv(files[0],index_col=0)
        lis = list(chains)
    except IndexError:
        incomplete += 1
        cop.loc[cop.KIC == kic, 'flag'] = 'empty'
        continue

    # Lets do each mode in turn
    for mode in [0,1,2]:
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'f'] = np.median(chains[fnm.filter(lis, f'f{mode}_*')], axis=0)
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'f_e'] = np.std(chains[fnm.filter(lis, f'f{mode}_*')], axis=0).values
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'g'] = np.median(chains[fnm.filter(lis, f'g{mode}_*')], axis=0)
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'g_e'] = np.std(chains[fnm.filter(lis, f'g{mode}_*')], axis=0).values
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'A'] = np.median(chains[fnm.filter(lis, f'a{mode}_*')], axis=0)
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'A_e'] = np.std(chains[fnm.filter(lis, f'a{mode}_*')], axis=0).values
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'H'] = np.median(chains[fnm.filter(lis, f'h{mode}_*')], axis=0)
        cop.loc[(cop.KIC==kic)&(cop.l==mode),'H_e'] = np.std(chains[fnm.filter(lis, f'h{mode}_*')], axis=0).values    
    
    #Lets copy over the Rhats for each mode and flag if needed
    try:
        summary = pd.read_csv(files[0].replace('chains','summary'), index_col=0)
        rhats = summary.loc[fnm.filter(lis, '*__*')].Rhat.values
        
        if any(np.abs(rhats - 1.) >= 0.1):
            print('Rhats greater than 1.1 for KIC {}'.format(str(kic)))
            cop.loc[cop.KIC==kic, 'flag'] = 'rhat_1'

        
    except FileNotFoundError:
        print('No summary file for KIC {}'.format(str(kic)))
        mal.loc[idx, 'flag'] = 'nosum'
        pass
print(f'There are {incomplete} stars yet to run.')    

 66%|██████▋   | 63/95 [01:04<00:31,  1.03it/s]

Rhats greater than 1.1 for KIC 12069424


 74%|███████▎  | 70/95 [01:10<00:20,  1.21it/s]

Rhats greater than 1.1 for KIC 4143755


 86%|████████▋ | 82/95 [01:20<00:09,  1.32it/s]

Rhats greater than 1.1 for KIC 8478994


100%|██████████| 95/95 [01:31<00:00,  1.28it/s]

There are 0 stars yet to run.





In [11]:
cop.to_csv('../../data/bronze.csv')

### Add the parameters results

Read in the existing dataframes

In [10]:
#Add the necessary columns to malatium
mal['nus*'] = np.nan
mal['l_nus*'] = np.nan
mal['u_nus*'] = np.nan

mal['cosi'] = np.nan
mal['l_cosi'] = np.nan
mal['u_cosi'] = np.nan

mal['i'] = np.nan
mal['l_i'] = np.nan
mal['u_i'] = np.nan

mal['nus'] = np.nan
mal['l_nus'] = np.nan
mal['u_nus'] = np.nan

mal['P'] = np.nan
mal['l_P'] = np.nan
mal['u_P'] = np.nan

mal['vsini'] = np.nan
mal['l_vsini'] = np.nan
mal['u_vsini'] = np.nan

mal['fit_numax'] = np.nan
mal['efit_numax'] = np.nan
mal['fit_d01'] = np.nan
mal['efit_d01'] = np.nan
mal['fit_d02'] = np.nan
mal['efit_d02'] = np.nan
mal['fit_epsilon'] = np.nan
mal['efit_epsilon'] = np.nan

mal['V1'] = np.nan
mal['eV1'] = np.nan

mal['V2'] = np.nan
mal['eV2'] = np.nan

mal['rhat_flag'] = ''
mal['corner_flag'] = np.nan
mal['neff_xsplit'] = np.nan
mal['neff_cosi'] = np.nan
mal['neff_split'] = np.nan
mal['neff_i'] = np.nan

In [13]:
incomplete = 0
for idx in tqdm(range(95)):
    kic = mal.loc[idx].KIC
    files = glob.glob('/home/oliver/PhD/mnt/RDS/malatium/peakbag/{}/*chains.csv'.format(str(kic)))

    # Check to see if run is done
    try:
        chains = pd.read_csv(files[0],index_col=0)
        niters = len(chains)
    except IndexError:
        incomplete += 1
        mal.loc[idx, 'rhat_flag'] = 'empty'
        continue

    #Flag Rhat values & Neff values
    try:
        summary = pd.read_csv(files[0].replace('chains','summary'), index_col=0)
        rhats = summary.loc[['xsplit','cosi','i','split']].Rhat.values
        
        if any(np.abs(rhats - 1.) >= 0.01):
            print('Rhats greater than 1.01 for KIC {}'.format(str(kic)))
            mal.loc[idx, 'rhat_flag'] = 'rhat_01'  
        
        if any(np.abs(rhats - 1.) >= 0.1):
            print('Rhats greater than 1.1 for KIC {}'.format(str(kic)))
            mal.loc[idx, 'rhat_flag'] = 'rhat_1'

        neff = summary.loc[['xsplit','cosi','i','split']].n_eff
        mal.loc[idx, 'neff_xsplit'] = int(neff['xsplit'])
        mal.loc[idx, 'neff_cosi'] = int(neff['cosi'])
        mal.loc[idx, 'neff_split'] = int(neff['split'])
        mal.loc[idx, 'neff_i'] = int(neff['i'])
            
    except FileNotFoundError:
        mal.loc[idx, 'flag'] = 'nosum'
        pass
    
    l, m, up = np.percentile(chains['xsplit'].values,[15.9, 50, 84.1])
    mal.loc[idx, 'l_nus*'], mal.loc[idx, 'nus*'], mal.loc[idx, 'u_nus*'] = m-l, m, up-m
    
    l, m, up = np.percentile(chains['cosi'].values, [15.9, 50, 84.1])
    mal.loc[idx, 'l_cosi'], mal.loc[idx, 'cosi'], mal.loc[idx, 'u_cosi'] = m-l, m, up-m
    
    l, m, up = np.percentile(chains['split'].values,[15.9, 50, 84.1])
    mal.loc[idx, 'l_nus'], mal.loc[idx, 'nus'], mal.loc[idx, 'u_nus'] = m-l, m, up-m
    
    l, m, up = np.percentile(chains['i'].values, [15.9, 50, 84.1])    
    mal.loc[idx, 'l_i'], mal.loc[idx, 'i'], mal.loc[idx, 'u_i'] =  m-l, m, up-m
    
    m, up = np.percentile(chains['numax'].values, [50, 84.1])    
    mal.loc[idx, 'fit_numax'], mal.loc[idx, 'efit_numax'] =  m, up-m

    m, up = np.percentile(chains['d01'].values, [50, 84.1])    
    mal.loc[idx, 'fit_d01'], mal.loc[idx, 'efit_d01'] =  m, up-m
    
    m, up = np.percentile(chains['d02'].values, [50, 84.1])    
    mal.loc[idx, 'fit_d02'], mal.loc[idx, 'efit_d02'] =  m, up-m
    
    m, up = np.percentile(chains['epsilon'].values, [50, 84.1])    
    mal.loc[idx, 'fit_epsilon'], mal.loc[idx, 'efit_epsilon'] =  m, up-m
    
    m, up = np.percentile(chains['V1'].values, [50, 84.1])    
    mal.loc[idx, 'V1'], mal.loc[idx, 'eV1'] =  m, up-m
    
    m, up = np.percentile(chains['V2'].values, [50, 84.1])    
    mal.loc[idx, 'V2'], mal.loc[idx, 'eV2'] =  m, up-m    
    
    nus = u.Quantity(chains['split'].values, u.microhertz)
    Pchain = 1./nus.to(1./u.day).value
    l, m, up = np.percentile(Pchain, [15.9, 50, 84.1])  
    mal.loc[idx, 'l_P'], mal.loc[idx, 'P'], mal.loc[idx, 'u_P'] = m-l, m, up-m
    
    Rkm = u.Quantity(mal.loc[idx, 'modrad'], u.solRad).to(u.km)
    vsinichain = chains['xsplit'].values * 2 * np.pi * Rkm.value / (1e6)
    l, m, up = np.percentile(vsinichain, [15.9, 50, 84.1])  
    mal.loc[idx, 'l_vsini'], mal.loc[idx, 'vsini'], mal.loc[idx, 'u_vsini'] = m-l, m, up-m
    
print(f'There are {incomplete} stars still to run.') 
print(f"Of these, {len(np.where((mal.rhat_flag == 'empty') & (mal.hrclass == 'MS'))[0])} lie in the MS region.")

 39%|███▉      | 37/95 [00:17<00:26,  2.16it/s]

Rhats greater than 1.01 for KIC 8760414


 40%|████      | 38/95 [00:17<00:26,  2.17it/s]

Rhats greater than 1.01 for KIC 8938364
Rhats greater than 1.1 for KIC 8938364


 44%|████▍     | 42/95 [00:20<00:44,  1.18it/s]

Rhats greater than 1.01 for KIC 9139163


 65%|██████▌   | 62/95 [00:28<00:12,  2.72it/s]

Rhats greater than 1.01 for KIC 12069127


 66%|██████▋   | 63/95 [00:28<00:11,  2.67it/s]

Rhats greater than 1.01 for KIC 12069424
Rhats greater than 1.1 for KIC 12069424


 86%|████████▋ | 82/95 [00:34<00:03,  4.03it/s]

Rhats greater than 1.01 for KIC 8349582
Rhats greater than 1.01 for KIC 8478994
Rhats greater than 1.1 for KIC 8478994


100%|██████████| 95/95 [00:38<00:00,  3.29it/s]

Rhats greater than 1.01 for KIC 11904151
There are 0 stars still to run.
Of these, 0 lie in the MS region.





## Finally I'm going to calculate the BP-RP errors.
SQL is broken and I can't add this script to get_data.ipynb, so I'm doing it here.

In [15]:
from astropy.table import Table
gkf = Table.read('../../data/kepler_dr2_1arcsec.fits', format='fits').to_pandas()

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [16]:
gkf = Table.read('../../data/kepler_dr2_1arcsec.fits', format='fits').to_pandas().rename(columns={
                    'kepid':'KIC'})[['KIC','phot_bp_mean_flux',
                                     'phot_bp_mean_flux_error',
                                     'phot_rp_mean_flux',
                                     'phot_rp_mean_flux_error']]

In [17]:
ebp = gkf['phot_bp_mean_flux_error'] / ((gkf['phot_bp_mean_flux']) * np.log(10))
erp = gkf['phot_rp_mean_flux_error'] / ((gkf['phot_rp_mean_flux']) * np.log(10))
gkf['ebprp'] = np.sqrt(ebp**2 + erp**2)
mal = pd.merge(mal, gkf[['KIC','ebprp']], on='KIC', how='left')

In [18]:
mal.to_csv('~/PhD/malatium/data/atium.csv')

## By-Eye investigations

I'm going to add flags based on by-eye inspection of the corner plots as well.

`-1 : no data`

`0   : no issues`

`1 : poorly constrained rotational parameters`

`2  : bimodal distributions`

`3   : divergence/poor sampling`

In [19]:
import corner
choice = input('Are we going to investigate the corners? (y/n) ')

if choice == 'y':
    for idx in range(95):
        kic = mal.loc[idx].KIC
        files = glob.glob('/home/oliver/PhD/mnt/RDS/malatium/peakbag/{}/*chains.csv'.format(str(kic)))

        try:
            
            chains = pd.read_csv(files[0],index_col=0)
        except IndexError:
            print('Star {} has not completed yet | idx {}'.format(str(kic), idx))
            mal.loc[idx, 'flagch'] = -1
            continue
            
        labels=['xsplit','cosi','i','split']
        chain = np.array([chains[label] for label in labels])
        
        corner.corner(chain.T, labels=labels, quantiles=[0.16, 0.5, 0.84]
                      ,show_titles=True)
        plt.show()
        
        print('KIC {}\n\
              0   : no issues \n\
              1 : poorly constrained rotational parameters\n\
              2  : bimodal distributions\n\
              3   : not converged'.format(str(kic)))
        flag = int(input('Flag: '))
        mal.loc[idx, 'chainflag'] = flag
        
else:
    mal['chainflag'] = pd.read_csv('../../data/atium.csv',usecols=['chainflag'])

Are we going to investigate the corners? (y/n) n


ValueError: Usecols do not match columns, columns expected but not found: ['chainflag']