# Set up

In [1]:
import importlib
import os
import sys

import numpy as np
import pandas as pd

In [2]:
# import our modules
# Put ./utils at the front of the module search path so that data_cleaning is
# importable. The path is relative, so it is resolved against the current
# working directory of the kernel.
sys.path.insert(0, './utils')

# classes used below: ProperMotionStats (proper motion statistics) and CleanData (ruwe and distance cuts)
from data_cleaning import ProperMotionStats 
from data_cleaning import CleanData

In [3]:
# Reload data_cleaning so that edits made to the module on disk are picked up
# without restarting the kernel, then re-import the two classes so the names
# in this notebook refer to the reloaded definitions.
# NOTE(review): instances created before the reload keep using the old class
# definitions; re-create them after running this cell.
importlib.reload(sys.modules['data_cleaning'])
from data_cleaning import ProperMotionStats 
from data_cleaning import CleanData

In [18]:
# setup path to directories where data are stored
HomeDir = './'
# Location of the star-star catalogs. The default is the original author's
# local path; set the LENSVELOCITY_DATA_DIR environment variable to point the
# notebook at another machine's copy instead of editing this cell.
# os.path.join(..., '') guarantees the trailing separator that the
# `DataDir + '<file>.csv'` concatenations in the cells below rely on.
DataDir = os.path.join(
    os.environ.get('LENSVELOCITY_DATA_DIR',
                   '/Users/crimondino/Dropbox (PI)/LensVelocity2/data/star_star/'),
    '')

# Read in raw data and perform astrometric quality and distance cuts

In [3]:
# read in the raw data files for the foreground and background stars (before the ruwe and distance cuts)
fg_raw_cols = ['source_id', 'ra', 'dec', 'pmra', 'pmdec', 'pmra_error', 'pmdec_error',
               'parallax', 'parallax_error', 'phot_g_mean_mag', 'ruwe',
               'dist_50', 'dist_14', 'dist_86', 'l', 'b']
# the background selection additionally reads 'pmra_pmdec_corr' and 'dist_error'
bg_raw_cols = ['source_id', 'ra', 'dec', 'pmra', 'pmdec', 'pmra_error', 'pmdec_error', 'pmra_pmdec_corr',
               'parallax', 'parallax_error', 'phot_g_mean_mag', 'ruwe',
               'dist_50', 'dist_14', 'dist_86', 'dist_error', 'l', 'b']

df_fore = pd.read_csv(DataDir + 'fg_raw.csv', usecols=fg_raw_cols)
df_back = pd.read_csv(DataDir + 'bg_raw.csv', usecols=bg_raw_cols)

# display the number of rows in each catalog (background first)
len(df_back), len(df_fore)

(15138051, 15138051)

In [None]:
# create an instance of the CleanData class to perform ruwe and distance cuts
clean = CleanData()
# configure the instance with its default parameters (default=True)
# NOTE(review): the default values themselves are defined in the data_cleaning module, not in this notebook — check there before changing the cuts
clean.setup_params(default=True)

In [None]:
# apply the astrometric quality cut, then the distance cut, to both catalogs
# NOTE(review): df_fore and df_back are rebound to the returned frames, so the
# raw frames are no longer available afterwards and re-running this cell feeds
# already-cut data back into the cuts; re-read the raw files for a clean re-run.
df_fore, df_back = clean.astrometric_quality(df_fore, df_back)
df_fore, df_back = clean.distance_cut(df_fore, df_back)

In [41]:
# save the cleaned foreground and background catalogs
# (no index column; rows are written in chunks of 100000)
csv_opts = dict(index=False, chunksize=100000)
df_fore.to_csv(DataDir + 'fg_clean.csv', **csv_opts)
df_back.to_csv(DataDir + 'bg_clean.csv', **csv_opts)

# Read in cleaned data and perform background subtraction

In [5]:
# read in the cleaned data files for the foreground and background stars (after the ruwe and distance cuts)
fg_clean_cols = ['source_id', 'ra', 'dec', 'pmra', 'pmdec', 'pmra_error', 'pmdec_error',
                 'parallax', 'parallax_error', 'phot_g_mean_mag',
                 'dist_50', 'dist_14', 'dist_86', 'l', 'b']
# the background selection additionally reads 'pmra_pmdec_corr' and 'dist_error'
bg_clean_cols = ['source_id', 'ra', 'dec', 'pmra', 'pmdec', 'pmra_error', 'pmdec_error', 'pmra_pmdec_corr',
                 'parallax', 'parallax_error', 'phot_g_mean_mag',
                 'dist_50', 'dist_14', 'dist_86', 'dist_error', 'l', 'b']

df_fore = pd.read_csv(DataDir + 'fg_clean.csv', usecols=fg_clean_cols)
df_back = pd.read_csv(DataDir + 'bg_clean.csv', usecols=bg_clean_cols)

# display the number of rows in each catalog (background first)
len(df_back), len(df_fore)

(15138051, 15138051)

In [6]:
# create an instance of the ProperMotionStats class to compute the proper motion statistics (mean and variance):
# subtract the mean, remove the outliers, and compute the effective errors
stats = ProperMotionStats()
# use the default binning and the default parameters (default=True in both calls)
# NOTE(review): the default bin layout and parameter values are defined in the data_cleaning module — confirm there before relying on them
stats.setup_bins(default=True)
stats.setup_params(default=True)

In [7]:
# iterative proper motion subtraction and outlier removal
#
# Each pass calls stats.compute_stats with final_call=False, which returns the
# updated foreground/background frames together with out_frac. The loop stops
# after n_iter passes or as soon as out_frac is no longer above out_frac_tol,
# whichever comes first.
iter_n = 0            # passes completed so far; reused as iter_n for the final call in the next cell
out_frac = 1          # start above the tolerance so the first pass always runs
n_iter = 10           # maximum number of passes; each iteration takes around 2 minutes
out_frac_tol = 1E-5   # convergence tolerance on out_frac
# NOTE(review): out_frac is presumably the fraction of outliers removed in the pass (as a fraction, not a percentage) — confirm in compute_stats

# `and` (not bitwise `&`) is the boolean operator intended for these scalar comparisons
while iter_n < n_iter and out_frac > out_frac_tol:
    df_fore_n, df_back_n, out_frac = stats.compute_stats(df_fore, df_back, iter_n=iter_n, final_call=False)
    # feed the result of this pass into the next one
    df_fore, df_back = df_fore_n, df_back_n
    iter_n += 1

Iter 0 -- fraction of stars in sparse bins: 15.28730 %
Iter 0 -- fraction of outliers removed: 2.483746 %
Iter 1 -- fraction of stars in sparse bins: 2.557835 %
Iter 1 -- fraction of outliers removed: 1.648274 %
Iter 2 -- fraction of stars in sparse bins: 0.715149 %
Iter 2 -- fraction of outliers removed: 0.783062 %
Iter 3 -- fraction of stars in sparse bins: 0.161387 %
Iter 3 -- fraction of outliers removed: 0.361407 %
Iter 4 -- fraction of stars in sparse bins: 0.075459 %
Iter 4 -- fraction of outliers removed: 0.169932 %
Iter 5 -- fraction of stars in sparse bins: 0.030268 %
Iter 5 -- fraction of outliers removed: 0.082062 %
Iter 6 -- fraction of stars in sparse bins: 0.017689 %
Iter 6 -- fraction of outliers removed: 0.039358 %
Iter 7 -- fraction of stars in sparse bins: 0.004497 %
Iter 7 -- fraction of outliers removed: 0.019452 %
Iter 8 -- fraction of stars in sparse bins: 0.001342 %
Iter 8 -- fraction of outliers removed: 0.009647 %
Iter 9 -- fraction of stars in sparse bins: 0.

In [8]:
# perform the subtraction one last time without removing the outliers
#
# With final_call=True, compute_stats returns only the two frames (no outlier
# fraction). The pandas chained-assignment check is switched off only for the
# duration of this call, so the setting is restored afterwards instead of
# staying disabled for the rest of the session.
# NOTE(review): the warnings come from column assignments inside compute_stats;
# the proper fix is there (assign on an explicit copy or via .loc). The captured
# warning text also shows a column written as 'pmdec_sub_' (trailing underscore)
# next to 'pmra_sub' — check whether that name is intended.
with pd.option_context('mode.chained_assignment', None):
    df_fore_n, df_back_n = stats.compute_stats(df_fore, df_back, iter_n=iter_n, final_call=True)
df_fore, df_back = df_fore_n, df_back_n

Iter 10 -- fraction of stars in sparse bins: 0.001419 %
Adding columns with the new stats...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_back['pmra_sub'] = pm_sub[:, 0]; df_back['pmdec_sub_'] = pm_sub[:, 1];
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_back['pmra_eff_error'] = np.sqrt(tab_var_pmra)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_back['pmdec_eff_error'] = np.sqrt(tab_var_pmdec)
A value is trying to be set o

In [24]:
# save the background-subtracted foreground and background catalogs
# (no index column; rows are written in chunks of 100000)
csv_opts = dict(index=False, chunksize=100000)
df_fore.to_csv(DataDir + 'fg_subtracted.csv', **csv_opts)
df_back.to_csv(DataDir + 'bg_subtracted.csv', **csv_opts)