In [24]:
# Import basic packages
import numpy as np
import scipy as sp
import pandas as pd
import fitsio as ft


# ==== Scikit-learn =======================
# Preprocessing
from sklearn.preprocessing import StandardScaler #Standar scaler for standardization
from sklearn.preprocessing import RobustScaler #Robust scaler for standardization 
from sklearn.model_selection import train_test_split # For random split

# Classifiers
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Metrics
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from pprint import pprint

# ==========================================
# Matplotlib, urllib, etc.
import urllib
import urllib.request
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import clear_output, display
from PIL import Image
%matplotlib inline

In [25]:
# Load the catalogue stored in the FITS file below; later cells pull
# individual columns out of `data_in` by name.
eval_catalog_path = '/data/des80.b/data/burcinmp/y6_lsbg/y6/v2/y6_gold_2_0_lsb_skim.fits'
data_in = ft.read(eval_catalog_path)

In [26]:
# ======= Pull the needed columns out of the catalogue =======
# Object identifier, sky position and the two shape columns
coadd_ids_in = data_in['COADD_OBJECT_ID']
ras_in, decs_in = data_in['RA'], data_in['DEC']
A_IMAGE_in, B_IMAGE_in = data_in['A_IMAGE'], data_in['B_IMAGE']

# NOTE(review): every FLUX_RADIUS column is multiplied by 0.263 — presumably
# the detector pixel scale (arcsec/pixel), converting pixels to arcsec; confirm.
# NOTE: the MU_MAX_<band> columns are read but are NOT placed in the feature
# matrix below (only the MU_MAX_MODEL_<band> columns are).

# ---- g band ----
MAG_AUTO_G_in = data_in['MAG_AUTO_G']
FLUX_RADIUS_G_in = 0.263*data_in['FLUX_RADIUS_G']
MU_MAX_G_in = data_in['MU_MAX_G']
MU_MAX_MODEL_G_in = data_in['MU_MAX_MODEL_G']
MU_EFF_MODEL_G_in = data_in['MU_EFF_MODEL_G']
MU_MEAN_MODEL_G_in = data_in['MU_MEAN_MODEL_G']

# ---- r band ----
MAG_AUTO_R_in = data_in['MAG_AUTO_R']
FLUX_RADIUS_R_in = 0.263*data_in['FLUX_RADIUS_R']
MU_MAX_R_in = data_in['MU_MAX_R']
MU_MAX_MODEL_R_in = data_in['MU_MAX_MODEL_R']
MU_EFF_MODEL_R_in = data_in['MU_EFF_MODEL_R']
MU_MEAN_MODEL_R_in = data_in['MU_MEAN_MODEL_R']

# ---- i band ----
MAG_AUTO_I_in = data_in['MAG_AUTO_I']
FLUX_RADIUS_I_in = 0.263*data_in['FLUX_RADIUS_I']
MU_MAX_I_in = data_in['MU_MAX_I']
MU_MAX_MODEL_I_in = data_in['MU_MAX_MODEL_I']
MU_EFF_MODEL_I_in = data_in['MU_EFF_MODEL_I']
MU_MEAN_MODEL_I_in = data_in['MU_MEAN_MODEL_I']

# ======= Derived features =======
# Ellipticity from the two shape columns: 1 - B/A
Ell_in = 1. - B_IMAGE_in/A_IMAGE_in

# Colours (magnitude differences between bands)
col_g_r_in = MAG_AUTO_G_in - MAG_AUTO_R_in
col_g_i_in = MAG_AUTO_G_in - MAG_AUTO_I_in
col_r_i_in = MAG_AUTO_R_in - MAG_AUTO_I_in

# ======= Assemble the feature matrix =======
# The position of each entry in this list is its column index in X_mat_in,
# so the order matters: note that g-i comes BEFORE g-r.
feature_columns_in = [
    Ell_in,                                                       # 0      ellipticity
    col_g_i_in, col_g_r_in, col_r_i_in,                           # 1-3    colours
    MAG_AUTO_G_in, MAG_AUTO_R_in, MAG_AUTO_I_in,                  # 4-6    magnitudes
    FLUX_RADIUS_G_in, FLUX_RADIUS_R_in, FLUX_RADIUS_I_in,         # 7-9    flux radii
    MU_MAX_MODEL_G_in, MU_MAX_MODEL_R_in, MU_MAX_MODEL_I_in,      # 10-12  peak (max) surface brightness
    MU_EFF_MODEL_G_in, MU_EFF_MODEL_R_in, MU_EFF_MODEL_I_in,      # 13-15  effective surface brightness
    MU_MEAN_MODEL_G_in, MU_MEAN_MODEL_R_in, MU_MEAN_MODEL_I_in,   # 16-18  mean surface brightness
]

# One row per catalogue object, one column per feature (19 in total)
len_n = len(ras_in)
X_mat_in = np.zeros((len_n, len(feature_columns_in)))

# Copy every feature into its column; values are cast to the matrix's float64
for _feat_idx, _feat_col in enumerate(feature_columns_in):
    X_mat_in[:, _feat_idx] = _feat_col

In [27]:
# Fit a default RobustScaler on the feature matrix built from the catalogue
# (X_mat_in); only the fitted statistics are inspected in the cells below.
test_scaler = RobustScaler().fit(X=X_mat_in)

In [28]:
# Per-feature centre learned by the scaler fitted on X_mat_in
# (one value per feature-matrix column, in column order).
print(test_scaler.center_)

[ 0.32159781  0.71307087  0.5091362   0.20647621 22.76064396 22.29974747
 22.11220074  3.00220549  2.786533    2.49099267 26.36533546 25.83893013
 24.49539566 28.17219734 27.6464653  26.37687016 27.47080421 26.94398689
 25.61118793]


In [31]:
# Fit a second default RobustScaler, this time on the feature matrix stored in
# the .npy file below, so its statistics can be compared with `test_scaler`.
# The loaded array is not kept; only the fitted scaler is.
train_feat_path = '/data/des80.b/data/burcinmp/y6_lsbg/y6/test_classifier/random_forest/v3/X_mat_v4_a.npy'
train_scaler = RobustScaler().fit(X=np.load(train_feat_path))

In [32]:
# Per-feature centre learned by the scaler fitted on the stored .npy matrix.
print(train_scaler.center_)

[ 0.28029627  0.64396763  0.46206188  0.17678165 20.99273777 20.50767231
 20.34498978  3.13135135  3.00662017  2.84970486 24.44901371 23.72632027
 23.19342422 26.24743557 25.52617645 24.99985886 25.54815578 24.82653236
 24.29635048]


In [33]:
# Element-wise difference of the two fitted centres (train minus test);
# a non-zero entry means that feature is centred differently in the two sets.
train_scaler.center_ - test_scaler.center_

array([-0.04130155, -0.06910324, -0.04707432, -0.02969456, -1.76790619,
       -1.79207516, -1.76721096,  0.12914586,  0.22008717,  0.3587122 ,
       -1.91632175, -2.11260986, -1.30197144, -1.92476177, -2.12028885,
       -1.3770113 , -1.92264843, -2.11745453, -1.31483746])

In [34]:
# Per-feature scale (spread) learned by the scaler fitted on the stored .npy matrix.
print(train_scaler.scale_)

[0.23037366 0.45369148 0.31929541 0.18628502 2.44769526 2.46362448
 2.44898224 1.21065611 1.24490525 1.25156087 2.71634579 2.76666355
 1.95962429 2.72252941 2.77207041 1.99851084 2.71927881 2.77047539
 1.96399593]


In [35]:
# Per-feature scale (spread) learned by the scaler fitted on X_mat_in.
print(test_scaler.scale_)

[0.22805385 0.61607695 0.4174118  0.31510305 1.16271782 1.17871952
 1.22099066 0.84373379 0.97567743 1.20645282 1.10467625 1.91025496
 3.3534503  1.10384703 1.90816021 3.35874367 1.10740614 1.91773319
 3.35442305]


In [37]:
# Element-wise difference of the two fitted scales (train minus test).
train_scaler.scale_ - test_scaler.scale_

array([ 0.00231981, -0.16238546, -0.0981164 , -0.12881804,  1.28497744,
        1.28490496,  1.22799158,  0.36692232,  0.26922781,  0.04510805,
        1.61166954,  0.8564086 , -1.39382601,  1.61868238,  0.8639102 ,
       -1.36023283,  1.61187267,  0.8527422 , -1.39042711])

In [40]:
# Stack the catalogue feature matrix on top of the stored .npy feature matrix
# (rows appended, columns unchanged) and fit one default RobustScaler on the union.
train_feat_path = '/data/des80.b/data/burcinmp/y6_lsbg/y6/test_classifier/random_forest/v3/X_mat_v4_a.npy'
combo = np.concatenate([X_mat_in, np.load(train_feat_path)], axis=0)
combine_scaler = RobustScaler().fit(X=combo)

In [46]:
# Element-wise difference between the centre fitted on the combined matrix
# and the centre fitted on the stored .npy matrix alone.
combine_scaler.center_ - train_scaler.center_

array([ 0.04023898,  0.06664276,  0.04554749,  0.02825642,  1.75237846,
        1.77672672,  1.75143719, -0.12631381, -0.21454036, -0.34728527,
        1.90329075,  2.09478951,  1.23480606,  1.91186047,  2.10260773,
        1.30856419,  1.90974426,  2.09972954,  1.24682808])

In [43]:
# Write the (unscaled) feature matrix X_mat_in to disk as a .npy file.
eval_feat_out_path = '/data/des81.a/data/kherron/LSBG/Default_Robust/X_eval_feat.npy'
np.save(eval_feat_out_path, X_mat_in)