In [20]:
from pathlib import Path
import os
import sys
if str(Path.cwd().parent) not in sys.path:
    sys.path.append(str(Path.cwd().parent))
    
import warnings
from cycler import cycler
import pandas as pd
import matplotlib.pyplot as plt
from settings.paths import  validation_path, rf_path, bnn_path, flex_path, match_path
from utils.metrics import print_metrics_xval, print_metrics_test
from utils.preprocessing import rename_aper, prep_wise, missing_input, mag_redshift_selection, flag_observation


plt.rcParams["font.size"] = 22
blue = (0, 0.48, 0.70)
orange = (230/255,159/255, 0)
yellow = (0.94, 0.89, 0.26)
pink = (0.8, 0.47, 0.65)
CB_color_cycle = ['#377eb8', '#ff7f00', '#4daf4a',
                  '#f781bf', '#a65628', '#984ea3',
                  '#999999', '#e41a1c', '#dede00']
plt.rcParams['axes.prop_cycle'] = cycler('color', CB_color_cycle)

warnings.filterwarnings("ignore")
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [22]:
# Matched catalogue: read the comma-separated file, then run the selection and
# preprocessing helpers one after another, each receiving the previous result.
data = (
    pd.read_csv(os.path.join(match_path, "STRIPE82_DR4_DR16Q1a_unWISE2a_GALEXDR672a.csv"))
    .pipe(mag_redshift_selection, rmax=22, zmax=5)
    .pipe(prep_wise)
    .pipe(flag_observation)
    # .pipe(correction)  # step left disabled, as in the original cell
    .pipe(missing_input)
)

# Held-out test split, indexed by its "index" column.
test = pd.read_csv(
    os.path.join(validation_path, "test.csv"),
    index_col="index",
)

# Training split, indexed the same way.
train = pd.read_csv(
    os.path.join(validation_path, "train.csv"),
    index_col="index",
)

In [38]:
def fraction_of_non_observation(df, survey="WISE"):
    """Print the fraction of rows whose survey identifier column is NaN.

    The identifier column is ``objID_x`` for ``"WISE"`` and ``name`` for
    ``"GALEX"``; the fraction is taken over all rows of ``df``. Any other
    ``survey`` value prints nothing.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the identifier column of the requested survey.
    survey : str, default "WISE"
        ``"WISE"`` or ``"GALEX"``.

    Returns
    -------
    None
        The fraction is only printed.

    Raises
    ------
    ZeroDivisionError
        If ``df`` has no rows.
    """
    # Choose the column whose NaN entries are counted.
    if survey == "WISE":
        id_column = "objID_x"
    elif survey == "GALEX":
        id_column = "name"
    else:
        return

    n_without_id = int(df[id_column].isna().sum())
    print(n_without_id / len(df))
    
def fraction_of_non_detection(df, survey="WISE"):
    """Print, per band, the fraction of rows holding the 99 placeholder.

    ``"WISE"``: for ``W1`` and ``W2``, counts rows where the band equals 99
    and ``flag_WISE`` equals 0, divided by the total number of rows.

    ``"GALEX"``: for ``FUVmag`` and ``NUVmag``, counts rows where the band
    equals 99 and ``name`` is not NaN, divided by the number of rows whose
    ``name`` is not NaN.

    Any other ``survey`` value prints nothing.

    NOTE(review): the two surveys use different denominators (all rows for
    WISE, only rows with a ``name`` for GALEX) - confirm this is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the columns listed above for the requested survey.
    survey : str, default "WISE"
        ``"WISE"`` or ``"GALEX"``.

    Returns
    -------
    None
        The fractions are only printed.

    Raises
    ------
    ZeroDivisionError
        If the denominator described above is zero.
    """
    if survey == "WISE":
        flag_is_zero = df["flag_WISE"] == 0
        for label, column in (("W1:", "W1"), ("W2:", "W2")):
            n_placeholder = int(((df[column] == 99) & flag_is_zero).sum())
            print(label, n_placeholder / len(df))
        return

    if survey == "GALEX":
        # Restrict both numerator and denominator to rows with a GALEX name.
        has_name = df["name"].notna()
        n_named = int(has_name.sum())
        for label, column in (("FUV:", "FUVmag"), ("NUV:", "NUVmag")):
            n_placeholder = int(((df[column] == 99) & has_name).sum())
            print(label, n_placeholder / n_named)
        return
    
def fraction_of_missing(df, survey="WISE"):
    """Print, per band, the fraction of all rows whose value equals 99.

    Bands are ``W1``/``W2`` for ``"WISE"`` and ``FUVmag``/``NUVmag`` for
    ``"GALEX"``. Unlike ``fraction_of_non_detection`` no additional row
    filter is applied. Any other ``survey`` value prints nothing.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the band columns of the requested survey.
    survey : str, default "WISE"
        ``"WISE"`` or ``"GALEX"``.

    Returns
    -------
    None
        The fractions are only printed.

    Raises
    ------
    ZeroDivisionError
        If ``df`` has no rows.
    """
    # (printed label, column name) pairs for the requested survey.
    if survey == "WISE":
        bands = (("W1: ", "W1"), ("W2: ", "W2"))
    elif survey == "GALEX":
        bands = (("FUV: ", "FUVmag"), ("NUV: ", "NUVmag"))
    else:
        return

    n_rows = len(df)
    for label, column in bands:
        n_placeholder = int((df[column] == 99).sum())
        print(label, n_placeholder / n_rows)

In [39]:
# Full catalogue only; the train/test splits are reported in the next cell.
# Per survey: the NaN-identifier fraction first, then the per-band
# placeholder fractions from fraction_of_non_detection.
for survey in ("WISE", "GALEX"):
    fraction_of_non_observation(data, survey=survey)
    fraction_of_non_detection(data, survey=survey)

# Per-band fraction of rows equal to 99, with no additional row filter.
for survey in ("WISE", "GALEX"):
    fraction_of_missing(data, survey=survey)

0.060480830140870565
W1: 0.008747850743567313
W2: 0.08084220687158758
0.6919851588187385
FUV: 0.6410733522671629
NUV: 0.0
W1:  0.06922868088443787
W2:  0.14132303701245816
FUV:  0.8894452656028475
NUV:  0.6919851588187385


In [24]:
# Same two reports for the full catalogue and for the train/test splits.
# Call order: survey, then report type, then data set.
# NOTE(review): the output stored under this cell gives W1 = 0.0692 for `data`,
# whereas the previous cell gives 0.0087 for the same call; the stored output
# appears to predate the current fraction_of_non_detection - re-run to refresh.
for survey in ("WISE", "GALEX"):
    for report in (fraction_of_non_observation, fraction_of_non_detection):
        for frame in (data, train, test):
            report(frame, survey=survey)

0.060480830140870565
0.06069259542291759
0.059845559845559844
W1: 0.06922868088443787
W2: 0.14132303701245816
W1: 0.06962152596227325
W2: 0.14177693761814744
W1: 0.06805019305019305
W2: 0.13996138996138996
0.6919851588187385
0.6917105739452198
0.6928088803088803
FUV: 0.6410733522671629
NUV: 0.0
FUV: 0.636399217221135
NUV: 0.0
FUV: 0.655145326001571
NUV: 0.0


In [63]:
# Metrics from cross-validation: one report per "val*.csv" file in rf_path.
print("---RF---")
# os.listdir returns entries in arbitrary order; sorting makes the report
# order identical on every run and on every machine.
for file in sorted(os.listdir(rf_path)):
    if file.endswith(".csv") and file.startswith("val"):
        # Label = text after the last "z_", minus the 4-character ".csv" suffix.
        print(file.split("z_")[-1][:-4])
        results = pd.read_csv(os.path.join(rf_path, file))
        print_metrics_xval(results)

---RF---
broad+GALEX+WISE+flags
RMSE 0.4245 0.0101
NMAD 0.1021 0.0028
bias -0.0011 0.006
n15 0.2285 0.0034
n30 0.0705 0.0021
broad+GALEX+WISE
RMSE 0.4245 0.0104
NMAD 0.1022 0.0023
bias -0.0012 0.0063
n15 0.227 0.0041
n30 0.0697 0.0013
broad+WISE+narrow
RMSE 0.5341 0.0048
NMAD 0.1421 0.0033
bias 0.0022 0.0052
n15 0.3563 0.0046
n30 0.1457 0.0049
broad+GALEX+WISE+narrow+flags
RMSE 0.4101 0.0097
NMAD 0.0926 0.0017
bias 0.0009 0.0068
n15 0.2177 0.0047
n30 0.0666 0.0014
broad+WISE+narrow+flags
RMSE 0.5264 0.0047
NMAD 0.1408 0.0024
bias 0.0024 0.005
n15 0.3477 0.0033
n30 0.1414 0.0028
broad+narrow
RMSE 0.5752 0.0032
NMAD 0.1799 0.0028
bias 0.0006 0.003
n15 0.4287 0.0048
n30 0.1865 0.0032
broad+GALEX+WISE+narrow
RMSE 0.41 0.0095
NMAD 0.0926 0.002
bias 0.0009 0.0067
n15 0.2164 0.0049
n30 0.0662 0.0008
broad
RMSE 0.6463 0.0058
NMAD 0.2173 0.0037
bias 0.0007 0.0048
n15 0.4921 0.0054
n30 0.2252 0.004


In [None]:
# Prediction tables on the test set. For each method, "*_all" is read from the
# file that includes the narrow bands and "*_broad" from the one without them
# (for BMDN, presumably BNWG vs BWG - confirm the naming).

# Random Forest
rf_all = pd.read_csv(Path(rf_path) / "test_z_broad+GALEX+WISE+narrow+flags.csv")
rf_broad = pd.read_csv(Path(rf_path) / "test_z_broad+GALEX+WISE+flags.csv")

# BMDN
bmdn_all = pd.read_csv(Path(bnn_path) / "bnn_BNWG_flag.csv")
bmdn_broad = pd.read_csv(Path(bnn_path) / "bnn_BWG_flag.csv")

# FlexCoDE
flex_all = pd.read_csv(Path(flex_path) / "test_z_broad+GALEX+WISE+narrow+flags.csv")
flex_broad = pd.read_csv(Path(flex_path) / "test_z_broad+GALEX+WISE+flags.csv")



In [49]:
# Metrics from testing set
# Each entry: (section header, predictions without narrow bands,
#              predictions with narrow bands, prediction column).
# NOTE(review): values are compared positionally against test.Z, so each
# prediction file is assumed to list objects in the same order as `test` -
# confirm.
for header, broad_only, with_narrow, column in (
    ("---RF---", rf_broad, rf_all, "z_pred"),
    ("---FlexCoDE---", flex_broad, flex_all, "z_flex_peak"),
    ("---BMDN---", bmdn_broad, bmdn_all, "zphot"),
):
    print(header)
    print("Without narrow bands")
    print_metrics_test(test.Z.to_numpy(), broad_only[column].to_numpy())
    print("With narrow bands")
    print_metrics_test(test.Z.to_numpy(), with_narrow[column].to_numpy())



---RF---
Without narrow bands
RMSE 0.4227
NMAD 0.1003
bias -0.0024
n15 0.2245
n30 0.0683
With narrow bands
RMSE 0.409
NMAD 0.0889
bias 0.0021
n15 0.2186
n30 0.0649
---FlexCoDE---
Without narrow bands
RMSE 0.475
NMAD 0.0851
bias 0.0431
n15 0.2207
n30 0.0819
With narrow bands
RMSE 0.457
NMAD 0.0394
bias 0.0159
n15 0.2086
n30 0.0779
---BMDN---
Without narrow bands
RMSE 0.4592
NMAD 0.0825
bias 0.0365
n15 0.2072
n30 0.0764
With narrow bands
RMSE 0.4282
NMAD 0.0481
bias 0.0199
n15 0.1879
n30 0.0658
