In [1]:
%%capture
if True:
  !pip install astroquery
  !pip install plotly
  !pip install kaleido
  !pip install nbformat

In [2]:
import torch
import pandas as pd
import numpy as np
import pickle
import plotly.graph_objects as go
import sklearn
from sklearn.cluster import DBSCAN
import os
import sys
from google.colab import drive
# Mount Google Drive so the project files are reachable under /content/drive/.
drive.mount('/content/drive/')
# Project source directory on the mounted Drive. The trailing slash in "src/"
# matters: later code builds paths with plain string concatenation (ROOT + "...").
ROOT = os.path.join("/content", "drive", "MyDrive", "Colab_Data", "src/")
# Make the modules in <ROOT>/common_code importable.
# Presumably the three local imports below live there — verify.
sys.path.append(ROOT + "common_code")

from plotly_style import update_layout
# NOTE(review): a later cell of this notebook defines its own `get_flux`, which
# shadows this import once that cell has run.
from flux_table import get_flux
from fluxtable_to_tensor import fluxtable_to_tensor
# with open(ROOT + "models/model.pt", "rb") as f:
#   model = torch.load(f)
# state = torch.load(ROOT + "models/model.pt")
# When True, the data-loading cell reads datasets/test_buckets.pkl; the
# False branch of that cell is currently a no-op.
TEST = True
# NOTE(review): not referenced in the visible part of the notebook.
BATCHSIZE = 10

Mounted at /content/drive/


In [69]:
# Flux densities assigned to magnitude 0 in the magnitude -> flux conversion.
# NOTE(review): these look like the WISE W1/W2 zero points in Jy — confirm.
_W1_ZERO_POINT = 309.54
_W2_ZERO_POINT = 171.787


def _column_as_float(dets, index):
    """Copy column `index` of `dets` as floats, mapping the string 'null' to 0."""
    # Work on a private copy: the array returned by DataFrame.to_numpy() may
    # be a view of the caller's data (or read-only), so never write into it.
    column = np.array(dets[:, index], dtype=object)
    column[column == 'null'] = 0
    return column.astype(float)


def _zscore(values):
    """Return (z-scores, mean, std) of `values`; the z-scores are 0 if std is 0."""
    mean = np.nanmean(values)
    std = np.nanstd(values)
    if std > 0:
        scores = np.nan_to_num((values - mean) / std)
    else:
        # Constant series: every deviation is 0, so avoid the 0/0 division.
        scores = np.zeros_like(values)
    return scores, mean, std


def get_flux(tbl, cols, flux_offset=0.0, flux_softening=0.001):
    """Build raw and normalized light-curve arrays from a detection table.

    Rows are sorted by `mjd`; every returned array follows that order.

    Args:
        tbl: DataFrame of detections (one row per detection).
        cols: Column labels of `tbl`, in order (e.g. `tbl.columns`). Must
            contain "mjd", "w1mpro", "w2mpro", "w1sigmpro" and "w2sigmpro".
        flux_offset: Value subtracted from the flux before the arcsinh
            normalization.
        flux_softening: Non-zero divisor applied to the flux before the
            arcsinh normalization.

    Returns:
        dict with three entries:
          "raw":     "w1"/"w2" magnitudes, "w1flux"/"w2flux", "w1sig"/"w2sig",
                     "mjd", "day" (mjd minus the first mjd) and "dt" (list of
                     gaps to the previous row; the first entry is the median
                     gap, or 0 when there is a single row).
          "norm":    z-scored "w1"/"w2", their "w1std"/"w2std", arcsinh-scaled
                     "w1flux"/"w2flux", unchanged "w1sig"/"w2sig" and "mjd",
                     "day" scaled to [0, 1] and arcsinh-scaled "dt".
          "analyze": {"mean": {"w1", "w2"}, "median": {"w1", "w2"}}.

    Raises:
        ValueError: if `tbl` has no rows or a required column is missing.

    NOTE(review): missing magnitudes ('null' or NaN) are replaced by 0, so they
    enter the mean/std and convert to the full zero-point flux. This is kept
    as-is because changing it would alter the inputs of anything trained on
    this preprocessing — confirm whether masking is wanted.
    """
    if len(tbl) == 0:
        raise ValueError("get_flux needs at least one detection; got an empty table")

    dets = tbl.to_numpy()
    colnames = list(cols)

    def column(name):
        return _column_as_float(dets, colnames.index(name))

    # Sort every series by observation date.
    mjds = column("mjd")
    order = np.argsort(mjds)
    mjds = mjds[order]
    w1mpro = np.nan_to_num(column("w1mpro"), nan=0.0)[order]
    w1sig = np.nan_to_num(column("w1sigmpro"), nan=0.0)[order]
    w2mpro = np.nan_to_num(column("w2mpro"), nan=0.0)[order]
    w2sig = np.nan_to_num(column("w2sigmpro"), nan=0.0)[order]

    # Summary statistics and plain z-score normalization of the magnitudes.
    w1norm, w1mean, w1std = _zscore(w1mpro)
    w2norm, w2mean, w2std = _zscore(w2mpro)
    w1median = np.nanmedian(w1mpro)
    w2median = np.nanmedian(w2mpro)

    # Magnitude -> flux, then arcsinh compression of the flux.
    w1flux = _W1_ZERO_POINT * 10 ** (-w1mpro / 2.5)
    w2flux = _W2_ZERO_POINT * 10 ** (-w2mpro / 2.5)
    w1flux_norm = np.arcsinh((w1flux - flux_offset) / flux_softening)
    w2flux_norm = np.arcsinh((w2flux - flux_offset) / flux_softening)

    # Days since the first observation, scaled to [0, 1]. When every row
    # shares one epoch the span is 0, so report zeros instead of 0/0.
    day = mjds - mjds[0]
    span = np.max(day)
    day_norm = day / span if span > 0 else np.zeros_like(day)

    # Time since the previous observation. The first row has no predecessor,
    # so it is given the median gap (0 when there is only one row).
    gaps = np.diff(day)
    first_gap = np.median(gaps) if gaps.size else 0.0
    dt = [first_gap, *gaps]  # kept as a list, as callers received before

    # Scale gaps by their median before the arcsinh. Duplicate epochs can
    # make the median 0; fall back to a unit scale rather than divide by 0.
    dt_scale = np.median(dt)
    if not dt_scale > 0:
        dt_scale = 1.0
    dt_norm = np.arcsinh(np.asarray(dt, dtype=float) / dt_scale)

    data_dict = {
        "raw": {
          "w1": w1mpro,
          "w1flux": w1flux,
          "w1sig": w1sig,
          "w2": w2mpro,
          "w2flux": w2flux,
          "w2sig": w2sig,
          "mjd": mjds,
          "day": day,
          "dt": dt
        },
        "norm": {
          "w1": w1norm,
          "w1flux": w1flux_norm,
          "w1std": w1std,
          "w1sig": w1sig,
          "w2": w2norm,
          "w2std": w2std,
          "w2flux": w2flux_norm,
          "w2sig": w2sig,
          "mjd": mjds,
          "day": day_norm,
          "dt": dt_norm
        },
        "analyze": {
            "mean": {
                "w1": w1mean,
                "w2": w2mean
            },
            "median": {
                "w1": w1median,
                "w2": w2median
            }
        }
    }

    return data_dict

In [47]:
# (mean ra, mean dec) of each labelled object -> its class label.
object_key = {}
# Row position in `raw` -> class label of the object that row came from.
test_row_key = {}
# All detections of all objects, stacked into one table.
raw = pd.DataFrame()

if TEST:
  # NOTE(security): pickle.load can execute arbitrary code; only load files
  # from a trusted source.
  with open(ROOT + "datasets/test_buckets.pkl", "rb") as f:
    test_buckets = pickle.load(f)

  frames = []
  n_rows = 0  # number of rows already assigned to earlier objects
  for kind in test_buckets:
    for df in test_buckets[kind]:
      center = (np.mean(df["ra"].to_numpy()), np.mean(df["dec"].to_numpy()))
      object_key[center] = kind

      # This object's rows occupy positions [n_rows, n_rows + len(df)) of the
      # stacked table. (Starting at len(raw) - 1 labelled row -1 and shifted
      # every label by one row.)
      for i in range(n_rows, n_rows + len(df)):
        test_row_key[i] = kind

      frames.append(df)
      n_rows += len(df)

  # Concatenate once instead of growing `raw` inside the loop.
  if frames:
    raw = pd.concat(frames, ignore_index=True)

  raw.insert(len(raw.columns), "idx", list(range(len(raw))), True) #indexing
else:
  pass

Clustering and collecting objects

In [51]:
# Cluster detections by sky position. DBSCAN marks noise points with label -1.
# NOTE(review): this is a plain Euclidean distance on (ra, dec), so `eps` is in
# the same units as those columns and ignores RA wrap-around and the cos(dec)
# factor — confirm this is acceptable for the fields used here.
clstr_tbl = raw[["ra", "dec"]]
dbscan = DBSCAN(eps=1.25, min_samples=6).fit(clstr_tbl)
labels = dbscan.labels_

# Collect the rows of each cluster in a single pass. Growing one DataFrame per
# label with a row-by-row pd.concat is quadratic in the number of rows.
# sort=False keeps the keys in order of first appearance, and each group keeps
# its rows in their original order with their original index.
clustered = labels != -1
objects = {
  label: rows
  for label, rows in raw[clustered].groupby(labels[clustered], sort=False)
}

In [53]:
# Total number of detection rows loaded.
raw.shape[0]

5227

In [54]:
# Rows assigned to any cluster; equal to the total row count when DBSCAN
# marked nothing as noise.
print(sum(len(cluster) for cluster in objects.values()))

5227


In [80]:
# Number of distinct mjd values across all rows; a value below len(raw)
# means some rows share an mjd.
len(set(raw["mjd"]))

3994

In [77]:
# Distinct mjd values minus row count for cluster 1. A negative result means
# that many rows repeat an mjd already present in the cluster, which produces
# zero-length time gaps in get_flux.
len(set(objects[1]["mjd"])) - len(objects[1]["mjd"])

-474

In [None]:
# Smoke test: run the notebook-local get_flux on cluster 0 and dump the result.
print(get_flux(tbl=objects[0], cols=objects[0].columns))