In [1]:
import heliolinc3d as hl
import heliolinc3d.heliolinc3d_v2 as hl2

In [2]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import plotly.express as px
import warnings
import logging
warnings.filterwarnings('ignore')

In [3]:
from numba import njit

In [4]:
logger = logging.getLogger()
logger.setLevel( logging.INFO )
logging.debug(' initilizing logger...')

In [5]:
dfObs = pd.read_csv( '../neos_mbas_1month.csv')

In [6]:
# get a 14 day window
mjd_start = dfObs[ 'FieldMJD_TAI'].min()
mjd_stop = mjd_start + 14.0
dfObs = dfObs[ dfObs['FieldMJD_TAI'] <= mjd_stop ]
mjd_ref = 0.5*(mjd_start + mjd_stop)

In [7]:
dfObs.reset_index( inplace=True )
dfObs.rename( 
    columns = {
        'FieldMJD_TAI' : 'FieldMJD',
        'index' : 'obsName' 
    }, 
    inplace=True )

In [8]:
dfHeliolincInput=hl.obs2heliolinc(dfObs)
# add observer states for observation epochs
dfHeliolincInput[['x_obs','y_obs','z_obs']], dfHeliolincInput[['vx_obs','vy_obs','vz_obs']] = hl.getObserverStates(
    dfHeliolincInput['time'],
    origin='SSB',
    observer_location='X05',
    ephemeris_dt='1h',
    frame='ecliptic'
    )
dfHeliolincInput['night'] = hl.whichNight(dfHeliolincInput['time'])

In [9]:
dfHeliolincInput

Unnamed: 0,obsName,time,RA,DEC,obsId,x_obs,y_obs,z_obs,vx_obs,vy_obs,vz_obs,night
0,0,60796.013231,210.812507,-73.626046,0,-0.760799,-0.649628,-0.000151,0.010797,-0.013326,0.000083,60796
1,1,60796.037691,210.781540,-73.625285,1,-0.760535,-0.649954,-0.000148,0.010835,-0.013335,0.000089,60796
2,2,60796.097231,169.145905,-7.835319,2,-0.759887,-0.650749,-0.000143,0.010934,-0.013334,0.000093,60796
3,3,60796.120341,169.149066,-7.826511,3,-0.759634,-0.651057,-0.000141,0.010973,-0.013326,0.000091,60796
4,4,60796.120791,168.356483,-10.641907,4,-0.759629,-0.651063,-0.000141,0.010974,-0.013326,0.000091,60796
...,...,...,...,...,...,...,...,...,...,...,...,...
6202912,11927698,60808.160981,201.954568,-10.274897,6202912,-0.613102,-0.794164,-0.000120,0.013463,-0.010748,0.000074,60808
6202913,11927699,60808.185011,201.958576,-10.271149,6202913,-0.612779,-0.794422,-0.000119,0.013493,-0.010721,0.000064,60808
6202914,11927700,60809.096001,187.341383,7.238919,6202914,-0.600577,-0.803944,-0.000125,0.013545,-0.010570,0.000090,60809
6202915,11927701,60809.100931,180.363544,-5.266361,6202915,-0.600510,-0.803996,-0.000124,0.013553,-0.010568,0.000089,60809


In [10]:
# fig = px.scatter( dfHeliolincInput['RA'], dfHeliolincInput['DEC'] )

In [11]:
# fig.show()

In [12]:
# pairs = hl2.makeTracklets( 
#     dfHeliolincInput['RA'].to_numpy() ,
#     dfHeliolincInput['DEC'].to_numpy() ,
#     dfHeliolincInput['time'].to_numpy() ,
#     max_vel=3.5, # deg/day
#     max_time=2.0/24.0, # days
#     )

# x, v, dt, pairs = hl2.make_heliocentric_arrows( 
#     dfHeliolincInput['RA'].to_numpy() ,
#     dfHeliolincInput['DEC'].to_numpy() ,
#     dfHeliolincInput['time'].to_numpy() ,
#     dfHeliolincInput[['x_obs', 'y_obs', 'z_obs',]].to_numpy(),
#     pairs,
#     1.3,
#     0.0,
#     mjd_ref,
#     max_vel=0.05, # AU / day
#     )

In [13]:
# mjd_ref = 0.5*(dfHeliolincInput['time'].to_numpy().max() + dfHeliolincInput['time'].to_numpy().min())
# arrows = hl2.make_heliocentric_arrows_2(
#     dfHeliolincInput['RA'].to_numpy() ,
#     dfHeliolincInput['DEC'].to_numpy() ,
#     dfHeliolincInput['time'].to_numpy() ,
#     dfHeliolincInput[['x_obs', 'y_obs', 'z_obs',]].to_numpy(),
#     1.3,
#     0.0,
#     mjd_ref,
#     max_vel=0.05,
#     max_trk_time=2.0/24.0,
# )

In [14]:
dfHeliolincInput[['x_obs', 'y_obs', 'z_obs',]].values.T

array([[-7.60799297e-01, -7.60534736e-01, -7.59886668e-01, ...,
        -6.00577085e-01, -6.00510287e-01, -5.99521059e-01],
       [-6.49628377e-01, -6.49954450e-01, -6.50748522e-01, ...,
        -8.03944247e-01, -8.03996352e-01, -8.04762467e-01],
       [-1.50563912e-04, -1.48460181e-04, -1.42987832e-04, ...,
        -1.24549215e-04, -1.24108222e-04, -1.18309197e-04]])

In [24]:
dbscan_fit, L_A, pairs = hl2.heliolinc3d(
    dfHeliolincInput['RA'].to_numpy() ,
    dfHeliolincInput['DEC'].to_numpy() ,
    dfHeliolincInput['time'].to_numpy() ,
    dfHeliolincInput[['x_obs', 'y_obs', 'z_obs',]].to_numpy().T,
    1.2,
    0.0,
    # cr=0.00001
    cr=0.0001
)

INFO:heliolinc3d.heliolinc3d_v2: calculating arrows...
INFO:heliolinc3d.heliolinc3d_v2: tracklet search radius: 0.0047140452079103175 AU
INFO:heliolinc3d.heliolinc3d_v2: KD-tree construction time: 2.2483949661254883 s
INFO:heliolinc3d.heliolinc3d_v2: KD-tree construction time: 3.229146957397461 s
INFO:heliolinc3d.heliolinc3d_v2: found 2 potential pairs
INFO:heliolinc3d.heliolinc3d_v2: found 1432530 valid pairs
INFO:heliolinc3d.heliolinc3d_v2: found 1310894 valid pairs after velocity cut
INFO:heliolinc3d.heliolinc3d_v2: clustering angular momentum and eccentricity...


In [16]:
dbscan_fit

In [17]:
tracklets = pd.DataFrame({
    'observation_1'   : pairs[0],
    'observation_1_id': dfObs.iloc[ pairs[0] ]['ObjID'].values,
    'observation_2'   : pairs[1],
    'observation_2_id': dfObs.iloc[ pairs[1] ]['ObjID'].values,
    'l_x'             : L_A[0],
    'l_y'             : L_A[1],
    'l_z'             : L_A[2],
    'e_x'             : L_A[3],
    'e_y'             : L_A[4],
    'e_z'             : L_A[5],
    'cluster'         : dbscan_fit.labels_
})
tracklets = tracklets[ tracklets['cluster'] != -1 ]
tracklets

Unnamed: 0,observation_1,observation_1_id,observation_2,observation_2_id,l_x,l_y,l_z,e_x,e_y,e_z,cluster
22,8773,G376174,8774,G376174,0.001731,-0.006300,0.012829,0.009974,0.009788,0.003461,109
39,753755,S1009Tuda,753756,S1009Tuda,-0.002058,-0.002743,0.017882,0.006604,0.016642,0.003313,904
45,754362,S1009Tuda,754364,S1009Tuda,-0.001987,-0.002849,0.017928,0.006279,0.016814,0.003368,904
47,754358,S1009Tuda,754359,S1009Tuda,-0.002073,-0.002736,0.017921,0.006424,0.016757,0.003301,904
48,754357,S1009Tuda,754359,S1009Tuda,-0.002047,-0.002771,0.017925,0.006346,0.016789,0.003320,904
...,...,...,...,...,...,...,...,...,...,...,...
1310843,1867543,S1001r0Ga,1867608,S1001r0Ga,0.000838,0.001796,0.000364,-0.001749,0.000903,-0.000429,3422
1310861,5164679,S1004xj9a,5164732,S1004xj9a,0.001226,0.002053,-0.000378,-0.002066,0.001130,-0.000563,2552
1310868,5873549,S1005YW5a,5873595,S1005YW5a,0.000907,0.001824,0.000331,-0.001754,0.000964,-0.000504,3422
1310884,2944883,S100273Da,2944955,S100273Da,0.001439,0.001984,-0.001525,-0.002491,0.001299,-0.000661,2579


In [18]:
tracklet_clusters = tracklets.groupby( 'cluster' )
tracklet_clusters

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x1795c5dc0>

In [19]:
pairs_idx = np.array([[i,i] for i in range(pairs.shape[1])]).flatten()
obs_idx = pairs.flatten()
cluster_obs = pd.DataFrame({
    'obs_id' : obs_idx,
    'trk_idx' : pairs_idx,
    'cluster': dbscan_fit.labels_[ pairs_idx ],
    'obj_id' : dfObs.iloc[obs_idx]['ObjID'].values,
})
cluster_obs = cluster_obs[ cluster_obs['cluster'] != -1 ]
cluster_obs

Unnamed: 0,obs_idx,trk_idx,cluster,obs_id
44,754363,22,109,S1009Tuda
45,754362,22,109,S1009Tuda
78,4065765,39,904,S100364ta
79,4065764,39,904,S100364ta
90,1850785,45,904,S100ghGea
...,...,...,...,...
2621737,1867608,1310868,3422,S1001r0Ga
2621768,297599,1310884,2579,S1000ITMa
2621769,297672,1310884,2579,S1000ITMa
2621770,3466359,1310885,2750,S1002wrqa


In [20]:
for cluster in cluster_obs:
    print( cluster )

obs_idx
trk_idx
cluster
obs_id


In [21]:
pairs.flatten()

array([   7124, 2329249, 2329432, ..., 3942301, 1750973, 1750910])

In [22]:
len( np.unique(dbscan_fit.labels_) )

41307

In [23]:
len(L_E[:,0])

NameError: name 'L_E' is not defined

In [None]:
len(np.unique(dbscan_fit.labels_))

19607

In [None]:
len( dbscan_fit.labels_ ), len( L_E )

(2318452, 2318452)

In [None]:
@njit
def calculate_mean_states_2( labels, points ):
    n = len(np.unique(labels)) - 1
    d = points.shape[1]

    # logger.info( f' {n} clusters with {6} dimensions' )

    counts = np.zeros( n )
    mean_states = np.zeros( (n, d), dtype=points.dtype, )

    for label, point in zip(labels, points):
        mean_states[ label ] += point
        counts[ label ] += 1
    
    # mean_states /= counts
    return (mean_states.T / counts).T, counts
    # return mean_states, counts

In [None]:
def check_pairs( names, pairs ):
    mismatch = np.where( names[pairs[:, 0]] != names[pairs[:, 1]] )
    logger.info( f' {len(mismatch[0])} bad tracklets' )
    # if len(mismatch) == 1:
        # logger.info( f' at {mismatch[0]}' )

In [None]:
check_pairs( dfObs['ObjID'].values, pairs )

INFO:root: 152553 bad tracklets


In [None]:
# def collect_clusters( labels, pairs, points, names ):

#     clusters = { label: {
#         'ids': [],
#         'n_trks': 0,
#         'rating': 0,
#         'mean_state'
#     } for label in np.unique(labels)[1:]  }

#     for label, pair in zip(labels, pairs):
#         if label != -1:
#             ids = names[ pair ]
#             clusters[ label ][ 'ids' ].append( ids )
#             clusters[ label ][ 'n_trks' ] += 1

#             mean_state = np.mean(  )

#     clusters =  { label : {
#         'ids': np.unique( clusters[ label ]['ids'] ),
#         'n_trks': clusters[ label ]['n_trks'],
#         'rating': len(np.unique( clusters[ label ]['ids'] )),
#         'sigma_r': 
#     } for label in clusters.keys() }

#     return clusters


In [None]:
clusters = collect_clusters( dbscan_fit.labels_, pairs, dbscan_fit.components_, dfObs['ObjID'].values )
len( clusters)

19606

In [None]:
clusters

{0: {'ids': array(['S1000c0va'], dtype=object), 'n_trks': 3, 'rating': 1},
 1: {'ids': array(['S10030jAa'], dtype=object), 'n_trks': 4, 'rating': 1},
 2: {'ids': array(['S100eOFLa'], dtype=object), 'n_trks': 3, 'rating': 1},
 3: {'ids': array(['S1005gE5a'], dtype=object), 'n_trks': 3, 'rating': 1},
 4: {'ids': array(['S1001xdQa'], dtype=object), 'n_trks': 3, 'rating': 1},
 5: {'ids': array(['S1001kyMa', 'S100iyhWa'], dtype=object),
  'n_trks': 3,
  'rating': 2},
 6: {'ids': array(['S1007GPJa'], dtype=object), 'n_trks': 3, 'rating': 1},
 7: {'ids': array(['S1000zBma', 'S100uTyza'], dtype=object),
  'n_trks': 3,
  'rating': 2},
 8: {'ids': array(['S1004iDOa'], dtype=object), 'n_trks': 3, 'rating': 1},
 9: {'ids': array(['S1003ToVa', 'S1009vkFa'], dtype=object),
  'n_trks': 3,
  'rating': 2},
 10: {'ids': array(['S1001Jrta'], dtype=object), 'n_trks': 3, 'rating': 1},
 11: {'ids': array(['S100553Za'], dtype=object), 'n_trks': 4, 'rating': 1},
 12: {'ids': array(['S1004ctSa', 'S100hlpla'], 

In [None]:
logger.setLevel( logging.INFO )
mean_states, counts = calculate_mean_states_2( dbscan_fit.labels_, L_E )

In [None]:
np.any(counts == 0.0), np.sum( counts ), np.any(mean_states == 0.0)

(False, 1912814.0, False)

In [None]:
mean_states

array([[-0.00462895, -0.00345086,  0.02370017,  0.02366618,  0.00304667,
         0.00506592],
       [-0.00548261, -0.00126943,  0.03132543, -0.02103724,  0.02372734,
        -0.00272043],
       [-0.00546051, -0.00126474,  0.03119876, -0.02022954,  0.02426948,
        -0.0025568 ],
       ...,
       [-0.0021681 ,  0.00316397,  0.02126336,  0.02147791, -0.00059252,
         0.00227815],
       [ 0.00911051,  0.00364949,  0.03687614,  0.03463953, -0.01432809,
        -0.00713994],
       [-0.00227513,  0.00033303,  0.02894567,  0.00187481,  0.01074184,
        -0.000178  ]])

In [None]:
# unique_labels = np.unique(clusters.labels_)[1:]
# counts = np.zeros( len(unique_labels) )
# for i, label in enumerate( unique_labels ):
#     if label != i:
#         print(i, np.unique(clusters.labels_)[i+1])

# for (label, _) in zip(clusters.labels_, clusters.components_):
# # for label in clusters.labels_:
#     if label != -1:
#         counts[label] += 1


# len(np.unique(clusters.labels_)[1:][counts == 0.0 ])

4724

In [None]:
len( clusters.labels_), len(clusters.components_)

(1912814, 13466)

In [None]:
np.random.rand( 3, 10 ) * np.random.rand( 3, 10 )

array([[0.25722033, 0.28790878, 0.01166061, 0.23893145, 0.63000022,
        0.3300459 , 0.00709297, 0.40323104, 0.34092054, 0.10568957],
       [0.58105918, 0.0339664 , 0.1370016 , 0.01448471, 0.018097  ,
        0.49076873, 0.67529429, 0.20299409, 0.03651175, 0.05045339],
       [0.42210805, 0.1617699 , 0.15694712, 0.0026243 , 0.25416524,
        0.11581138, 0.04689292, 0.24780549, 0.04963919, 0.42037164]])