In [1]:
%pylab inline
import h5py
import fitsio
import astropy.units as u
from astropy.io import fits
from astropy.table import Table
from astropy.visualization import hist
from astropy import coordinates as coords
from scipy import stats
import networkx as nx

import gwb

Populating the interactive namespace from numpy and matplotlib


In [2]:
print(style.available)

['seaborn-whitegrid', 'seaborn-deep', 'ggplot', 'grayscale', 'seaborn-paper', 'seaborn-bright', 'seaborn-dark', 'seaborn-white', 'seaborn-colorblind', 'seaborn-dark-palette', 'seaborn-talk', 'classic', 'seaborn-darkgrid', 'seaborn-muted', 'dark_background', 'seaborn-pastel', 'seaborn-ticks', 'fivethirtyeight', 'bmh', 'seaborn-notebook', 'seaborn-poster']


In [3]:
style.use(['seaborn-colorblind', 'notebook.mplstyle'])

# Load Data

In [4]:
tgas = gwb.TGASData('../data/stacked_tgas.fits')

In [5]:
# Load the random-pair control sample and the `lnH1` / `lnH2` datasets computed for it.
pairidx_rand = fits.getdata('../output/random/snr8_random200000.fits')
# Open read-only explicitly and read whole datasets with `[()]`:
# `Dataset.value` is deprecated and no longer exists in h5py >= 3.0.
with h5py.File("../output/random/snr8_random200000_vscatter0-lratio.h5", "r") as f:
    lnH1_rand = f['lnH1'][()]
    lnH2_rand = f['lnH2'][()]
# Difference of the two datasets (presumably a log likelihood ratio -- confirm).
llr_rand = lnH1_rand - lnH2_rand
# throw out nans
bad = isnan(llr_rand)
pairidx_rand = pairidx_rand[~bad]
lnH1_rand = lnH1_rand[~bad]
lnH2_rand = lnH2_rand[~bad]
llr_rand = llr_rand[~bad]

# pairidx_rand_sn32 = fits.getdata('../output/random/snr32_random100000.fits')
# with h5py.File("../output/random/snr32_random100000_vscatter0-lratio.h5") as f:
#     lnH1_rand_sn32 = f['lnH1'].value
#     lnH2_rand_sn32 = f['lnH2'].value
#     llr_rand_sn32 = lnH1_rand_sn32 - lnH2_rand_sn32
# # throw out nans
# bad = isnan(llr_rand_sn32)
# pairidx_rand_sn32 = pairidx_rand_sn32[~bad]
# lnH1_rand_sn32 = lnH1_rand_sn32[~bad]
# lnH2_rand_sn32 = lnH2_rand_sn32[~bad]
# llr_rand_sn32 = llr_rand_sn32[~bad]

# pairidx = fits.getdata('../output/21081/snr8_n128_dv10_new.fits')
# with h5py.File("../output/21081/snr8_n128_dv10_vscatter0-lratio.h5") as f:
#     lnH1 = f['lnH1'].value
#     lnH2 = f['lnH2'].value
#     llr = lnH1 - lnH2
# Load the candidate pair indices and the `lnH1` / `lnH2` datasets computed for them.
pairidx = fits.getdata('../output/23560/snr8_r10_dv10.fits')
# Open read-only explicitly and read whole datasets with `[()]`:
# `Dataset.value` is deprecated and no longer exists in h5py >= 3.0.
with h5py.File("../output/23560/snr8_r10_dv10_vscatter0-lratio.h5", "r") as f:
    lnH1 = f['lnH1'][()]
    lnH2 = f['lnH2'][()]
# Difference of the two datasets (presumably a log likelihood ratio -- confirm).
llr = lnH1 - lnH2

In [6]:
# Per-star quantities pulled from the TGAS wrapper (`.value` drops the units).
parallax_snr = tgas.parallax_snr
vtan = tgas.get_vtan().value
c = tgas.get_coord()
d = tgas.get_distance().value

# Per-pair quantities, indexed in the same order as `pairidx`.
star1, star2 = pairidx['star1'], pairidx['star2']
# Smaller of the two parallax S/N values in each pair.
min_snr = np.min(np.vstack((parallax_snr[star1], parallax_snr[star2])), axis=0)
# Magnitude of the difference, and the mean, of the two `vtan` rows.
dvtan = norm(vtan[star1]-vtan[star2], axis=1)
vtanmean = (vtan[star1] + vtan[star2])*0.5
# 3-D separation (units dropped) and on-sky separation (kept as returned).
sep = c[star1].separation_3d(c[star2]).value
sep_sky = c[star1].separation(c[star2])

c1 = c[star1]
c2 = c[star2]
ra1, dec1 = c1.ra.value, c1.dec.value
ra2, dec2 = c2.ra.value, c2.dec.value
# Transform each member to Galactic coordinates once and read both l and b from
# the result, instead of repeating the transform for every component.
c1_gal = c1.transform_to(coords.Galactic)
c2_gal = c2.transform_to(coords.Galactic)
l1, b1 = c1_gal.l.value, c1_gal.b.value
l2, b2 = c2_gal.l.value, c2_gal.b.value
d1 = d[star1]
d2 = d[star2]
dmean = (d1+d2)*0.5

  tmp = self._data['parallax'] * (0.5 + 0.5*np.sqrt(1 - 16/snr**2))


In [7]:
pairidx.size

271232

In [8]:
# Select pairs whose `llr` exceeds 6; also report how many of those have `sep` below 1.
cond_lr_cut = llr > 6
n_selected = cond_lr_cut.sum()
n_selected_close = sum((sep < 1) & cond_lr_cut)
print(n_selected, n_selected_close)
cmpairs = pairidx[cond_lr_cut]

13058 377


In [9]:
# Fraction of random pairs passing the same `llr > 6` cut: over the whole random
# sample, and over the subset with `delta_v` below 10.
rand_passes_cut = llr_rand > 6
rand_small_dv = pairidx_rand['delta_v'] < 10
sum(rand_passes_cut)/llr_rand.size, sum(rand_passes_cut & rand_small_dv)/rand_small_dv.sum()

(3.4999999999999997e-05, 0.0001199760047990402)

In [78]:
# Load the table of 2MASS matches used below for the `j_mag` column.
# NOTE(review): this is an absolute path into one user's home directory, so the
# notebook cannot be re-run elsewhere; consider a path relative to the repository
# (the commented-out alternative below reads a different file/extension).
tmass = fits.getdata('/Users/semyeong/data/gaia/tgas_source/tgas-matched-2mass.fits')
# tmass = fits.getdata('../data/tgas_tmassj.fits', ext=1)

In [79]:
# gjcolor = tgas._data['phot_g_mean_mag']- tmass['j_m']
# G - J colour, and G shifted by 5*(log10(parallax*1e-3) + 1).
# NOTE(review): that is the absolute-magnitude formula if parallax is in mas -- confirm.
# Non-positive parallaxes produce NaN/-inf here (hence the runtime warning).
g_apparent = tgas._data['phot_g_mean_mag']
parallax_scaled = tgas._data['parallax']*1e-3
gjcolor = g_apparent - tmass['j_mag']
gMag = g_apparent + 5*(log10(parallax_scaled)+1)

  app.launch_new_instance()


# Examine the network of pairs

In [80]:
# One (star1, star2) edge per selected pair, as native Python ints.
edgelist = [
    (int(first), int(second))
    for first, second in zip(cmpairs['star1'], cmpairs['star2'])
]
# Every distinct star index that appears in at least one edge.
nodes = unique(edgelist).astype(int)

In [111]:
g = nx.Graph()

In [112]:
# Per-star columns that are attached to the graph nodes as attributes below.
# `.value` drops the units; `l` and `b` are used exactly as the wrapper returns them.
colra = tgas.ra.value
coldec = tgas.dec.value
colglon = tgas.l
colglat = tgas.b
# Recomputes the distances through `get_distance()` (same call that fills `d` earlier).
coldist = tgas.get_distance().value
colgj = gjcolor
colgMag = gMag


  tmp = self._data['parallax'] * (0.5 + 0.5*np.sqrt(1 - 16/snr**2))


In [113]:
# Build (node, attribute-dict) tuples for every star that has both a finite
# colour and a finite absolute magnitude; other stars get no attribute entry.
nodes_attr = []
for node in nodes:
    # Explicit boolean test instead of bitwise `~`: `~` only acts as logical
    # negation on numpy.bool_ (on a Python bool, `~False == -1` and
    # `~True == -2`, both truthy), so the old form silently depended on the
    # operand type.
    if isnan(colgj[node]) or isnan(colgMag[node]):
        continue
    attr = dict(ra=colra[node],
                dec=coldec[node],
                glon=colglon[node],
                glat=colglat[node],
                dist=coldist[node],
                gj=colgj[node],
                gMag=colgMag[node])
    # Node ids are stored as native ints (not numpy integers).
    nodes_attr.append((int(node), attr))

In [114]:
# Add the attributed nodes first, then the pair edges.
# NOTE(review): `add_edges_from` creates any endpoint that is not yet in the graph,
# so stars left out of `nodes_attr` (NaN colour/magnitude) still end up as nodes,
# just without attributes -- confirm that is intended.
g.add_nodes_from(nodes_attr)
g.add_edges_from(edgelist)

In [115]:
len(g.nodes())

10606

In [49]:
isnan(colgj[nodes]).sum()

149

In [127]:
# Label every node with the rank of its connected component (0 = largest).
components = sorted(nx.connected_components(g), key=len, reverse=True)
# The loop variable is named `component`, not `g`, so it no longer shadows the graph.
dic = {n: groupi for groupi, component in enumerate(components) for n in component}
# Keyword arguments: the positional order of (name, values) was swapped between
# networkx 1.x and 2.x, while the keyword names are the same in both.
nx.set_node_attributes(g, name='group', values=dic)

In [137]:
# Connected-component subgraphs ordered from largest to smallest, their sizes,
# and the union of the first 100 of them.
components_desc = sorted(
    nx.connected_component_subgraphs(g, copy=True), key=len, reverse=True)
subgraphs = array(components_desc)
sizes = array([len(component) for component in subgraphs])
gsmall = nx.union_all(subgraphs[:100])


In [125]:
import json
from networkx.readwrite import json_graph

In [138]:
data = json_graph.node_link_data(gsmall)
# do not use numpy.int64, use python native int

In [139]:
# Serialise the node-link data compactly (no whitespace) with sorted keys.
with open('../../mywebpage/vis/gaia-comoving-stars/data.json', 'w') as f:
    json.dump(data, f, sort_keys=True, separators=(',',':'))

In [None]:
# Summarise the connected components of `graph`.
# NOTE(review): `graph` is only created in a later cell of this notebook.
# Components are kept in a plain list: wrapping them in `array(...)` yields a
# 2-D array whenever all components have the same size, in which case `.size`
# counts stars rather than components (and ragged input is rejected by recent
# numpy). The components are also computed once instead of twice.
connected = [array(list(c)) for c in nx.connected_components(graph)]
sizes = array([len(c) for c in connected])
print('number of nodes %i' % (len(graph)))
print('total number of connected components %i' % (len(connected)))
print(min(sizes),max(sizes))

In [None]:
# Find the star with the most neighbours in `graph` and show its catalogue row.
# Materialise nodes and neighbours as lists: on networkx >= 2 `graph.nodes()` is a
# view that cannot be indexed by position and `graph.neighbors()` is an iterator
# without `len`; wrapping both in `list` works on 1.x and 2.x alike.
node_list = list(graph.nodes())
nn_nodes = array([len(list(graph.neighbors(i))) for i in node_list])
most_connected = node_list[nn_nodes.argmax()]
print('most connected star ind %i connection size %i' % (most_connected, nn_nodes.max()))
print(tgas[most_connected]._data)

In [None]:
subgraphs = list(nx.connected_component_subgraphs(graph))

In [None]:
import json
from networkx.readwrite import json_graph

In [None]:
# Build a graph from the first 500 selected pairs and convert it to node-link form.
first_star1 = cmpairs['star1'][:500]
first_star2 = cmpairs['star2'][:500]
graph = nx.from_edgelist([(int(i), int(j)) for i, j in zip(first_star1, first_star2)])
data = json_graph.node_link_data(graph)

In [None]:
data.keys()

In [None]:
type(data['nodes'][0]['id'])

In [None]:
# Write the node-link data as indented JSON with sorted keys.
with open('../../gwb-web/data.json', 'w') as f:
    json.dump(data, f, indent=4, sort_keys=True)

In [None]:
# Connected-component subgraphs of `graph`, largest first, plus the size of each.
ordered_components = sorted(
    nx.connected_component_subgraphs(graph), key=len, reverse=True)
Gc = array(ordered_components)
sizes = array([len(component) for component in Gc])

In [None]:
# Plot the members of the 7th-largest component (longitude vs. distance) together
# with the MWSC clusters, labelling the clusters that fall inside the group's
# longitude/distance bounding box.
# NOTE(review): `mwsc` is only read in a later cell of this notebook.
a = array(Gc[6].nodes())
lon_members = tgas.l[a]
dist_members = d[a]
plot(lon_members, tgas.get_distance()[a], '.')
plot(mwsc['GLON'], mwsc['d'], 'ro')
inside_box = ((mwsc['d']<dist_members.max()) & (mwsc['d']>dist_members.min())
              & (mwsc['GLON']>lon_members.min()) & (mwsc['GLON']<lon_members.max()))
for cl in mwsc[inside_box]:
    text(cl['GLON'], cl['d'], cl['Name'].replace('_', ' '))
# Axis limits are the bounding box scaled by 0.9 / 1.1.
xlim(lon_members.min()*0.9, lon_members.max()*1.1)
ylim(dist_members.min()*0.9, dist_members.max()*1.1)

In [5]:
# Read the catalogue in CDS format (data file plus its ReadMe) and report how many
# entries it has in total and with `d` below 600.
mwsc = Table.read(
    '../data/J_A+A_585_A101/catalog.dat',
    format='ascii.cds',
    readme='../data/J_A+A_585_A101/ReadMe',
)
n_mwsc_total = len(mwsc)
n_mwsc_nearby = (mwsc['d']<600).sum()
print('total number of mwsc', n_mwsc_total)
print('number of mwsc d<600 pc', n_mwsc_nearby)

total number of mwsc 3210
number of mwsc d<600 pc 135


In [6]:
mwsc['MWSC', 'Name', 'GLON', 'GLAT', 'd'].write('../../gwb-web/mwsc.csv', format='ascii.csv')

In [None]:
def _group_polyline(members, lon_all, dist_all):
    """Return NaN-separated (longitude, distance) polylines for equal-sized groups.

    Parameters
    ----------
    members : sequence of equal-length sequences of int
        Star indices, one inner sequence per group.
    lon_all, dist_all : array-like
        Per-star longitude and distance, indexed by star index.

    Returns
    -------
    lon, dist : 1-D float arrays
        Coordinates of every group in turn, each followed by a NaN so that a
        single `plot` call draws one disconnected line per group. Both arrays
        are empty when `members` is empty.
    """
    members = np.asarray(members, dtype=int)
    if members.ndim != 2 or members.shape[0] == 0:
        return np.empty(0), np.empty(0)
    lon = np.array(lon_all[members], dtype=float)
    dist = np.array(dist_all[members], dtype=float)
    # Order each group by longitude and apply the SAME permutation to the
    # distances; sorting the longitudes alone attaches distances to the wrong stars.
    order = np.argsort(lon, axis=1)
    rows = np.arange(lon.shape[0])[:, None]
    lon, dist = lon[rows, order], dist[rows, order]
    # Members more than 180 deg away from the group's smallest longitude are
    # shifted by -360 so the segment does not span the whole plot.
    lon[lon - lon[:, :1] > 180] -= 360.
    gap = np.full((lon.shape[0], 1), np.nan)
    return np.hstack([lon, gap]).ravel(), np.hstack([dist, gap]).ravel()


figure(figsize=(10,5))

# Components with exactly two and exactly three members are drawn as line segments.
pairs = [list(sub.nodes()) for sub in Gc[sizes==2]]
x, y = _group_polyline(pairs, tgas.l, d)
plot(x, y, alpha=.3, c='k', label='size 2')

pairs = [list(sub.nodes()) for sub in Gc[sizes==3]]
x, y = _group_polyline(pairs, tgas.l, d)
plot(x, y, alpha=.7, c='b', label='size 3')

# `cm.spectral` was removed from recent matplotlib; fall back to it only when
# `nipy_spectral` is unavailable.
group_cmap = getattr(cm, 'nipy_spectral', None) or getattr(cm, 'spectral')
for s in Gc[sizes>=4]:
    # Index with the list of node ids; a Graph object is not a valid array index.
    members = list(s.nodes())
    plot(tgas.l[members], d[members], '.', c=group_cmap(uniform()))

# Catalogue clusters with `d` below 600, drawn as open black-edged circles.
nearby_clusters = mwsc[mwsc['d']<600]
plot(nearby_clusters['GLON'], nearby_clusters['d'], 'ro', ms=8, mfc='None', mec='k', mew=1)

xlim(0,360)
ylim(0,500)
# leg = legend(loc='upper left')

xlabel('Galactic longitude [deg]')
ylabel('distance [pc]')

In [None]:
mwsc.sort('d')

In [None]:
mwsc.show_in_browser()

In [None]:
where((llr>6) & (l1>90) & (l1<120) & (abs(l1-l2)>180))

In [None]:
pairidx[12749]

In [None]:
# Show the catalogue entry and the Galactic coordinates of two specific stars.
for star_index in (704255, 1325999):
    star = tgas[star_index]
    print( star )
    print( star.get_coord().transform_to(coords.Galactic) )


In [None]:
tgas[704255].get_coord().separation_3d(tgas[1325999].get_coord())

In [None]:
tgas[704255].l, d[704255], tgas[1325999].l, d[1325999]

# Color-Magnitude Diagrams

In [None]:
tmass = fits.getdata('/Users/semyeong/data/gaia/tgas_source/tgas-matched-2mass.fits')

In [None]:
# G - J colour, and G shifted by 5*(log10(parallax*1e-3) + 1).
# NOTE(review): that is the absolute-magnitude formula if parallax is in mas -- confirm.
phot_g = tgas._data['phot_g_mean_mag']
gjcolor = phot_g - tmass['j_mag']
log_parallax = log10(tgas._data['parallax']*1e-3)
gMag = phot_g + 5*(log_parallax+1)

In [None]:
# distance matching sample: every distinct star that belongs to a selected pair,
# and the distance of each such star
ind_uniq = union1d(cmpairs['star1'], cmpairs['star2'])
print(ind_uniq.size)
dpairs = d[ind_uniq]

In [None]:
hist(dpairs)
xlabel('distance [pc]')
ylabel('count')

In [None]:
from sklearn.neighbors import KDTree

In [None]:
# Stars with parallax S/N above 8 that also have the `matched` flag set; build a
# 1-D KD-tree on their distances so neighbours in distance can be looked up.
high_snr = parallax_snr > 8
cond = high_snr & tmass['matched']
# Positions of the selected stars in the full catalogue.
ind_snr = arange(len(tgas))[cond]
d_snr = d[cond]
# KDTree expects a 2-D (n_samples, n_features) array; here there is one feature.
dtree = KDTree(d_snr.reshape(-1, 1))

In [None]:
plot(dpairs, dtree.query_radius(atleast_2d(dpairs).T, 1.5, count_only=True), '.')
yscale('log')
xlabel('distance')
ylabel('N(stars within 1.5pc)')

In [None]:
N_per_star = 10
result = dtree.query_radius(atleast_2d(dpairs).T, 1.5,)

In [None]:
# For every pair member with at least one neighbour in distance, draw
# `N_per_star` of those neighbours at random (numpy's `choice` samples with
# replacement by default) and map them back to catalogue indices.
ind_control = hstack([
    ind_snr[choice(neighbours, size=N_per_star)]
    for neighbours in result
    if len(neighbours) != 0
])

In [None]:
d[ind_control]

In [None]:
# Compare the normalised distance distributions of the pair members and of the
# control sample, using the same bin edges for both.
# The histogram values go into `counts`, not `c`: `c` already holds the
# coordinate object created earlier in this notebook and must not be overwritten.
# NOTE(review): `normed` was removed in matplotlib 3.1 (`density` replaces it);
# left unchanged because the installed matplotlib version is not known here.
counts, bins, patches = hist(dpairs, normed=True, histtype='step')
xlabel('distance [pc]')
ylabel('count')
hist(d[ind_control], bins=bins, normed=True, histtype='step')

In [None]:
plot(gjcolor[ind_control], gMag[ind_control], 'k,')

In [None]:
# 2-D histogram (512 bins per axis) of the control sample in colour vs. magnitude,
# shown with a logarithmic colour scale.
# The result goes into `counts`, not `c`: `c` already holds the coordinate object
# created earlier in this notebook. `histogram2d` returns the edges of its first
# argument first, so the edge arrays are named after what they actually contain.
counts, color_edges, mag_edges = histogram2d(gjcolor[ind_control], gMag[ind_control], 512,)
color_mid = (color_edges[1:]+color_edges[:-1])*0.5
mag_mid = (mag_edges[1:]+mag_edges[:-1])*0.5
# counts[i, j] belongs to colour bin i and magnitude bin j, hence this grid layout.
mag_grid, color_grid = meshgrid(mag_mid, color_mid)
pcolormesh(color_grid, mag_grid, counts, norm=mpl.colors.LogNorm(), cmap='Blues', zorder=-1)


In [None]:
def draw_cmd():
    """Draw the control sample's colour-magnitude density on the current axes.

    Reads the notebook globals `gjcolor`, `gMag` and `ind_control`, bins the
    selected stars into a 512 x 512 two-dimensional histogram and renders it
    with `pcolormesh` using a logarithmic colour scale, the 'Blues' colormap
    and zorder -1. Returns nothing.
    """
    color = gjcolor[ind_control]
    mag = gMag[ind_control]
    counts, color_edges, mag_edges = histogram2d(color, mag, 512)
    # Bin centres along each axis.
    color_mid = (color_edges[1:] + color_edges[:-1])*0.5
    mag_mid = (mag_edges[1:] + mag_edges[:-1])*0.5
    # counts[i, j] belongs to colour bin i and magnitude bin j.
    mag_grid, color_grid = meshgrid(mag_mid, color_mid)
    pcolormesh(color_grid, mag_grid, counts,
               norm=mpl.colors.LogNorm(), cmap='Blues', zorder=-1)


In [None]:
# Count the entries of `mepairs` whose `sep` field is below 1.
# NOTE(review): `mepairs` is not defined anywhere in the visible part of this
# notebook -- possibly a typo for `cmpairs`, or left over from a deleted cell; confirm.
(mepairs['sep']<1).sum()