In [39]:
# Comparative data volumes for ground-based surveys
import numpy as np
import pandas as pd
import random

from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral6

#Bokeh libraries and modules
from bokeh.io import  show, reset_output, output_notebook, export_png
from bokeh.plotting import figure
from bokeh.models import Range1d, FactorRange, ColumnDataSource, LabelSet, HoverTool
from bokeh.layouts import gridplot, row, column
from bokeh.transform import factor_cmap
from bokeh.models.annotations import Label
from bokeh.palettes import Spectral5, Viridis256, Colorblind, Magma256, Turbo256, Blues8

# Switch Bokeh to in-notebook display mode so the figures shown with show()
# appear inline in the notebook.
output_notebook()

# Bokeh visualization toolset: the interactive tools to offer on a figure,
# written as the comma-separated string of tool names (names are self-explanatory).
# NOTE(review): presumably meant to be passed as `tools=TOOLS` to figure() - confirm.
TOOLS = ",".join(("pan", "wheel_zoom", "box_zoom", "reset", "save"))

In [40]:
# Numbers for surveys (per day or 24hr period)
# Numbers in TB
#
# Row layout: Key, Name, Start, Night, label, Raw, Catalog[, Reference].
# Surveys are switched on/off by (un)commenting their row. The trailing
# Reference field is optional: short rows are padded with None below, so the
# table still builds when no active row carries a reference.

SURVEY_COLUMNS = ['Key', 'Name', 'Start', 'Night', 'label', 'Raw', 'Catalog', 'Reference']


def pad_row(row, width, fill=None):
    """Return ``row`` as a list of exactly ``width`` items.

    Args:
        row: sequence of field values, possibly missing trailing fields.
        width: number of fields the result must have.
        fill: value used for every missing trailing field.

    Returns:
        A new list; ``row`` itself is not modified.

    Raises:
        ValueError: if ``row`` has more than ``width`` items.
    """
    fields = list(row)
    if len(fields) > width:
        raise ValueError(
            f"row {fields!r} has {len(fields)} fields, expected at most {width}"
        )
    return fields + [fill] * (width - len(fields))


survey_rows = [
    ['2MASS', '2 Micron All-Sky Survey', 1997, 0.0138, '14GB', '', ''],
    # ['VLT', '', 1998, 0.010, '10GB', '', ''],
    # ['HST', '', 1990, 0.003, '3GB', '', ''],
    ['SDSS', '', 2000, 0.2, '200GB', '', ''],
    ['Vista', '', 2009, 0.315, '315GB', '', ''],

    ['Pan-STARRS', '', 2010, 4, '4TB', '4PB', '', 'https://arxiv.org/pdf/1612.05240.pdf'],
    # ['DES', '', 2013, 2.5, '2.5TB', '', ''],
    ['ZTF', '', 2017, 6, '6TB', '', ''],
    # ['Gaia', '', 2014, 40, '40TB', '60PB', '15PB'],
    ['LSST', '', 2023, 20, '20TB', '60PB', '15PB'],
    # ['Roman', '', 2023, 1.4, '1.4TB', '20PB', ''],
    # ['ELT', '', 2025, 2, '2TB', '', '', ''],
    ['TMT', '', 2027, 90, '90TB', '', ''],
    # ['SKA', '', 2027, 15 * 10**14, '150TB', '3ZB', ''],
]

# Pad before building the frame: pandas only tolerates short rows when some
# other row happens to have the full number of fields.
data = pd.DataFrame(
    [pad_row(row, len(SURVEY_COLUMNS)) for row in survey_rows],
    columns=SURVEY_COLUMNS,
)
volume = data['Night']
label = data['label']
# Category text for each survey: "<Key>\n(<Start>)"
surveys = data["Key"].astype(str) + "\n(" + data["Start"].astype(str) + ")"
# Columns handed to Bokeh: the survey category text, the nightly volume and
# the human-readable volume label, keyed by the names the glyphs refer to.
source = ColumnDataSource(
    data={
        "surveys": surveys,
        "volume": volume,
        "label": label,
    }
)

In [28]:
# Linear-scale bar chart of the nightly data volume, one bar per survey.
# `height` is used instead of `plot_height`, which Bokeh 3.0 no longer accepts.
p = figure(x_range=surveys, height=250,
           toolbar_location=None,
           title="Data volume per night")

# Bars are coloured per survey category from the Blues8 palette.
p.vbar(x='surveys', top='volume', width=0.9, source=source,
       line_color='white',
       fill_color=factor_cmap('surveys', palette=Blues8, factors=surveys))

p.xgrid.grid_line_color = None
# State the unit on the value axis (volumes are in TB), as the log-scale plot does.
p.yaxis.axis_label = 'Data Volume (TB)'
# Optional: pin the value axis instead of letting Bokeh auto-range it.
# p.y_range.start = 0
# p.y_range.end = 9

show(p)

In [53]:
# Log plot: same nightly volumes on a logarithmic value axis, with the
# human-readable volume printed above each bar.
from bokeh.models import BasicTickFormatter


# `height`/`width` are used instead of `plot_height`/`plot_width`, which
# Bokeh 3.0 no longer accepts.
p_log = figure(x_range=surveys, height=450, width=600,
               y_axis_type="log", y_range=[0.01, 500],
               toolbar_location=None,
               title="Evolution of nightly Data Volume for ground-based surveys")

# A log axis has no zero, so the bars start from a small positive value that
# lies below the visible range (y_range starts at 0.01).
p_log.vbar(x='surveys', width=0.9, bottom=0.001, top='volume', source=source,
           fill_alpha=.8, line_alpha=.5,
           line_color='black', line_dash='solid',
           fill_color=factor_cmap('surveys', palette=Blues8, factors=surveys))

# Plain decimal tick labels rather than scientific notation.
p_log.yaxis.formatter = BasicTickFormatter(use_scientific=False)

# Text from the 'label' column, centred on each bar and offset 5 units above its top.
# `render_mode` is intentionally not passed: 'canvas' was the default where the
# option existed, and Bokeh 3.0 rejects the argument.
labels = LabelSet(x='surveys', y='volume', text='label',
                  text_font_size="13px", text_color="#555555",
                  x_offset=0, y_offset=5, source=source,
                  text_align='center')
p_log.add_layout(labels)

p_log.yaxis.axis_label = 'Data Volume (TB)'
#p_log.xaxis.axis_label = 'Surveys'
p_log.title.text_font_size = '12pt'
p_log.xaxis.axis_label_text_font_size = '12pt'
p_log.yaxis.axis_label_text_font_size = '12pt'
p_log.xaxis.major_label_text_font_size = "11pt"
p_log.yaxis.major_label_text_font_size = "11pt"
#p_log.axis.axis_label_text_font_style = 'bold'

show(p_log)

In [9]:
# Export 
# p_log.background_fill_color = None
# p_log.border_fill_color = None
# from bokeh.io import export_png, export_svg
# export_svg(p_log, filename="plot.svg")

In [89]:
# Survey final archive Data Volume - Images
