In [1]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
# Configure Bokeh so that show() renders figures inline in the notebook
# output cells.
output_notebook()

In [156]:
# Load the halo tables. Each row of WHIM_data.txt is laid out as:
# ID, mass, size, followed by the WHIM sizes.
# Column 1 is kept as the halo mass and columns 3 onward as the WHIM sizes.
d2048 = np.loadtxt('./2048z3/WHIM_data.txt')
hm2048, WHIM2048 = d2048[:, 1], d2048[:, 3:]

d4096 = np.loadtxt('./4096z05/WHIM_data.txt')
hm4096, WHIM4096 = d4096[:, 1], d4096[:, 3:]

In [3]:
# Comma-separated Bokeh tool names, passed to figure(tools=...) by scatter().
# The pieces below concatenate to a single string (trailing comma included).
TOOLS = (
    "hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,"
    "box_zoom,undo,redo,reset,tap,save,"
    "box_select,poly_select,lasso_select,"
)

In [148]:
# First make a basic scatter plot
def scatter(hm,WHIM_sizes):
    '''
    Scatter-plot every WHIM size against the log10 mass of its halo.

    hm: 1-D array-like of halo masses; log10 is taken before plotting.
    WHIM_sizes: array-like whose rows are indexed like hm (row i holds the
        WHIM sizes belonging to halo i). Any number of columns is accepted;
        a 1-D input is treated as one WHIM size per halo.

    Raises ValueError if hm is not 1-D or WHIM_sizes does not have exactly
    one row per halo.

    The figure is displayed with show(); nothing is returned.
    '''
    log_hm = np.log10(np.asarray(hm, dtype=float))
    sizes = np.asarray(WHIM_sizes, dtype=float)
    if sizes.ndim == 1:
        sizes = sizes[:, np.newaxis]
    if log_hm.ndim != 1 or sizes.ndim != 2 or sizes.shape[0] != log_hm.shape[0]:
        raise ValueError(
            "hm must be 1-D and WHIM_sizes must have one row per halo; "
            "got shapes %s and %s" % (log_hm.shape, sizes.shape)
        )

    # Repeat each halo's log-mass once per column so x lines up with the
    # row-major flattening of the sizes. The column count comes from the data
    # rather than being fixed at 26.
    x = np.repeat(log_hm, sizes.shape[1])
    y = sizes.ravel()

    p = figure(tools=TOOLS, x_axis_label="log10(halo mass)", y_axis_label="WHIM size")
    p.scatter(x=x, y=y)
    show(p)

In [149]:
# Histogram of the halo-mass distribution
def hm_hist(hm,nbins=10):
    '''
    Draw a histogram of log10(hm).

    hm: halo masses; the base-10 logarithm is taken before binning.
    nbins: forwarded to np.histogram as `bins` (10 by default).

    The figure is displayed with show(); nothing is returned.
    '''
    # Raw counts per bin (density=False) of the log-masses.
    counts, bin_edges = np.histogram(np.log10(hm), bins=nbins, density=False)

    # Each bar spans one pair of consecutive edges.
    left_edges = bin_edges[:-1]
    right_edges = bin_edges[1:]

    fig = figure(title='', tools="save", background_fill_color="#E8DDCB")
    fig.quad(
        left=left_edges,
        right=right_edges,
        bottom=0,
        top=counts,
        fill_color="#036564",
        line_color="#033649",
    )
    show(fig)

In [193]:
# Now make a box plot function:
def _whim_bin_stats(hm, WHIM_sizes, nbins=10):
    '''
    Bin halos by log10 mass and summarise the WHIM sizes that fall in each bin.

    Returns (edges, centers, mean_pos, pcts):
      edges    - bin edges from np.histogram(log10(hm), bins=nbins)
      centers  - midpoint of every bin
      mean_pos - mean log10 mass of the halos in each bin, or the bin midpoint
                 when the bin holds no halos
      pcts     - array of shape (n_bins, 5) with the 5th, 25th, 50th, 75th and
                 95th percentiles of the non-NaN WHIM sizes in each bin; rows
                 for bins without data are left at zero
    '''
    percentiles = (5, 25, 50, 75, 95)

    log_hm = np.log10(np.asarray(hm, dtype=float))
    sizes = np.asarray(WHIM_sizes, dtype=float)

    # Only the edges are needed; the counts are discarded.
    _, edges = np.histogram(log_hm, density=False, bins=nbins)
    # Derive the bin count from the edges so that any `bins` value accepted by
    # np.histogram (int, edge sequence, rule name) works here too.
    n_bins = len(edges) - 1

    centers = (edges[:-1] + edges[1:]) / 2
    mean_pos = centers.copy()
    pcts = np.zeros((n_bins, len(percentiles)))

    for i in range(n_bins):
        in_bin = log_hm >= edges[i]
        if i == n_bins - 1:
            # np.histogram closes the last bin on the right. A strict '<' here
            # would drop the halo(s) sitting exactly on the maximum edge.
            in_bin &= log_hm <= edges[i + 1]
        else:
            in_bin &= log_hm < edges[i + 1]

        if not in_bin.any():
            continue

        mean_pos[i] = log_hm[in_bin].mean()

        values = sizes[in_bin].ravel()
        values = values[~np.isnan(values)]
        if values.size:
            pcts[i] = np.percentile(values, percentiles)

    return edges, centers, mean_pos, pcts


def boxplot(hm,WHIM_sizes,nbins=10):
    '''
    Box plot of WHIM sizes in bins of log10 halo mass.

    hm: 1-D array-like of halo masses.
    WHIM_sizes: array-like whose rows are indexed like hm (row i belongs to
        halo i). NaN entries are ignored.
    nbins: forwarded to np.histogram as `bins`; 10 by default.

    Each bin gets a box from the 25th to the 75th percentile, split at the
    median, with whiskers at the 5th and 95th percentiles. Boxes are centred on
    the bin midpoint; stems and whisker caps sit at the mean log10 mass of the
    halos in the bin. Bins with no data have all statistics set to zero.

    The figure is displayed with show(); nothing is returned.
    '''
    edges, centers, mean_pos, pcts = _whim_bin_stats(hm, WHIM_sizes, nbins)
    lower, q1, q2, q3, upper = pcts.T
    widths = np.diff(edges)

    p = figure(tools="save", background_fill_color="#EFE8E2", title="",
               x_range=(edges[0], edges[-1]),
               x_axis_label="log10(halo mass)", y_axis_label="WHIM size")

    # stems
    p.segment(x0=mean_pos, y0=upper, x1=mean_pos, y1=q3, line_color="black")
    p.segment(x0=mean_pos, y0=lower, x1=mean_pos, y1=q1, line_color="black")

    # boxes
    p.vbar(x=centers, width=widths, top=q3, bottom=q2, fill_color="#E08E79", line_color="black")
    p.vbar(x=centers, width=widths, top=q2, bottom=q1, fill_color="#3B8686", line_color="black")

    # whiskers (almost-0 height rects simpler than segments)
    p.rect(x=mean_pos, y=lower, width=widths / 2, height=0.001, fill_color="black", line_color="black")
    p.rect(x=mean_pos, y=upper, width=widths / 2, height=0.001, fill_color="black", line_color="black")

    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = "white"
    p.grid.grid_line_width = 2
    p.xaxis.major_label_text_font_size="12pt"

    show(p)

In [152]:
# Try every plotting function on the simple 2048 dataset first,
# starting with the scatter plot.
scatter(hm=hm2048, WHIM_sizes=WHIM2048)

In [153]:
# Halo-mass histogram for the 2048 dataset (default nbins).
hm_hist(hm=hm2048)

In [194]:
# Box plot of WHIM sizes per mass bin for the 2048 dataset (default nbins).
boxplot(hm=hm2048, WHIM_sizes=WHIM2048)

In [157]:
# The functions work on the 2048 data, so repeat with the 4096 dataset.
scatter(hm=hm4096, WHIM_sizes=WHIM4096)

In [158]:
# Halo-mass histogram for the 4096 dataset (default nbins).
hm_hist(hm=hm4096)

In [195]:
# Box plot of WHIM sizes per mass bin for the 4096 dataset (default nbins).
boxplot(hm=hm4096, WHIM_sizes=WHIM4096)