In [2]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_file
from bokeh.models import Band

In [51]:
def plot_lines(data_file, output_file, depth_type, start_date, end_date, num_outliers, use_top50=False):
    """Draw one curve per day, coloured by its rank under a depth score.

    Parameters
    ----------
    data_file : str
        CSV read with ``pd.read_csv``. It must contain one row per calendar
        day between ``start_date`` and ``end_date`` (inclusive) and 24 value
        columns, because the rows are re-indexed by day and the transposed
        frame is re-indexed by 24 hourly timestamps.
    output_file : str
        CSV with one row per day (same date range) whose columns are depth
        scores; one of them must be named ``depth_type``. Note that inside
        this function the name shadows ``bokeh.plotting.output_file``.
    depth_type : str
        Name of the depth-score column used to rank the days (ascending).
    start_date, end_date : str
        Bounds handed to ``pd.date_range`` to label the days.
    num_outliers : int
        Number of lowest-ranked days drawn in red.
    use_top50 : bool, default False
        When True only the upper half of the ranking (minus the last day) is
        drawn as background; otherwise every day that is neither an outlier
        nor the last-ranked day is drawn.

    Returns
    -------
    bokeh figure
        Figure holding a single ``multi_line`` glyph: background curves in
        pink, outliers in red and the highest-ranked day in blue, drawn in
        that order so the highlighted curves end up on top.
    """
    hours_per_day = 24
    days = pd.date_range(start=start_date, end=end_date).date
    # A Timedelta step avoids the 'H' frequency alias, which recent pandas deprecates.
    hours = pd.date_range(start=start_date, periods=hours_per_day,
                          freq=pd.Timedelta(hours=1)).time

    # Rows become hours, columns become days.
    data = pd.read_csv(data_file).set_index(days).transpose().set_index(hours)
    # Rows become depth-score names, columns become days.
    data_outputs = pd.read_csv(output_file).set_index(days).transpose()

    # Stack the scores under the hourly values, then order the day columns by
    # the chosen score (lowest first).
    data_final = pd.concat([data, data_outputs]).sort_values(by=depth_type, axis=1)
    last = data_final.shape[1] - 1

    data_outliers = data_final.iloc[:, :num_outliers]
    data_median = data_final.iloc[:, last]
    if use_top50:
        data_background = data_final.iloc[:, (data_final.shape[1] // 2):last]
    else:
        data_background = data_final.iloc[:, num_outliers:last]
    data_ordered = pd.concat([data_background, data_outliers, data_median], axis=1)
    print(data_ordered.shape)

    # Colors follow the column order of data_ordered; counts come from the
    # actual slices so they stay aligned even when a slice is shorter than asked.
    color_list = (["#fbb4b9"] * data_background.shape[1]
                  + ["Red"] * data_outliers.shape[1]
                  + ["Blue"])

    # Only the hourly rows are curve points; the rows after them are the depth
    # scores appended above and must not be plotted.
    data_hourly = data_ordered.iloc[:hours_per_day]
    numlines = data_hourly.shape[1]

    p = figure(width=1000, height=800, x_axis_type="datetime")
    # Positional column access keeps this correct even if two slices overlap
    # and produce duplicate column labels.
    p.multi_line(xs=[data_hourly.index.values] * numlines,
                 ys=[data_hourly.iloc[:, i].values for i in range(numlines)],
                 line_color=color_list,
                 line_width=5)
    return p

In [52]:
# taxis_v3, upper half of the 'tmd' ranking plus the 20 lowest-ranked days.
output_file('taxis_v3_top50.html')
plot_taxis_v3 = plot_lines(
    data_file='../data/taxis_v3.csv',
    output_file="../outputs/taxis_v3_out.txt",
    depth_type='tmd',
    start_date='1/1/2014',
    end_date='12/31/2018',
    num_outliers=20,
    use_top50=True,
)
show(plot_taxis_v3)

(36, 933)


In [53]:
# taxis_v3, every day ranked by 'tmd', 20 lowest-ranked days highlighted.
output_file('taxis_v3_full.html')
plot_taxis_v3 = plot_lines(
    data_file='../data/taxis_v3.csv',
    output_file="../outputs/taxis_v3_out.txt",
    depth_type='tmd',
    start_date='1/1/2014',
    end_date='12/31/2018',
    num_outliers=20,
)
show(plot_taxis_v3)

(36, 1826)


In [54]:
# taxis_v2, upper half of the 'tmd' ranking plus the 20 lowest-ranked days.
output_file('taxis_v2_top50.html')
plot_taxis_v2 = plot_lines(
    data_file='../data/taxis_v2.csv',
    output_file="../outputs/taxis_v2_out.txt",
    depth_type='tmd',
    start_date='1/1/2016',
    end_date='12/31/2018',
    num_outliers=20,
    use_top50=True,
)
show(plot_taxis_v2)

(36, 568)


In [55]:
# taxis_v2, every day ranked by 'tmd', 20 lowest-ranked days highlighted.
output_file('taxis_v2_full.html')
plot_taxis_v2 = plot_lines(
    data_file='../data/taxis_v2.csv',
    output_file="../outputs/taxis_v2_out.txt",
    depth_type='tmd',
    start_date='1/1/2016',
    end_date='12/31/2018',
    num_outliers=20,
)
show(plot_taxis_v2)

(36, 1096)


In [56]:
# taxis_v1, upper half of the 'tmd' ranking plus the 20 lowest-ranked days.
output_file('taxis_v1_top50.html')
plot_taxis_v1 = plot_lines(
    data_file='../data/taxis_v1.csv',
    output_file="../outputs/taxis_v1_out.txt",
    depth_type='tmd',
    start_date='1/1/2018',
    end_date='12/31/2018',
    num_outliers=20,
    use_top50=True,
)
show(plot_taxis_v1)

(36, 203)


In [57]:
# taxis_v1, every day, 20 lowest-ranked days highlighted.
# NOTE(review): this cell ranks by 'od' while the other taxi cells rank by 'tmd' — confirm that is intentional.
output_file('taxis_v1_full.html')
plot_taxis_v1 = plot_lines(
    data_file='../data/taxis_v1.csv',
    output_file="../outputs/taxis_v1_out.txt",
    depth_type='od',
    start_date='1/1/2018',
    end_date='12/31/2018',
    num_outliers=20,
)
show(plot_taxis_v1)

(36, 365)


In [58]:
# airquality_v1, upper half of the 'fd' ranking plus the 20 lowest-ranked days.
output_file('airquality_v1_top50.html')
plot_airquality_v1 = plot_lines(
    data_file='../data/airquality_v1.csv',
    output_file="../outputs/airquality_v1_out.txt",
    depth_type='fd',
    start_date='03/10/2004',
    end_date='04/04/2005',
    num_outliers=20,
    use_top50=True,
)
show(plot_airquality_v1)

(36, 216)


In [59]:
# airquality_v1, every day ranked by 'fd', 20 lowest-ranked days highlighted.
output_file('airquality_v1_full.html')
plot_airquality_v1 = plot_lines(
    data_file='../data/airquality_v1.csv',
    output_file="../outputs/airquality_v1_out.txt",
    depth_type='fd',
    start_date='03/10/2004',
    end_date='04/04/2005',
    num_outliers=20,
)
show(plot_airquality_v1)

(36, 391)


In [35]:
# Quartiles of the 'td' depth column.
# NOTE(review): `data_outputs` is only assigned inside plot_lines / functional_boxplot in the
# visible cells, so at notebook level this raises NameError (as the recorded output shows).
# Load the intended depth file into `data_outputs` before this cell — TODO confirm which file.
data_outputs['td'].quantile([0.25,0.5,0.75])

NameError: name 'data_outputs' is not defined

# Functional Boxplot

In [46]:
def get_envelopes(data_top50):
    """Build the envelope and fence curves for a set of curves.

    Parameters
    ----------
    data_top50 : pandas.DataFrame
        One column per curve, one row per evaluation point.

    Returns
    -------
    pandas.DataFrame
        Same index as the input, with four columns:
        ``top`` / ``bot`` are the row-wise maximum / minimum of the curves,
        and ``out_top`` / ``out_bot`` are the fences, placed at the envelope
        midpoint plus / minus 0.75 times the envelope width (i.e. a quarter
        of the width beyond each envelope edge).
    """
    df_max = data_top50.max(axis=1)
    df_min = data_top50.min(axis=1)

    iqr = df_max - df_min
    # True division: flooring the midpoint would shift both fences down and
    # make them asymmetric around the envelope.
    mid = (df_max + df_min) / 2
    out_top = mid + (0.75*iqr)
    out_bot = mid - (0.75*iqr)

    return pd.concat({'top':df_max,'bot':df_min,'out_top':out_top,'out_bot':out_bot},axis=1)

def functional_boxplot(data_file, output_file, start_date, end_date, depth_type, num_outliers=3):
    """Draw a functional boxplot of daily curves ranked by a depth score.

    Parameters
    ----------
    data_file : str
        CSV read with ``pd.read_csv``; one row per calendar day between
        ``start_date`` and ``end_date`` (inclusive) and 24 value columns.
    output_file : str
        CSV with one row per day (same date range) whose columns are depth
        scores; one of them must be named ``depth_type``. Inside this
        function the name shadows ``bokeh.plotting.output_file``.
    start_date, end_date : str
        Bounds handed to ``pd.date_range`` to label the days.
    depth_type : str
        Name of the depth-score column used to rank the days (ascending).
    num_outliers : int, default 3
        Number of lowest-ranked days drawn as red outlier curves.

    Returns
    -------
    bokeh figure
        Figure with a filled patch between the ``bot`` and ``top`` envelope
        of the upper half of the ranking, the four envelope/fence curves
        from ``get_envelopes``, the outlier curves in red and the
        highest-ranked day in blue.
    """
    hours_per_day = 24
    days = pd.date_range(start=start_date, end=end_date).date
    # A Timedelta step avoids the 'H' frequency alias, which recent pandas deprecates.
    hours = pd.date_range(start=start_date, periods=hours_per_day,
                          freq=pd.Timedelta(hours=1)).time

    # Rows become hours, columns become days.
    data = pd.read_csv(data_file).set_index(days).transpose().set_index(hours)
    # Rows become depth-score names, columns become days.
    data_outputs = pd.read_csv(output_file).set_index(days).transpose()

    # Stack the scores under the hourly values and order days by the chosen score.
    data_final = pd.concat([data, data_outputs]).sort_values(by=depth_type, axis=1)
    last = data_final.shape[1] - 1

    data_outliers = data_final.iloc[:, :num_outliers]
    data_median = data_final.iloc[:, last]
    data_top50 = data_final.iloc[:, (data_final.shape[1] // 2):last]
    data_envelopes = get_envelopes(data_top50)
    data_ordered = pd.concat([data_envelopes, data_outliers, data_median], axis=1)

    # Colour the envelope curves by column name, not by position: the column
    # order returned by pd.concat on a dict differs between pandas versions
    # (sorted keys vs. insertion order).
    envelope_palette = {'top': "#f768a1", 'bot': "#f768a1",
                        'out_top': "black", 'out_bot': "black"}
    envelope_colors = [envelope_palette.get(name, "black") for name in data_envelopes.columns]
    color_list = envelope_colors + ["red"] * data_outliers.shape[1] + ["blue"]
    alpha_list = ([1.0] * data_envelopes.shape[1]
                  + [0.5] * data_outliers.shape[1]
                  + [1.0])

    # Only the hourly rows are curve points; the rows after them are the depth
    # scores appended above and must not be plotted.
    data_hourly = data_ordered.iloc[:hours_per_day]
    numlines = data_hourly.shape[1]

    p = figure(width=1000, height=800, x_axis_type="datetime")

    upper_band = np.array(data_envelopes['top'].iloc[0:hours_per_day])
    lower_band = np.array(data_envelopes['bot'].iloc[0:hours_per_day])
    x = np.array(data_hourly.index)

    # Closed polygon: lower band left-to-right, then upper band right-to-left.
    xs = np.concatenate([x, x[::-1]])
    ys = np.concatenate([lower_band, upper_band[::-1]])

    # `legend_label` is the current Bokeh keyword for a fixed legend entry
    # (the bare `legend` keyword was removed from glyph methods).
    p.patch(x=xs, y=ys, fill_color="#f768a1", fill_alpha=0.8, line_alpha=0, legend_label="IQR")

    # Positional column access keeps this correct even with duplicate labels.
    p.multi_line(xs=[data_hourly.index.values] * numlines,
                 ys=[data_hourly.iloc[:, i].values for i in range(numlines)],
                 line_color=color_list,
                 line_width=5,
                 alpha=alpha_list)

    return p

In [47]:
# Functional boxplot of taxis_v1 for 2018, days ranked by the 'omd' depth column.
output_file('fbplot_taxisv1.html')
fbplot_taxisv1 = functional_boxplot(
    data_file='../data/taxis_v1.csv',
    output_file="../outputs/taxis_v1_out.txt",
    start_date='01/01/2018',
    end_date='12/31/2018',
    depth_type='omd',
)
show(fbplot_taxisv1)

In [None]:
def get_color_from_depth(depth):
    """Map a depth value to one of two hex colors.

    Returns '#fef0d9' when ``depth`` is strictly below 0.074495 and
    '#fdcc8a' for every other value.
    """
    # Cut-offs tried previously: 0.270425 -> '#fef0d9', 0.353502 -> '#fdcc8a'.
    is_shallow = depth < 0.074495
    return '#fef0d9' if is_shallow else '#fdcc8a'


# Attach a color to every row based on its 'td' depth.
# NOTE(review): `data_outputs` is not assigned at notebook level in the visible cells — confirm where it is loaded.
data_outputs['color'] = data_outputs['td'].map(get_color_from_depth)

In [9]:
# Stand-alone demo of a filled band between two curves.
# `width` replaces `plot_width`, which current Bokeh no longer accepts and which the
# plotting functions in this notebook already avoid.
p = figure(x_axis_type="datetime", width=950, title = "Band plot")
#p.xaxis.major_label_orientation = pi/4
p.xaxis.axis_label = "X"
p.xaxis.axis_label_text_font_style='normal'
p.yaxis.axis_label = "Y"
p.yaxis.axis_label_text_font_style='normal'
p.grid.grid_line_alpha=0.3

# Values to be plotted
# NOTE(review): x holds plain integers although the axis is declared as datetime — confirm that is intended.
upper_band = np.array([30,  32, 34, 35, 35, 33, 32, 31, 30])
lower_band = np.array([25,  27, 30, 31, 32, 30, 30, 29, 28])
x          = np.array([1,   2,  3,  4,  5,  6,  7,  8,  9])

# Bands are drawn as patches, i.e. a polygon specified by a series of 2D points.
# The outline walks along the upper band and comes back along the lower band,
# so the lower band has to be reversed (hence the [::-1]).
xs = np.concatenate([x, x[::-1]])
ys = np.concatenate([upper_band, lower_band[::-1]])

# Draw the area patch without border.
# `legend_label` is the current keyword for a fixed legend entry (bare `legend` was removed).
p.patch(x=xs, y=ys, fill_alpha=0.3, line_alpha=0, legend_label="Band")

# Draw the respective lines
p.line(x, upper_band, line_alpha=0.8)
p.line(x, lower_band, line_alpha=0.8)

show(p)