### Purpose of the Notebook

Cboe publishes an interesting dataset of European equities market share by venue at the URL below. Both `%s` placeholders (`startDate` and `endDate`) must be replaced with the dates of interest, formatted as yyyy-MM-dd:

https://markets.cboe.com/europe/equities/market_share/market/venue/data/?startDate=%s&endDate=%s

For example:

https://markets.cboe.com/europe/equities/market_share/market/venue/data/?startDate=2021-01-22&endDate=2021-01-22




In [None]:
# Libraries imports
%matplotlib qt
#%matplotlib notebook
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from IPython import display
import datetime as dt
import seaborn as sns
import os
import math
import calendar as cal
import pickle
import ipywidgets
from ipywidgets import Layout, HBox, VBox, Label, DatePicker, Select, SelectMultiple, RadioButtons, Dropdown, Text, Button

# The bundled seaborn style sheets were renamed with a "seaborn-v0_8-" prefix in
# matplotlib 3.6 and the old names were later removed, so use whichever name
# this installation actually provides (new name first).
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
#sns.set_style("white")
# Set nice font
mpl.rcParams['font.sans-serif'] = "Lucida Bright"

#plt.ioff()


In [None]:
# Style dict passed as `style=` to the selector widgets of the dialog.
__STYLE__ = {'description_width': 'initial'}
# Fixed-size (400px x 100px) container used for each "label + widget" row.
__BOX_L__ = Layout(width='400px', height='100px', overflow_y='hidden')
# Inside a row the label takes 30% of the width ...
__LABEL_L__ = Layout(width='30%',height='100%')
# ... and the widget the remaining 70%.
__ELEMENT_L__ = Layout(width='70%',height='100%')
# Shorter (30px high) container used for the start/end date rows.
__DATE_L__ = Layout(width='400px', height='30px', overflow_y='hidden')

# strftime patterns used for the x tick labels, keyed by the period code
# selected in the PERIODICITY dropdown (D, W, M, Q, Y).
__DATE_FORMATS__ = {'D': "%Y-%m-%d", 'W': "%Y-%m-%d", 'M': "%Y-%m", 'Q': '%Y-%m', 'Y': "%Y"}

# Helper functions
def load_data(path):
    """Load the Cboe market-share CSV data into a single DataFrame.

    Parameters
    ----------
    path : str
        Either a single CSV file, or a directory whose entries are all CSV
        files. Directory entries are read in sorted name order so that the
        resulting row order is reproducible.

    Returns
    -------
    pandas.DataFrame
        The data with a fresh 0..n-1 index, the ``daterange`` column renamed
        to ``date``, dtypes inferred via ``convert_dtypes`` and ``date``
        parsed to datetimes.

    Raises
    ------
    ValueError
        If ``path`` is an empty directory (nothing to concatenate).
    KeyError
        If the data has neither a ``daterange`` nor a ``date`` column.
    """
    if os.path.isdir(path):
        # os.listdir gives no ordering guarantee; sort for deterministic output.
        frames = [pd.read_csv(os.path.join(path, name))
                  for name in sorted(os.listdir(path))]
        d = pd.concat(frames, ignore_index=True)
    else:
        d = pd.read_csv(path)

    d = d.rename(columns={'daterange': 'date'})
    d = d.convert_dtypes()
    # Replace the column outright: an in-place `.loc[:, "date"] = ...` would
    # try to keep the string dtype that convert_dtypes just assigned.
    d['date'] = pd.to_datetime(d['date'])
    return d

# Returns the number of distinct values per column.
# A column holding a single value carries no information.
def count_values(df):
    """Count the distinct values of every column of ``df``.

    Missing values count as ONE distinct value. (Building a ``set`` from the
    column would count every float NaN separately, because NaN != NaN, and
    make an all-NaN column look informative.)

    Returns
    -------
    list of (column name, number of distinct values) tuples, in column order.
    """
    return [(c, df[c].nunique(dropna=False)) for c in df.columns]

def filter_columns(d):
    """Return ``d`` restricted to the columns holding more than one distinct value.

    The per-column distinct counts come from ``count_values``; the original
    column order is kept.
    """
    informative = []
    for column, n_distinct in count_values(d):
        if n_distinct > 1:
            informative.append(column)
    return d.loc[:, informative]

def _make_name(s):
    return "|".join(s) if type(s) is tuple else s

def select_data(df, start_date=None, end_date=None, date_field='date'):
    """Return the rows of ``df`` whose date lies in ``[start_date, end_date)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding a datetime column named ``date_field``.
    start_date : date-like, optional
        Inclusive lower bound (time of day is ignored). ``None`` means no
        lower bound.
    end_date : date-like, optional
        EXCLUSIVE upper bound (time of day is ignored). ``None`` means no
        upper bound, so the most recent rows are kept.
    date_field : str
        Name of the column to filter on.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index.
    """
    selected = df
    if start_date is not None:
        lower = pd.Timestamp(start_date).normalize()
        selected = selected.loc[selected.loc[:, date_field] >= lower, :]
    if end_date is not None:
        upper = pd.Timestamp(end_date).normalize()
        selected = selected.loc[selected.loc[:, date_field] < upper, :]
    return selected.reset_index(drop=True)

# Picks the number of decimals to show when formatting the tick values
# of an axis, and returns the matching format string.
def assess_scale(vec):
    """Return a ``"{:.Nf}"`` format string suited to the spread of ``vec``.

    The average step ``(max - min) / len(vec)`` is compared against powers of
    ten from 1e-6 up to 1e5: N is the number of decimals needed to resolve
    that step (between 1 and 6), or 0 when the step is larger than 1. A step
    of at most 1e-6 (including a zero spread) also yields 0 decimals.
    """
    step = (max(vec) - min(vec)) / len(vec)
    exponent = 0
    for candidate in range(-6, 6):
        if step / math.pow(10, candidate) <= 1:
            break
        exponent = candidate
    decimals = -exponent if exponent < 0 else 0
    return "{:." + "{:d}".format(decimals) + "f}"

def save_pickle(path, df):
    """Serialise ``df`` to the file at ``path`` with the highest pickle protocol.

    An existing file at ``path`` is overwritten.
    """
    with open(path, 'wb') as sink:
        pickler = pickle.Pickler(sink, protocol=pickle.HIGHEST_PROTOCOL)
        pickler.dump(df)

def load_pickle(path):
    """Read back and return the object pickled in the file at ``path``.

    NOTE: unpickling can execute arbitrary code, so only load files that come
    from a trusted source.
    """
    with open(path, 'rb') as source:
        return pickle.load(source)

class DialogBox:
    """Widget panel used to choose what the market-share chart should show.

    The panel lets the user pick a date range, a variable (column of ``df``),
    one or more values of that variable, the chart type and the periodicity.
    Pressing "Show" hands the current widget values to a ``GraphMaker`` which
    draws the chart.

    Parameters
    ----------
    df : pandas.DataFrame
        The data to chart; must contain a datetime ``date`` column.
    categories : list of str, optional
        Column names offered in the VARIABLE selector.
    """

    def __init__(self, df, categories=None):
        self.df = df
        self.start_date = DatePicker(value=df['date'].min().date())
        # The end date only becomes editable once a start date has been picked.
        self.end_date = DatePicker(value=df['date'].max().date(), disabled=True)
        # `rows` (not `row`) is the trait controlling the number of visible lines.
        self.cat_widget = Select(value=None, options=[] if categories is None else categories,
                                 rows=5, layout=__ELEMENT_L__, style=__STYLE__, disabled=False)
        self.opt_label = Label("", layout=__LABEL_L__)
        self.opt_widget = SelectMultiple(rows=10, layout=__ELEMENT_L__, style=__STYLE__, disabled=True)
        self.chart_type = RadioButtons(options=['stacked', 'side-by-side', ], value=None, disabled=False)
        self.period = Dropdown(options=[("Daily", "D"), ("Weekly", "W"), ("Monthly", "M"), ("Quarterly", "Q"), ("Yearly", "Y")], value="M",)
        # NOTE: the output directory / save widgets are created (and therefore
        # reported by get_context) but are not displayed or wired up yet.
        self.output_dir = Text(placeholder='enter output directory', disabled=False)
        self.save_button = Button(description='Save', disabled=False, button_style='', tooltip='Save to file')
        self.show_button = Button(description='Show', disabled=False, button_style='', tooltip='Show chart')

    def _variable_selected(self, changed):
        """Refresh the option list when another variable is selected."""
        if changed['type'] != 'change' or changed['name'] != 'value':
            return
        variable = self.cat_widget.value
        if variable is None:
            # Selection was cleared: nothing to choose from.
            self.opt_label.value = ""
            self.opt_widget.options = []
            self.opt_widget.disabled = True
            return
        self.opt_label.value = variable.upper()
        # Missing values are not selectable options (and cannot be sorted).
        self.opt_widget.options = sorted(self.df[variable].dropna().unique())
        self.opt_widget.disabled = False

    def _date_selected(self, changed):
        """Unlock the end-date picker once the start date has been changed."""
        if changed['type'] == 'change' and changed['name'] == 'value':
            self.end_date.disabled = False

    def get_context(self):
        """Return ``{attribute name: current value}`` for every value-holding widget.

        Attributes that are not ipywidgets widgets (the data frame, the graph)
        and widgets without a ``value`` (buttons) are left out.
        """
        return {name: widget.value for name, widget in self.__dict__.items()
                if isinstance(widget, ipywidgets.Widget) and hasattr(widget, 'value')}

    def _show_figure(self, clicked):
        """"Show" button callback: redraw the chart with the current selection."""
        self.graph.set_ctx(self.get_context())
        self.graph.show(self.df)

    def show(self):
        """Build the widget layout, hook up the callbacks and return the layout.

        A new ``GraphMaker`` (and hence a new figure) is created on every call.
        """
        # Start/End time selectors
        hboxcal_start = HBox([Label("START:", layout=__LABEL_L__), self.start_date], layout=__DATE_L__)
        hboxcal_end = HBox([Label("END:", layout=__LABEL_L__), self.end_date], layout=__DATE_L__)

        # Category and options selectors
        hbox_c = HBox([Label("VARIABLE:", layout=__LABEL_L__), self.cat_widget], layout=__BOX_L__)
        hbox_o = HBox([self.opt_label, self.opt_widget], layout=__BOX_L__)

        # Selectors for chart type and periodicity
        hbox_ct = HBox([Label("CHART TYPE:", layout=__LABEL_L__), self.chart_type], layout=__BOX_L__)
        hbox_per = HBox([Label("PERIODICITY:", layout=__LABEL_L__), self.period], layout=__BOX_L__)

        # Add listeners to widgets (only `value` changes are of interest)
        self.cat_widget.observe(self._variable_selected, names='value')
        self.start_date.observe(self._date_selected, names='value')
        self.show_button.on_click(self._show_figure)

        self.graph = GraphMaker()

        return HBox([VBox([hboxcal_start, hboxcal_end, hbox_c, hbox_o]),
                     VBox([hbox_ct, hbox_per, self.show_button])
                     ]
                    )
    
class GraphMaker:
    """Draws market-share bar charts on a single, reusable matplotlib axis.

    The chart is driven by a context dict (see ``set_ctx``) with the keys
    ``cat_widget`` (column to break down by), ``opt_widget`` (selected values
    of that column), ``start_date``, ``end_date``, ``period`` (pandas period
    code) and ``chart_type`` ("stacked" or "side-by-side").
    """

    def __init__(self):
        self.ctx = None
        self.figure, self.axis = plt.subplots(1, 1, figsize=(8, 8))

    def set_ctx(self, ctx):
        """Store the context dict used by the next call to ``show``."""
        self.ctx = ctx

    def __str__(self):
        # __str__ must return a string; the context is a dict (or None).
        return str(self.ctx)

    def _get_data(self, data):
        """Return the share of total notional of each selected value, per period.

        The result is indexed by the timestamp of each period's last day, has
        one column per selected value, and holds fractions in [0, 1].
        """
        v = self.ctx['cat_widget']
        sel = self.ctx['opt_widget']
        sel_label = _make_name(sel)

        d = select_data(data, start_date=self.ctx['start_date'], end_date=self.ctx['end_date'])
        d['period'] = d.loc[:, 'date'].dt.to_period(self.ctx['period'])

        # Split the rows into the selected values and everything else
        is_selected = d.loc[:, v].isin(sel)
        s = d.loc[is_selected, :]
        sn = d.loc[~is_selected, :]

        # Notional per period and selected value, one column per value
        s_period = 1e-9 * s.groupby(["period", v])[['notional']].sum().rename(columns={'notional': sel_label})
        s_period = s_period.unstack()
        s_period.columns = [option for _, option in s_period.columns]

        # Notional per period of everything that was not selected
        sn_period = 1e-9 * sn.groupby("period")[['notional']].sum()

        # Divide each selected column by the period total (selected + rest)
        all_df = s_period.join(sn_period)
        all_df = s_period.div(all_df.sum(axis=1), axis=0)
        all_df = all_df.fillna(0.0)
        all_df.index = [x.asfreq("D").to_timestamp() for x in all_df.index]
        return all_df

    # Formats x according to the object type
    @staticmethod
    def _format(x, f=None):
        """Format ``x`` as text.

        Dates use ``f`` as a strftime pattern, numbers use it as a
        ``str.format`` template; without ``f`` the result is ``str(x)``
        (the date part only for a pandas Timestamp). Unsupported types are
        reported on stdout and yield ``None``.
        """
        # Timestamp is a datetime subclass, so it must be tested first.
        if isinstance(x, pd.Timestamp):
            return GraphMaker._format(x.date(), f)
        if isinstance(x, (dt.datetime, dt.date)):
            return x.strftime(f) if f is not None else str(x)
        if isinstance(x, (float, int)):
            return f.format(x) if f is not None else str(x)
        print("Type {:s} not parsed yet".format(str(type(x))))
        return None

    def _set_yticklabels(self):
        """Show the y axis (fractions) as percentages with a suitable precision."""
        yticks = [100.0 * float(y) for y in self.axis.get_yticks()]
        yformat = "".join([assess_scale(yticks), "%"])
        # A formatter function (rather than fixed label strings) keeps the
        # labels correct when the interactive window is zoomed or panned.
        self.axis.yaxis.set_major_formatter(
            mpl.ticker.FuncFormatter(lambda value, _pos: yformat.format(100.0 * value)))
        self.axis.tick_params(axis='y', labelsize=15)
        self.axis.set_ylabel("%", fontsize=20)

    def _set_xticklabels(self, labels=None, format=None):
        """Label the x ticks with the formatted ``labels``.

        Tick ``i`` of the bar chart is labelled with ``labels[i]``; a tick
        without a matching label is left blank.
        """
        dates = [GraphMaker._format(x, f=format) for x in ([] if labels is None else labels)]
        # Tick positions come back as numpy numbers; convert before indexing.
        positions = [int(i) for i in self.axis.get_xticks()]
        tick_text = [dates[i] if 0 <= i < len(dates) else "" for i in positions]
        self.axis.set_xticklabels(tick_text, rotation=90, fontsize=15)
        self.axis.set_xlabel("date", fontsize=20)

    def _set_elements(self):
        """Add the legend, the title and the grid."""
        self.axis.legend(loc=2, fontsize=12, facecolor="white", frameon=True, framealpha=1, edgecolor='black')
        self.axis.set_title("Market Share - in %", fontsize=25)
        self.axis.grid(True)

    def _side_chart(self, data):
        """Draw one group of bars per period, with one bar per selected value."""
        # seaborn needs long-form data: one row per (date, venue) pair
        long_form = (
            data.reset_index()
            .melt(id_vars="index")
            .sort_values(by=["index", "variable"])
            .rename(columns={"index": "date", "variable": "venue", "value": "%"})
            .reset_index(drop=True)
        )
        sns.barplot(x='date', y='%', hue='venue', data=long_form, ax=self.axis)
        self._set_yticklabels()
        self._set_xticklabels(labels=sorted(set(long_form.loc[:, "date"])), format=__DATE_FORMATS__.get(self.ctx['period']))
        self._set_elements()

    def _period_on_period(self, data):
        """Compare a single series year on year: one group per month, one bar per year."""
        frame = data.reset_index()
        frame.columns = ['date', '%']
        frame['month'] = frame['date'].dt.month
        frame['year'] = frame['date'].dt.year
        frame = frame.drop(columns='date')

        # Fix the category order explicitly so the tick labels below are
        # derived from the months actually present, not from tick positions.
        months = sorted(int(m) for m in frame['month'].unique())
        sns.barplot(data=frame, y='%', x='month', hue='year', order=months, ax=self.axis)
        self._set_yticklabels()

        period = self.ctx['period']
        names, label = None, None
        if period == "M":
            names = [cal.month_abbr[m] for m in months]
            label = "month"
        elif period == "Q":
            # Each period is dated by its last day, so its month identifies the quarter
            names = ["Q{:d}".format((m - 1) // 3 + 1) for m in months]
            label = "quarter"

        if label is not None:
            self.axis.set_xticklabels(names, rotation=90, fontsize=15)
            self.axis.set_xlabel(label, fontsize=20)

        self._set_elements()

    def _stacked_chart(self, data):
        """Draw one bar per period, stacking the selected values."""
        data.plot(kind='bar', stacked=True, ax=self.axis)
        self._set_yticklabels()
        self._set_xticklabels(labels=sorted(set(data.index)), format=__DATE_FORMATS__.get(self.ctx['period']))
        self._set_elements()

    def show(self, data):
        """Redraw the chart for ``data`` according to the current context.

        Nothing is drawn when no chart type has been chosen yet.
        """
        d = self._get_data(data)
        ct = self.ctx['chart_type']
        self.axis.cla()
        if ct == "stacked":
            self._stacked_chart(d)
        elif ct == "side-by-side":
            # if 2+ series selected, columns will be shown side by side per period
            if len(d.columns) > 1:
                self._side_chart(d)
            # if only one series selected, the year-on-year variation will be shown
            else:
                self._period_on_period(d)
        # Ask the (interactive) canvas to refresh with the new content
        self.figure.canvas.draw_idle()
    

In [None]:
# Load the dataset from its pickle file (path relative to the working directory).
# NOTE(review): unpickling can run arbitrary code - only load files from a trusted source.
data = load_pickle("data/cboe_2019_2020.pickle")

In [None]:
# Columns of `data` offered in the VARIABLE selector of the dialog.
v = ["market", "venue", "category", "subcategory", "subvenue"]
b = DialogBox(data, categories=v)
# Last expression of the cell: the returned widget layout is what gets displayed.
b.show()
