# Requirements

1. Add data sources independently
   - [x] Choose from available dataframes (variables)
     - [ ] Prompt to read from a file?
   - [ ] Select/edit time range from data
   - [ ] Select time column to use
   - [ ] Select properties for hover
   - [ ] Enter series name
   - [ ] Enter series color
   - [ ] Select order (vertical position)

2. Add annotation:
   - [ ] Select time
     - [ ] Choose from event?
   - [ ] Add title
   - [ ] Add text
   - [ ] Tie to data set
     - [ ] Which data set should the annotation reference?
   - [ ] Set display offset (to avoid overlaps)
     - [ ] Can we do this dynamically (keep track of what text has gone where)?
   - [ ] Combine with ... (TBD - item left unfinished)
   - [ ] Build into DF

3. Global
   - Save settings
   - Set
     - font size
     - width, height
     - default color
     - theme

In [8]:
from datetime import datetime
from functools import singledispatchmethod
from typing import Any, Dict, Iterable, List, Optional


import attr
from bokeh.models import LayoutDOM
import pandas as pd
from msticpy.vis.timeline import display_timeline
from msticpy.analysis.observationlist import Observations, Observation
from msticpy.common.timespan import TimeSpan


@attr.s(auto_attribs=True)
class DataSet:
    """Plot parameters for a single timeline data series."""

    # DataFrame holding the rows of this series.
    data: pd.DataFrame
    # Name of the column in `data` that holds the event time.
    time_column: str
    # Display name (caption) of the series.
    series_name: str
    # Extra columns carried with the series - presumably the hover/tooltip
    # columns (TODO confirm). `attr.Factory(list)` must be the *default*, not
    # the annotation, otherwise the attribute has no default at all.
    source_columns: List[str] = attr.Factory(list)
    # Optional series color; None leaves the choice to the plotting code.
    color: Optional[str] = None
    # Marker glyph name used for the series.
    glyph: str = "circle"




class CompositeTimeline:
    """Collect data series and annotations for a combined timeline plot."""

    # Column names tried (case-insensitively) when no time column is given.
    _TIME_COLUMNS = ("TimeGenerated", "EventTime", "timestamp")

    def __init__(
        self,
        data: Optional[pd.DataFrame] = None,
        series_name: Optional[str] = None,
        data_sets: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Create the timeline container, optionally pre-loading data.

        Parameters
        ----------
        data : Optional[pd.DataFrame]
            A single DataFrame to add as a series.
        series_name : Optional[str]
            Name of the series created from `data`.
        data_sets : Optional[Dict[str, Any]]
            Mapping of series name to either a `DataSet` or a dict with a
            "data" key and optional "time_column", "source_columns",
            "color" and "glyph" keys.
        **kwargs
            Passed through to `add_data` together with `data`.

        """
        # The class uses mapping operations (`get`, `items`, item assignment)
        # on this container, so it is kept as a plain dict keyed by series name.
        self._data_sets: Dict[str, Observation] = {}
        self._observations = Observations()
        self._timeline_plot: Optional[LayoutDOM] = None

        # `if data:` would raise ValueError - DataFrame truthiness is ambiguous.
        if data is not None:
            self.add_data(data, series_name, **kwargs)
        if data_sets:
            for ds_series_name, ds_series_attrs in data_sets.items():
                if isinstance(ds_series_attrs, DataSet):
                    ds_series_attrs = attr.asdict(ds_series_attrs)
                # singledispatchmethod dispatches on the type of the first
                # *positional* argument, so `data` must not be passed by keyword.
                self.add_data(
                    ds_series_attrs["data"],
                    ds_series_name,
                    # add_data works out the time column itself when this is empty
                    time_column=ds_series_attrs.get("time_column"),
                    source_columns=ds_series_attrs.get("source_columns"),
                    color=ds_series_attrs.get("color"),
                    glyph=ds_series_attrs.get("glyph", "circle"),
                )

    @property
    def data_set_names(self) -> List[str]:
        """Return the names of the data sets added so far."""
        return list(self._data_sets)

    def __getitem__(self, key: str) -> Observation:
        """Return the stored entry for series `key` (None if not present)."""
        return self._data_sets.get(key)

    def get_plot_params(self, caption: str) -> DataSet:
        """Return the `DataSet` plot parameters stored for series `caption`."""
        # The stored value is an Observation object, not a dict, so its
        # properties are reached by attribute access.
        return self._data_sets[caption].additional_properties["plot_params"]

    @singledispatchmethod
    def add_data(
        self,
        data: pd.DataFrame,
        series_name: Optional[str],
        time_column: Optional[str] = None,
        source_columns: Optional[Iterable[str]] = None,
        color: Optional[str] = None,
        glyph: str = "circle",
        group_by: Optional[str] = None,
    ) -> None:
        """
        Add a DataFrame as one series, or one series per group.

        Parameters
        ----------
        data : pd.DataFrame
            The data to add. Must be passed positionally (dispatch argument).
        series_name : Optional[str]
            Name of the series.
        time_column : Optional[str]
            Time column; looked up from `_TIME_COLUMNS` when omitted.
        source_columns : Optional[Iterable[str]]
            Columns stored with the series plot parameters.
        color : Optional[str]
            Series color.
        glyph : str
            Marker glyph name.
        group_by : Optional[str]
            If given, `data` is split with `groupby` and each group is added
            as "<series_name> (<group>)".

        Raises
        ------
        ValueError
            If no time column is supplied and none can be determined.

        """
        if not time_column:
            time_column = self._determine_time_column(data)

        if not time_column:
            raise ValueError(
                "No value for time_column could be found in the data. "
                "Please specify the time column in the 'time_column' parameter."
            )
        if not group_by:
            self._add_data_set(
                series_name=series_name,
                data=data,
                time_column=time_column,
                source_columns=source_columns,
                color=color,
                glyph=glyph,
            )
            return
        for group, data_group in data.groupby(group_by):
            self._add_data_set(
                series_name=f"{series_name} ({group})",
                data=data_group,
                time_column=time_column,
                source_columns=source_columns,
                color=color,
                glyph=glyph,
            )

    @add_data.register
    def _(
        self,
        data_set: DataSet,
    ) -> None:
        """Add a series described by a ready-made `DataSet`."""
        if not data_set.time_column:
            data_set.time_column = self._determine_time_column(data_set.data)

        if not data_set.time_column:
            raise ValueError(
                "No value for time_column could be found in the data. "
                "Please specify the time column in the 'time_column' attribute."
            )
        # Stored the same way as DataFrame input so every entry has one shape.
        self._store_data_set(data_set)

    def _add_data_set(
        self, series_name, data, time_column, source_columns, color, glyph="circle"
    ):
        """Build the plot parameters for one series and store it."""
        self._store_data_set(
            DataSet(
                data=data,
                series_name=series_name,
                # materialize so a one-shot iterable is not consumed later
                source_columns=(
                    list(source_columns) if source_columns is not None else None
                ),
                time_column=time_column,
                color=color,
                glyph=glyph,
            )
        )

    def _store_data_set(self, plot_params: DataSet) -> None:
        """Wrap `plot_params` in an Observation and file it under its series name."""
        times = plot_params.data[plot_params.time_column]
        new_data = Observation(
            caption=plot_params.series_name,
            data=plot_params.data,
            data_type="DataFrame",
            time_span=TimeSpan(start=times.min(), end=times.max()),
            time_column=plot_params.time_column,
        )
        new_data.additional_properties["plot_params"] = plot_params
        self._data_sets[plot_params.series_name] = new_data

    def add_annotation(
        self,
        caption: str,
        description: str,
        timestamp: datetime,
        **kwargs,
    ):
        """
        Create an observation from the given text and time and record it.

        Only keyword arguments whose names appear in
        `Observation.all_fields()` are forwarded; others are ignored.
        """
        allowed_fields = Observation.all_fields()
        obs_kwargs = {
            key: value for key, value in kwargs.items() if key in allowed_fields
        }
        # NOTE(review): Observation appears to require `data`; an annotation
        # has none of its own, so default it to None - confirm.
        obs_kwargs.setdefault("data", None)
        self._observations.add_observation(
            Observation(
                caption=caption, description=description, timestamp=timestamp, **obs_kwargs
            )
        )

    def add_observations(self, observations: Iterable[Observation]):
        """Add each observation in `observations` via `add_observation`."""
        for observation in observations:
            self.add_observation(observation)

    def add_observation(
        self,
        observation: Observation,
        **kwargs,
    ) -> None:
        """
        Record an existing observation.

        The observation's own timestamp is used if set, otherwise the
        `timestamp` keyword argument. A message is printed when neither is
        available.
        """
        self._observations.add_observation(observation, **kwargs)
        observation_time = observation.timestamp or kwargs.get("timestamp")
        if not observation_time:
            print("No time_stamp supplied, observation will not be plotted.")
        observation.timestamp = observation_time

    def display(self):
        """Assemble the per-series plot parameters (plotting not implemented yet)."""
        # Add normal data series
        # NOTE(review): built from the stored plot parameters on the assumption
        # that this dict is meant as input for display_timeline - confirm.
        timeline_dict = {
            series_name: attr.asdict(self.get_plot_params(series_name))
            for series_name in self._data_sets
        }

        # Add observations
        # TODO

    def _determine_time_column(self, data: pd.DataFrame) -> Optional[str]:
        """Return the first column of `data` matching `_TIME_COLUMNS`, else None."""
        # Casefold both sides; comparing a casefolded column name with the
        # mixed-case constants could never match "TimeGenerated"/"EventTime".
        known_names = {name.casefold() for name in self._TIME_COLUMNS}
        return next(
            (col for col in data.columns if str(col).casefold() in known_names),
            None,
        )

    # def _obs_list_no_expand_to_df(self):
    #     """Return observations as a DataFrame."""
    #     obs_list = [
    #         attr.asdict(obs, filter=lambda attr, _: attr != "data")
    #         for obs in self.observation_list.values()
    #         if not obs.expand_data
    #     ]
    #     return pd.json_normalize(obs_list)

    # def _obs_list_data_to_df(self):
    #     obs_list = [
    #         attr.asdict(obs, filter=lambda attr, _: attr != "data")
    #         for obs in self.observation_list.values()
    #         if obs.expand_data
    #     ]
    #     obs_dfs = [
    #         obs.data[[obs.time_column]].rename(columns={obs.time_column, "time_stamp"})
    #         for obs in self.observation_list.values()
    #         if obs.expand_data
    #     ]


@attr.s(auto_attribs=True)
class TLAnnotation:
    """Single timeline annotation (placeholder - no fields defined yet)."""


class TLAnnotationsSet:
    """Collection of timeline annotations."""

    def __init__(self):
        # Start with an empty list; `List[TLAnnotation]` on its own is only the
        # typing alias, not a container instance.
        self.annotations: List[TLAnnotation] = []

    def add_annotation(self, annotation: TLAnnotation) -> None:
        """Append `annotation` to the collection."""
        self.annotations.append(annotation)


In [11]:
# Scratch check: what type does iterating a dict produce?
dt = {1: 2, 3: 4}
type(iter(dt))

dict_keyiterator

In [7]:
from bokeh.colors import groups
# Scratch exploration of bokeh's named color groups.
color_groups = groups.__all__
color_groups[0]
# Only this last expression is echoed as the cell output.
dir(groups.black)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_colors']

In [4]:
import ipywidgets as widgets

# Scratch check: create a Button with a tooltip and display it.
btn = widgets.Button(description="Help", tooltip="Confirms updates to the settings changes")
btn

Button(description='Help', style=ButtonStyle(), tooltip='Confirms updates to the settings changes')

In [6]:
# Scratch check: same tooltip on a Dropdown.
# NOTE(review): the echoed repr for this cell does not include `tooltip` - confirm whether Dropdown keeps it.
widgets.Dropdown(description="Help", options=["one", "two"], tooltip="Confirms updates to the settings changes")

Dropdown(description='Help', options=('one', 'two'), value='one')

# UI

In [5]:
import re

import pandas as pd

def get_global_dfs(pattern: str = None):
    """
    Return the names of module-level variables that hold DataFrames.

    Parameters
    ----------
    pattern : str, optional
        Regular expression applied with `re.match` (anchored at the start of
        the name). When empty or None every name is considered.

    Returns
    -------
    list of str
        Names bound to a `pd.DataFrame` that do not start with an underscore,
        in the order they appear in `globals()`.

    """
    found = []
    for var_name, var_value in globals().items():
        if pattern and not re.match(pattern, var_name):
            continue
        if not isinstance(var_value, pd.DataFrame):
            continue
        if var_name.startswith("_"):
            continue
        found.append(var_name)
    return found

# Two empty DataFrames at module level - presumably sample variables for
# get_global_dfs to find (TODO confirm).
var1 = pd.DataFrame()
xxx = pd.DataFrame()

def get_df_var(var_name: str) -> pd.DataFrame:
    """
    Look up a module-level variable by name.

    Returns whatever object is bound to `var_name` in `globals()`, or None
    when no such name exists. The value's type is not checked.
    """
    return globals().get(var_name)

In [79]:
import ipywidgets as widgets



from msticpy.config.file_browser import FileBrowser


# Keyword arguments (splatted with ** into widget constructors) that set a
# fixed 120px width for the widget's description label.
DESC_WIDTH = {"style": {"description_width": "120px"}}

# Keyword arguments that give a widget a layout 80% wide.
WIDTH_80 = {"layout": widgets.Layout(width="80%")}

def border_layout(width="95%"):
    """
    Return keyword arguments that give a widget a bordered layout.

    The result is a dict with a single "layout" key so it can be splatted
    into a widget constructor with ``**``. `width` sets the layout width;
    border, margin and padding are fixed.
    """
    bordered = widgets.Layout(
        width=width,
        border="solid gray 1px",
        margin="1pt",
        padding="5pt",
    )
    return {"layout": bordered}




# Button intended to open a file browser for loading a DataFrame.
# NOTE(review): this button is not placed in any layout in this cell.
btn_get_file_name = widgets.Button(description="Select file...", tooltip="Browse for DataFrame")
# NOTE(review): the FileBrowser instance is created and immediately discarded.
FileBrowser()

from msticpy.nbtools.nbwidgets import QueryTime

# Timeline model that the UI below edits.
c_timeline = CompositeTimeline()



# List of loaded data sets; options are read once, at construction time.
sel_ds = widgets.Select(description="Datasets", options=c_timeline.data_set_names)

# Basic data set edit controls
txt_ds_caption = widgets.Text(description="Series name", **DESC_WIDTH)
sel_ds_time_column = widgets.Select(description="Time column", **DESC_WIDTH)
txt_ds_color = widgets.Text(description="Series color", **DESC_WIDTH)
sel_ds_groupby = widgets.Select(description="Group by", **DESC_WIDTH)
msel_ds_tooltips = widgets.SelectMultiple(description="Tooltip cols", **DESC_WIDTH)


# Layout: [data set list + Delete] next to [edit fields | group/tooltip fields | Save].
btn_ds_del = widgets.Button(description="Delete")
btn_ds_apply = widgets.Button(description="Save")
vb_ds_edit1 = widgets.VBox([txt_ds_caption, sel_ds_time_column, txt_ds_color])
vb_ds_edit2 = widgets.VBox([sel_ds_groupby, msel_ds_tooltips])
hb_ds_edit = widgets.HBox([vb_ds_edit1, vb_ds_edit2, btn_ds_apply], **border_layout("70%"))
vb_ds_list = widgets.VBox([sel_ds, btn_ds_del], **border_layout("25%"))
hb_data_set = widgets.HBox([vb_ds_list, hb_ds_edit])

# List of existing observations (annotations).
sel_obs = widgets.Select(description="Observations")
# Observation edit controls
txt_obs_caption = widgets.Text(description="Caption", **DESC_WIDTH, **WIDTH_80)
txt_obs_desc = widgets.Textarea(description="Text", **DESC_WIDTH, **WIDTH_80)
dt_obs_time = widgets.DatePicker(description="Timestamp")
lbl_obs_sliders = widgets.Label(value="Annotation positioning")
# Settings shared by the horizontal and vertical offset sliders.
SLIDER_PARAMS = dict(
    value=0,
    min=-10,
    max=10,
    step=1,
    readout=True,
    continuous_update=False,
    readout_format='d',
    orientation='horizontal',
)
int_obs_hoff = widgets.IntSlider(
    description='H Offset:',
    **SLIDER_PARAMS,
)
int_obs_voff = widgets.IntSlider(
    description='V Offset:',
    **SLIDER_PARAMS,
)
btn_obs_del = widgets.Button(description="Delete")
btn_obs_save = widgets.Button(description="Save")
# Single definition of btn_obs_new: an earlier "New" button bound to the same
# name was always overwritten by this one before being used.
btn_obs_new = widgets.Button(description="Save new")
btn_obs_clear = widgets.Button(description="Clear")
# Layout: [observation list + Delete] next to [text fields + buttons | offset sliders].
hb_obs_buttons = widgets.HBox([btn_obs_save, btn_obs_new, btn_obs_clear])
vb_obs_edit1 = widgets.VBox([txt_obs_caption, txt_obs_desc, dt_obs_time, hb_obs_buttons], layout=widgets.Layout(width="60%"))
vb_obs_edit2 = widgets.VBox([lbl_obs_sliders, int_obs_hoff, int_obs_voff])
hb_obs_edit = widgets.HBox([vb_obs_edit1, vb_obs_edit2], **border_layout("65%"))
vb_obs_list = widgets.VBox([sel_obs, btn_obs_del], **border_layout("25%"))
hb_obs = widgets.HBox([vb_obs_list, hb_obs_edit])



# Add data frame
# Selector listing the DataFrame variables found in the notebook globals.
sel_add_var_name = widgets.Select(
    description="Dataframe variables",
    options=get_global_dfs(),
    **DESC_WIDTH,
)
btn_add_add = widgets.Button(description="Add DataFrame", tooltip="Add DataFrame to Datasets.")
# The refresh button previously carried the Add button's tooltip text.
btn_add_refresh = widgets.Button(
    description="Refresh vars",
    tooltip="Refresh the list of DataFrame variables.",
)
hb_add_dataset = widgets.HBox([sel_add_var_name, btn_add_refresh, btn_add_add])

# Filter expression
# Free-text box for a pandas query expression plus its Apply button.
txt_df_filter = widgets.Textarea(description="Pandas query", layout=widgets.Layout(height="200px", width="50%"))
btn_apply_query = widgets.Button(description="Apply", tooltip="Apply query to data.")
vb_query = widgets.VBox([txt_df_filter, btn_apply_query])

# Time range
qt_select_time_range = QueryTime(description="Filter time range of data")
btn_apply_time_range = widgets.Button(description="Save", tooltip="Apply selected time range to data.")
vb_time_range = widgets.VBox([qt_select_time_range.layout, btn_apply_time_range])


# Both filters live in an accordion; selected_index = None starts it collapsed.
accd_filter = widgets.Accordion(children=[vb_time_range, vb_query])
accd_filter.set_title(0, "Filter time range")
accd_filter.set_title(1, "Filter data")
accd_filter.selected_index = None
vb_data_set_edit = widgets.VBox([hb_data_set, accd_filter])

# Top-level tabs: data set editing, annotation editing, adding data sets.
tab_ds_obs = widgets.Tab(children=[vb_data_set_edit, hb_obs, hb_add_dataset])
tab_ds_obs.set_title(0, "Loaded data sets")
tab_ds_obs.set_title(1, "Annotations")
tab_ds_obs.set_title(2, "Add data sets")

# Footer label used for status messages.
lbl_status = widgets.Label(value="Test status")

# (A stray `widgets.Layout()` expression whose result was discarded has been removed.)
# NOTE(review): "text-align" may not be a recognized style key for the HTML widget - confirm.
html_title = widgets.HTML(value="<h2>Timeline builder</h2>", style={"text-align": "center"})
app_layout = widgets.AppLayout(
    header=html_title,
    center=tab_ds_obs,
    footer=lbl_status,
    pane_heights=[1, "400px", 1]
)

# Last expression of the cell: displays the assembled UI.
app_layout

AppLayout(children=(HTML(value='<h2>Timeline builder</h2>', layout=Layout(grid_area='header')), Label(value='T…

In [None]:
## Event handlers

def _refresh_df_vars(btn):
    """Click handler: reload the DataFrame-variable selector options."""
    del btn  # the clicked button is passed in but not needed
    current_names = get_global_dfs()
    sel_add_var_name.options = current_names


btn_add_refresh.on_click(_refresh_df_vars)

def _select_dataset(change):
    """
    Observer for the data set selector.

    Fetches the data for the newly selected key and refreshes, in order, the
    group-by options, the time-column options and the status text.
    """
    selected_data = _get_data_for_key(change.get("new"))
    for refresh in (_update_groupby, _update_timecolumn, _update_status):
        refresh(selected_data)


sel_ds.observe(_select_dataset, names="value")

def _save_ds_changes(btn):
    """
    Click handler for save changes to current set.

    Reads the data set edit controls and passes their values to
    `_update_dataset`. Nothing is saved when the series name is empty; a
    message is written to the status label instead.
    """
    del btn
    if not txt_ds_caption.value:
        lbl_status.value = "Series name is required to save a data set."
        return

    def _column_or_none(value):
        # The column selectors offer the literal string "None" as "no column".
        return None if value in (None, "None") else value

    _update_dataset(
        caption=txt_ds_caption.value,
        time_column=_column_or_none(sel_ds_time_column.value),
        group_by=_column_or_none(sel_ds_groupby.value),
        color=txt_ds_color.value or None,
        source_columns=list(msel_ds_tooltips.value),
    )


btn_ds_apply.on_click(_save_ds_changes)


def _update_groupby(data):
    """Fill the group-by selector with "None" followed by the sorted column names."""
    column_names = sorted(data.columns)
    sel_ds_groupby.options = ["None"] + column_names

def _update_timecolumn(data):
    """Fill the time-column selector with "None" plus the sorted datetime columns."""
    time_frame = data.select_dtypes(include=["datetime", "datetimetz"])
    sel_ds_time_column.options = ["None"] + sorted(time_frame.columns)

def _update_status(data):
    """
    Show the record count and time range of `data` in the status label.

    The start/end times are included only when the selected time column is
    an actual column of `data`.
    """
    # TODO apply filter and timespan
    time_col = sel_ds_time_column.value
    status = f"Selected records: {len(data)}  "
    # The selector can hold the "None" placeholder (or nothing at all);
    # indexing the data with that would raise KeyError.
    if time_col in data.columns:
        status += (
            f"Start time: {data[time_col].min().isoformat()}  "
            f"End time: {data[time_col].max().isoformat()}  "
        )
    # The message was previously built but never displayed.
    lbl_status.value = status

# stubs
def _get_data_for_key(key):
    # Stub: ignores `key` and always returns the global `mde_df`, which is
    # loaded in a different cell of this notebook.
    return mde_df

def _update_dataset(caption, time_column, group_by, color, source_columns):
    # Stub: accepts the edited data set values, does nothing with them and
    # reports success.
    return True


In [73]:
# Scratch check: which columns of mde_df have a datetime dtype?
mde_df.select_dtypes(include=["datetime", "datetimetz"])


Unnamed: 0,Timestamp,ProcessCreationTime,InitiatingProcessCreationTime,InitiatingProcessParentCreationTime
0,2021-12-15 23:00:03.449464400+00:00,2021-12-15 23:00:03.449464400+00:00,2021-11-11 03:26:31.574305+00:00,2021-11-11 03:26:28.153467700+00:00
1,2021-12-15 23:00:03.449833900+00:00,2021-12-15 23:00:03.449833900+00:00,2021-12-15 23:00:03.038042200+00:00,2021-11-11 03:26:31.574305+00:00
2,2021-12-15 23:11:27.173362800+00:00,2021-12-15 23:11:27.173362800+00:00,2021-11-11 03:26:32.145838600+00:00,2021-11-11 03:26:28.153467700+00:00
3,2021-12-15 23:11:37.869622100+00:00,2021-12-15 23:11:37.869622100+00:00,2021-12-15 23:11:26.473328+00:00,2021-11-11 03:26:32.145838600+00:00
4,2021-12-15 23:11:37.870024200+00:00,2021-12-15 23:11:37.870024200+00:00,2021-12-15 23:11:37.777611600+00:00,2021-12-15 23:11:26.473328+00:00
...,...,...,...,...
195,2021-12-16 03:31:11.600695600+00:00,2021-12-16 03:31:11.600695600+00:00,2021-12-16 03:28:10.765459700+00:00,2021-12-16 03:28:07.042663300+00:00
196,2021-12-16 03:31:12.486054600+00:00,2021-12-16 03:31:12.486054600+00:00,2021-12-16 03:28:07.042663300+00:00,2021-12-16 03:28:06.996438500+00:00
197,2021-12-16 03:32:11.149138600+00:00,2021-12-16 03:32:11.149138600+00:00,2021-12-16 03:28:10.930604400+00:00,2021-12-16 03:28:07.042663300+00:00
198,2021-12-16 03:32:14.034770600+00:00,2021-12-16 03:32:14.034770600+00:00,2021-12-16 03:28:07.231891200+00:00,2021-12-16 03:28:07.042663300+00:00


In [None]:
# NOTE(review): unfinished cell - `get_matching_vars` has no parameter list or
# body, so this cell is a syntax error as written. `combo_df_var_name` is not
# defined in the visible part of this notebook.
def get_matching_vars
combo_df_var_name.observe(get_matching_vars, names="value")

In [33]:
from msticpy.vis.timeline import display_timeline

# Load the sample process data used by the UI stubs in this notebook.
# NOTE(review): hard-coded absolute path to a local checkout; also,
# read_pickle can execute arbitrary code - only load pickle files you trust.
mde_df = pd.read_pickle("e:/src/msticpy/tests/testdata/mde_proc_pub.pkl")

# Plot the sample data, using the "Timestamp" column as event time.
plot = display_timeline(data=mde_df, time_column="Timestamp")

In [42]:
# Scratch: list the public attribute names of the plot object (the result is
# not shown because it is not the last expression of the cell).
[attr for attr in dir(plot) if not attr.startswith("_")]
# First child of the returned layout - presumably the main figure (TODO confirm).
fig = plot.children[0]

In [45]:
# Scratch: prototype of the horizontal offset slider. Its result is
# discarded because it is not the last expression of the cell.
widgets.IntSlider(
    value=0,
    min=-10,
    max=10,
    step=1,
    description='H Offset:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
# Prototype of the vertical offset slider; this one is displayed by the cell.
widgets.IntSlider(
    value=0,
    min=-10,
    max=10,
    step=1,
    description='V Offset:',
    disabled=False,
    continuous_update=False,
    orientation='vertical',
    readout=True,
    readout_format='d'
)

IntSlider(value=0, continuous_update=False, description='V Offset:', max=10, min=-10, orientation='vertical')