In [1]:
import datetime
import glob
import hashlib
import re

import numpy as np
import pandas as pd
import sklearn.metrics.pairwise
import sklearn.neighbors
import sklearn.preprocessing
import sqlparse

from typing import Dict

In [2]:
pgfiles = glob.glob('data/extracted/simple/postgresql*.csv')
display(pgfiles)

['data/extracted/simple/postgresql-2021-12-06_160121.csv',
 'data/extracted/simple/postgresql-2021-12-06_160207.csv',
 'data/extracted/simple/postgresql-2021-12-06_160127.csv',
 'data/extracted/simple/postgresql-2021-12-06_160157.csv',
 'data/extracted/simple/postgresql-2021-12-06_160202.csv',
 'data/extracted/simple/postgresql-2021-12-06_160143.csv',
 'data/extracted/simple/postgresql-2021-12-06_160146.csv',
 'data/extracted/simple/postgresql-2021-12-06_160210.csv',
 'data/extracted/simple/postgresql-2021-12-06_160132.csv',
 'data/extracted/simple/postgresql-2021-12-06_160129.csv',
 'data/extracted/simple/postgresql-2021-12-06_160135.csv',
 'data/extracted/simple/postgresql-2021-12-06_160118.csv',
 'data/extracted/simple/postgresql-2021-12-06_160149.csv',
 'data/extracted/simple/postgresql-2021-12-06_160048.csv',
 'data/extracted/simple/postgresql-2021-12-06_160138.csv',
 'data/extracted/simple/postgresql-2021-12-06_160159.csv',
 'data/extracted/simple/postgresql-2021-12-06_160124.csv

In [3]:
# https://www.postgresql.org/docs/13/runtime-config-logging.html#RUNTIME-CONFIG-LOGGING-CSVLOG
PG_LOG_COLUMNS = [
    'log_time',
    'user_name',
    'database_name',
    'process_id',
    'connection_from',
    'session_id',
    'session_line_num',
    'command_tag',
    'session_start_time',
    'virtual_transaction_id',
    'transaction_id',
    'error_severity',
    'sql_state_code',
    'message',
    'detail',
    'hint',
    'internal_query',
    'internal_query_pos',
    'context',
    'query',
    'query_pos',
    'location',
    'application_name',
    'backend_type',
]


df = pd.concat(
    pd.read_csv(pgfile,
                names=PG_LOG_COLUMNS,
                parse_dates=['log_time', 'session_start_time'],
                usecols=['log_time', 'session_start_time', 'command_tag', 'message'],
                header=None,
                index_col=False)
    for pgfile in pgfiles
)
print(df.shape)
print(df.columns)
print(set(df['command_tag']))

(603434, 4)
Index(['log_time', 'command_tag', 'session_start_time', 'message'], dtype='object')
{nan, 'DELETE', 'INSERT', 'SHOW', 'COMMIT', 'SELECT', 'ROLLBACK', 'BEGIN', 'SET', 'UPDATE'}


## Extracting the relevant queries.

In [4]:
commands = ['SELECT', 'INSERT', 'UPDATE', 'DELETE']

def extract_query(message):
    for command in commands:
        idx = message.find(command)
        if idx != -1:
            query = message[idx:]
            return query
    return ''

df['query'] = df['message'].apply(extract_query)
df['query']

0        SELECT S_QUANTITY, S_DATA, S_DIST_01, S_DIST_0...
1        SELECT I_PRICE, I_NAME , I_DATA   FROM item WH...
2        SELECT S_QUANTITY, S_DATA, S_DIST_01, S_DIST_0...
3        SELECT I_PRICE, I_NAME , I_DATA   FROM item WH...
4        SELECT S_QUANTITY, S_DATA, S_DIST_01, S_DIST_0...
                               ...                        
28029    SELECT S_QUANTITY, S_DATA, S_DIST_01, S_DIST_0...
28030    SELECT I_PRICE, I_NAME , I_DATA   FROM item WH...
28031    SELECT S_QUANTITY, S_DATA, S_DIST_01, S_DIST_0...
28032    SELECT I_PRICE, I_NAME , I_DATA   FROM item WH...
28033    SELECT S_QUANTITY, S_DATA, S_DIST_01, S_DIST_0...
Name: query, Length: 603434, dtype: object

## Anonymizer: salt and hash non-date non-digit strings.

In [5]:
ANONYMIZE = False

SALT = 'andycannotsay.com'.encode('utf-8')
DATE_REGEX = re.compile(r'\d{4}-\d{2}-\d{2}.*')
DIGITS_REGEX = re.compile(r'\d+\.?\d*')

def anonymize(sql):
    cleaned_tokens = []

    # TODO(WAN): sqlparse.parse is actually quite slow.
    # Do we really need this?
    parsed = sqlparse.parse(sql)
    if len(parsed) == 0:
        return ''
    
    assert len(parsed) == 1
    tokens = parsed[0].flatten()
    for token in tokens:
        token = str(token)

        single_quoted = token.startswith("'") and token.endswith("'")
        double_quoted = token.startswith('"') and token.endswith('"')
        not_quoted = not single_quoted and not double_quoted

        is_date = DATE_REGEX.search(token) is not None
        is_digits = DIGITS_REGEX.search(token) is not None

        if not_quoted or is_date or is_digits:
            cleaned_tokens.append(token)
            continue

        sha = hashlib.sha256(SALT + token.encode('utf-8')).hexdigest()
        clean_token = "'{}\\{}'".format(len(token) - 2, sha)
        cleaned_tokens.append(clean_token)

    return ''.join(cleaned_tokens)

if ANONYMIZE:
    df['query_anon'] = df['query'].apply(anonymize)
    df['query_anon']

## Pre-processor: extracting query templates.

In [6]:
STRING_REGEX = r'([^\\])\'((\')|(.*?([^\\])\'))'
DOUBLE_QUOTE_STRING_REGEX = r'([^\\])"((")|(.*?([^\\])"))'
INT_REGEX = r'([^a-zA-Z])-?\d+(\.\d+)?'
HASH_REGEX = r'(\'\d+\\.*?\')'

def extract_template(query):
    template = query
    template = re.sub(HASH_REGEX, r"@@@", template)
    template = re.sub(STRING_REGEX, r"\1&&&", template)
    template = re.sub(DOUBLE_QUOTE_STRING_REGEX, r"\1&&&", template)
    template = re.sub(INT_REGEX, r"\1#", template)
    return template

query_column = 'query_anon' if ANONYMIZE else 'query'

df['query_template'] = df[query_column].apply(extract_template)
df['query_template']

0        SELECT S_QUANTITY, S_DATA, S_DIST_#, S_DIST_#,...
1        SELECT I_PRICE, I_NAME , I_DATA   FROM item WH...
2        SELECT S_QUANTITY, S_DATA, S_DIST_#, S_DIST_#,...
3        SELECT I_PRICE, I_NAME , I_DATA   FROM item WH...
4        SELECT S_QUANTITY, S_DATA, S_DIST_#, S_DIST_#,...
                               ...                        
28029    SELECT S_QUANTITY, S_DATA, S_DIST_#, S_DIST_#,...
28030    SELECT I_PRICE, I_NAME , I_DATA   FROM item WH...
28031    SELECT S_QUANTITY, S_DATA, S_DIST_#, S_DIST_#,...
28032    SELECT I_PRICE, I_NAME , I_DATA   FROM item WH...
28033    SELECT S_QUANTITY, S_DATA, S_DIST_#, S_DIST_#,...
Name: query_template, Length: 603434, dtype: object

In [7]:
df['log_time_s'] = df['log_time'].round('S')
df['log_time_s']

0       2021-12-06 16:01:21-05:00
1       2021-12-06 16:01:21-05:00
2       2021-12-06 16:01:21-05:00
3       2021-12-06 16:01:21-05:00
4       2021-12-06 16:01:21-05:00
                   ...           
28029   2021-12-06 16:01:54-05:00
28030   2021-12-06 16:01:54-05:00
28031   2021-12-06 16:01:54-05:00
28032   2021-12-06 16:01:54-05:00
28033   2021-12-06 16:01:54-05:00
Name: log_time_s, Length: 603434, dtype: datetime64[ns, pytz.FixedOffset(-300)]

In [8]:
gb = df.groupby(['query_template', 'log_time_s']).size()
grouped_df = pd.DataFrame(gb, columns=['count'])
grouped_df.drop('', axis=0, level=0, inplace=True)
grouped_df

Unnamed: 0_level_0,Unnamed: 1_level_0,count
query_template,log_time_s,Unnamed: 2_level_1
DELETE FROM new_order WHERE NO_O_ID = # AND NO_D_ID = # AND NO_W_ID = #,2021-12-06 16:01:12-05:00,110
DELETE FROM new_order WHERE NO_O_ID = # AND NO_D_ID = # AND NO_W_ID = #,2021-12-06 16:01:13-05:00,158
DELETE FROM new_order WHERE NO_O_ID = # AND NO_D_ID = # AND NO_W_ID = #,2021-12-06 16:01:14-05:00,75
DELETE FROM new_order WHERE NO_O_ID = # AND NO_D_ID = # AND NO_W_ID = #,2021-12-06 16:01:15-05:00,137
DELETE FROM new_order WHERE NO_O_ID = # AND NO_D_ID = # AND NO_W_ID = #,2021-12-06 16:01:16-05:00,90
...,...,...
UPDATE warehouse SET W_YTD = W_YTD + # WHERE W_ID = #,2021-12-06 16:02:08-05:00,159
UPDATE warehouse SET W_YTD = W_YTD + # WHERE W_ID = #,2021-12-06 16:02:09-05:00,174
UPDATE warehouse SET W_YTD = W_YTD + # WHERE W_ID = #,2021-12-06 16:02:10-05:00,161
UPDATE warehouse SET W_YTD = W_YTD + # WHERE W_ID = #,2021-12-06 16:02:11-05:00,169


## Clusterer

In [9]:
# TODO(WAN): Port online_clustering.py.
# TODO(WAN): I would be somewhat surprised if sklearn doesn't have this built in... We'll see

In [147]:
DF = grouped_df.copy()
assert DF.index.names == ['query_template', 'log_time_s']
assert DF.columns.values == ['count']


class Clusterer:
    def __init__(self, dataframe, n_samples=10000, rho=0.8):
        self._df = dataframe
        self.n_samples = n_samples
        self.rho = rho

        # Cluster every second.
        self.min_time = self._get_timestamps().min()
        self.max_time = self._get_timestamps().max()
        self.n = (self.max_time - self.min_time).days * 24*60*60 \
                 + (self.max_time - self.min_time).seconds \
                 + 1
        self.cluster_gap = 1
        self.n_gaps = self.n // self.cluster_gap + 1
    
        self._dbgname = {v:k for k,v in dict(enumerate(self._get_queries())).items()}
        
        
    def _get_queries(self):
        return sorted(set(self._df.index.get_level_values(0)))
    def _get_timestamps(self):
        return self._df.index.get_level_values(1)

    def _get_first_arrival(self, template):
        return self._df.xs(template, level=0).index.min()
    
    @staticmethod
    def _query_df_range(df, template, start_time, end_time):
        return df.query(
            "`query_template` == @template"
            " and @start_time <= `log_time_s`"
            " and `log_time_s` < @end_time"
        ).droplevel(0)
    
    @staticmethod
    def _query_df(df, template, timestamps):
        df = df.query(
            "`query_template` == @template"
            " and `log_time_s` in @timestamps"
        ).droplevel(0)
        return df.reindex(timestamps, fill_value=0)

    @staticmethod
    def _query_series(series, timestamps):
        series = series.query("`log_time_s` in @timestamps")
        return series.reindex(timestamps, fill_value=0)

    @staticmethod
    def _similarity(s1, s2):
        if s1.shape[0] == 0 or s2.shape[0] == 0:
            return 0
        # Reshape because we only have a single feature, the count.
        arr1 = s1.reshape(-1, 1)
        arr2 = s2.reshape(-1, 1)
        # Compute the cosine similarity.
        return sklearn.metrics.pairwise.cosine_similarity(arr1, arr2)[0][0]

    @staticmethod
    def _sample_timestamps(n, start_time, end_time, n_samples):
        if n > n_samples:
            offsets = np.random.choice(a=n, size=n_samples, replace=False)
        else:
            offsets = np.arange(n)
        timestamps = [start_time]
        for offset in offsets:
            next_time = pd.Timedelta(seconds=offset) + start_time
            if next_time >= end_time:
                break
            timestamps.append(next_time)
        return pd.array(timestamps)

    @staticmethod
    def _build_neighbors(centers, timestamps, n_neighbors):
        clusters = sorted(centers.keys())
        samples = np.array([
            Clusterer._query_series(centers[cluster], timestamps).values
            for cluster in clusters
        ])

        if len(samples) == 0:
            neighbors = None
        else:
            samples = samples.reshape(len(clusters), -1)
            normalized_samples = sklearn.preprocessing.normalize(samples, copy=False)
            neighbors = sklearn.neighbors.NearestNeighbors(n_neighbors=n_neighbors, algorithm='kd_tree', metric='l2')
            neighbors.fit(normalized_samples)
        return neighbors
        
    
    def cluster(self):
        rho = self.rho
        
        centers : Dict[int, pd.DataFrame] = {}
        cluster_totals : Dict[int, int] = {}
        cluster_sizes : Dict[int, int] = {}

        assignments = [
            (self.min_time, {template: None for template in sorted(self._get_queries())})
        ]
        
        current_time = self.min_time
        next_cluster = 0

        for gap in range(self.n_gaps):
            next_time = current_time + datetime.timedelta(seconds=self.cluster_gap)
            # Up to last 10 seconds.
            start_time = max(self.min_time, next_time - datetime.timedelta(seconds=10))
            timestamps = Clusterer._sample_timestamps(self.n, start_time, next_time, self.n_samples)

            last_assignment = assignments[-1][1]
            assignment = assignments[-1][1].copy()

            # Update counts for all the assignments made in the past round.
            for template in last_assignment:
                old_cluster = last_assignment[template]
                if old_cluster is not None:
                    counts = Clusterer._query_df_range(self._df, template, current_time, next_time)
                    centers[old_cluster] = centers[old_cluster].add(counts,
                                                                    fill_value=0)
                    cluster_totals[old_cluster] += counts.sum()

            # If possible, build a kdtree of neighbors.
            neighbors = Clusterer._build_neighbors(centers, timestamps, n_neighbors=1)
            
            # For each template, try to assign a cluster.
            for template in self._get_queries():
                old_cluster = assignment[template]
                
                if old_cluster is not None:
                    # Test if the template still belongs to its old cluster.
                    last_cluster_element = cluster_sizes[old_cluster] == 1
                    still_belongs = Clusterer._similarity(
                        Clusterer._query_df(self._df, template, timestamps).values,
                        Clusterer._query_series(centers[old_cluster], timestamps).values
                        ) > rho
                    # If the template still belongs, continue.
                    if last_cluster_element or still_belongs:
                        reason = ''
                        if last_cluster_element: reason += 'L'
                        if still_belongs: reason += 'B'
#                         print(f'Template stayed in cluster {old_cluster} because ({reason}): {self._dbgname[template]}')
                        continue
                    # Otherwise, eliminate the template from its old cluster.
                    cluster_sizes[old_cluster] -= 1
                    centers[old_cluster] = centers[old_cluster].sub(Clusterer._query_df_range(self._df, template, start_time, next_time),
                                                                    fill_value=0)
                    print(f'Template eliminated from cluster {old_cluster}: {self._dbgname[template]}')

                # Test if template has appeared at this point in time; otherwise, continue.
                if assignment[template] is None:
                    first_arrival = self._get_first_arrival(template)
                    if current_time <= first_arrival:
                        print(f'Template has not yet arrived at {current_time}, skipping: {self._dbgname[template]}')
                        continue
                    print(f'Template arrived at {current_time}: {self._dbgname[template]}')

                new_cluster = None
                # Try to assign to existing cluster.
                if neighbors == None:
                    for cluster in centers.keys():
                        if Clusterer._similarity(
                            self._query_df(self._df, template, timestamps).values,
                            self._query_series(centers[cluster], timestamps).values
                            ) > rho:
                            new_cluster = cluster
                            break
                else:
                    data = Clusterer._query_df(self._df, template, timestamps)['count'].values.reshape(1, -1)
                    data = sklearn.preprocessing.normalize(data)
                    neighbor = neighbors.kneighbors(data, return_distance=False)[0][0]
                    clusters = sorted(centers.keys())
                    if Clusterer._similarity(data, centers[clusters[neighbor]].values) > rho:
                        new_cluster = clusters[neighbor]

                # If this template found a cluster to join, then make the assignment and continue.
                if new_cluster is not None:
                    description = 'joined' if assignment[template] is None else 'reassigned to'
                    print(f'Template {description} cluster {new_cluster}: {self._dbgname[template]}')
                    assignment[template] = new_cluster
                    centers[new_cluster] = centers[new_cluster].add(
                        self._query_df_range(self._df, template, start_time, next_time),
                        fill_value=0)
                    cluster_sizes[new_cluster] += 1
                    continue

                # Otherwise, this template needs a new cluster. Make a new cluster.
                assignment[template] = next_cluster                
                centers[next_cluster] = self._query_df_range(self._df, template, start_time, next_time)
                assert centers[next_cluster].index.name == 'log_time_s'
                assert centers[next_cluster].columns.values == ['count']
                if centers[next_cluster].shape[0] == 0:
                    print(f'WARNING: cluster {cluster} has no items. Does the following query appear within the lookback window: {self._dbgname[template]}')

                cluster_sizes[next_cluster] = 1
                cluster_totals[next_cluster] = 0
                print(f'Created cluster {next_cluster} based on template: {self._dbgname[template]}')
                # Update the cluster counter.
                next_cluster += 1
        
            root = [None] * len(centers)
            # If possible, build an updated kdtree of neighbors.
            neighbors = Clusterer._build_neighbors(centers, timestamps, n_neighbors=2)

            clusters = sorted(centers.keys())
            if len(clusters) > 1:
                # Try to merge clusters.
                for i, cluster in enumerate(clusters):
                    merge_cluster = None
                    data = Clusterer._query_series(centers[cluster], timestamps)['count'].values.reshape(1, -1)
                    data = sklearn.preprocessing.normalize(data)
                    neighbor = neighbors.kneighbors(data, return_distance=False)

                    neighbor_inds = neighbor[0]
                    if clusters[neighbor_inds[0]] == cluster:
                        neighbor = neighbor_inds[1]
                    else:
                        neighbor = neighbor_inds[0]
                    while root[neighbor] is not None:
                        neighbor = root[neighbor]
                    is_similar = self._similarity(
                        self._query_series(centers[cluster], timestamps).values,
                        self._query_series(centers[clusters[neighbor]], timestamps).values) > rho
                    if cluster != clusters[neighbor] and is_similar:
                        merge_cluster = clusters[neighbor]
                    if merge_cluster != None:
                        centers[merge_cluster] = centers[merge_cluster].add(centers[cluster], fill_value=0)
                        cluster_sizes[merge_cluster] += cluster_sizes[cluster]
                        del centers[cluster]
                        del cluster_sizes[cluster]
                        if neighbors != None:
                            root[i] = neighbor
                        for template in self._get_queries():
                            if assignment[template] == cluster:
                                assignment[template] = merge_cluster
                                print(f'Template merged from cluster {cluster} into {merge_cluster}: {self._dbgname[template]}')
            assignments.append((next_time, assignment))
            current_time = next_time
            for cluster, df in centers.items():
                if df.shape[0] == 0:
                    print(f'WARNING: gap {gap} cluster {cluster} has no items.')
        for template, cluster in assignments[-1][1].items():
            print(self._dbgname[template], "->", cluster)
        print('done')
    
Clusterer(DF).cluster()

Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 0
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 1
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 2
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 3
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 4
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 5
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 6
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 11
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 12
Template has not yet arrived at 2021-12-06 16:01:11-05:00, skipping: 13
Te

Template has not yet arrived at 2021-12-06 16:01:15-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:01:15-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:01:15-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:01:15-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:01:15-05:00, skipping: 11
Template has not yet arrived at 2021-12-06 16:01:15-05:00, skipping: 12
Template has not yet arrived at 2021-12-06 16:01:15-05:00, skipping: 13
Template has not yet arrived at 2021-12-06 16:01:15-05:00, skipping: 14
Template has not yet arrived at 2021-12-06 16:01:15-05:00, skipping: 35
Template has not yet arrived at 2021-12-06 16:01:16-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:01:16-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:01:16-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:01:16-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:01:16-05:00, skipping: 1

Template has not yet arrived at 2021-12-06 16:01:23-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:01:23-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:01:23-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:01:23-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:01:23-05:00, skipping: 11
Template has not yet arrived at 2021-12-06 16:01:23-05:00, skipping: 12
Template has not yet arrived at 2021-12-06 16:01:23-05:00, skipping: 13
Template has not yet arrived at 2021-12-06 16:01:23-05:00, skipping: 14
Template has not yet arrived at 2021-12-06 16:01:23-05:00, skipping: 35
Template has not yet arrived at 2021-12-06 16:01:24-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:01:24-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:01:24-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:01:24-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:01:24-05:00, skipping: 1

Template has not yet arrived at 2021-12-06 16:01:33-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:01:33-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:01:33-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:01:33-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:01:33-05:00, skipping: 11
Template has not yet arrived at 2021-12-06 16:01:33-05:00, skipping: 12
Template has not yet arrived at 2021-12-06 16:01:33-05:00, skipping: 13
Template has not yet arrived at 2021-12-06 16:01:33-05:00, skipping: 14
Template has not yet arrived at 2021-12-06 16:01:33-05:00, skipping: 35
Template has not yet arrived at 2021-12-06 16:01:34-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:01:34-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:01:34-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:01:34-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:01:34-05:00, skipping: 1

Template has not yet arrived at 2021-12-06 16:01:43-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:01:43-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:01:43-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:01:43-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:01:43-05:00, skipping: 11
Template has not yet arrived at 2021-12-06 16:01:43-05:00, skipping: 12
Template has not yet arrived at 2021-12-06 16:01:43-05:00, skipping: 13
Template has not yet arrived at 2021-12-06 16:01:43-05:00, skipping: 14
Template has not yet arrived at 2021-12-06 16:01:43-05:00, skipping: 35
Template has not yet arrived at 2021-12-06 16:01:44-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:01:44-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:01:44-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:01:44-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:01:44-05:00, skipping: 1

Template has not yet arrived at 2021-12-06 16:01:53-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:01:53-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:01:53-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:01:53-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:01:53-05:00, skipping: 11
Template has not yet arrived at 2021-12-06 16:01:53-05:00, skipping: 12
Template has not yet arrived at 2021-12-06 16:01:53-05:00, skipping: 13
Template has not yet arrived at 2021-12-06 16:01:53-05:00, skipping: 14
Template has not yet arrived at 2021-12-06 16:01:53-05:00, skipping: 35
Template has not yet arrived at 2021-12-06 16:01:54-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:01:54-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:01:54-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:01:54-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:01:54-05:00, skipping: 1

Template has not yet arrived at 2021-12-06 16:02:03-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:02:03-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:02:03-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:02:03-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:02:03-05:00, skipping: 11
Template has not yet arrived at 2021-12-06 16:02:03-05:00, skipping: 12
Template has not yet arrived at 2021-12-06 16:02:03-05:00, skipping: 13
Template has not yet arrived at 2021-12-06 16:02:03-05:00, skipping: 14
Template has not yet arrived at 2021-12-06 16:02:03-05:00, skipping: 35
Template has not yet arrived at 2021-12-06 16:02:04-05:00, skipping: 7
Template has not yet arrived at 2021-12-06 16:02:04-05:00, skipping: 8
Template has not yet arrived at 2021-12-06 16:02:04-05:00, skipping: 9
Template has not yet arrived at 2021-12-06 16:02:04-05:00, skipping: 10
Template has not yet arrived at 2021-12-06 16:02:04-05:00, skipping: 1

Template arrived at 2021-12-06 16:02:13-05:00: 7
Created cluster 13 based on template: 7
Template arrived at 2021-12-06 16:02:13-05:00: 8
Created cluster 14 based on template: 8
Template arrived at 2021-12-06 16:02:13-05:00: 9
Created cluster 15 based on template: 9
Template arrived at 2021-12-06 16:02:13-05:00: 10
Created cluster 16 based on template: 10
Template arrived at 2021-12-06 16:02:13-05:00: 11
Created cluster 17 based on template: 11
Template arrived at 2021-12-06 16:02:13-05:00: 12
Created cluster 18 based on template: 12
Template arrived at 2021-12-06 16:02:13-05:00: 13
Created cluster 19 based on template: 13
Template arrived at 2021-12-06 16:02:13-05:00: 14
Created cluster 20 based on template: 14
Template arrived at 2021-12-06 16:02:13-05:00: 35
Created cluster 21 based on template: 35
0 -> 4
1 -> 4
2 -> 4
3 -> 4
4 -> 4
5 -> 8
6 -> 9
7 -> 13
8 -> 14
9 -> 15
10 -> 16
11 -> 17
12 -> 18
13 -> 19
14 -> 20
15 -> 4
16 -> 4
17 -> 4
18 -> 4
19 -> 4
20 -> 4
21 -> 4
22 -> 4
23 ->

## Forecaster

In [None]:
# TODO(WAN): Port exp_multi_online_continuous.py