# Arctic

> Arctic helper scripts and functions.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# | default_exp arctic

In [None]:
# | hide
from nbdev.showdoc import *

In [None]:
# | export
import os

import re
import gc
import click
from click.testing import CliRunner
from arcticdb import Arctic, QueryBuilder
from arcticdb.version_store.library import Library
from arcticdb.exceptions import LibraryNotFound
import hydra
from hydra import initialize, initialize_config_module, initialize_config_dir, compose
from omegaconf import OmegaConf
from pathlib import Path
from pprint import pformat
import textwrap
from lobster_tools.config import (
    MainConfig,
    Overrides,
    NASDAQExchange,
    ETFMembers,
    register_configs,
    get_config,
)
from lobster_tools.preprocessing import Data, Lobster, Event, infer_ticker_to_date_range, infer_ticker_to_ticker_path, infer_ticker_dict
import sys
import pandas as pd
import numpy as np
import logging
from logging import Logger
from datetime import date
from typing import Callable, TypedDict, Protocol, cast
from dataclasses import dataclass
import time
from inspect import signature
from functools import wraps

from concurrent.futures import ProcessPoolExecutor, wait
import subprocess

When using the `@hydra.main` decorator, `register_configs` must be called. If simply using a notebook or writing the CLIs with `click`, it is enough to use `get_config`.

In [None]:
# | export
# Must be called before the `@hydra.main` decorator can be used; done once at import time.
register_configs()
# Configuration loaded with the `full_server` overrides. The click option
# defaults in this module (e.g. `cfg.db.db_path`) are read from it.
cfg = get_config(overrides=Overrides.full_server)

In [None]:
# | export
# Keyword settings intended for click's `context_settings=` argument.
CONTEXT_SETTINGS = {
    # Accept the short `-h` flag in addition to `--help`.
    "help_option_names": ["-h", "--help"],
    # Lower-case string tokens; any non-string value is passed through untouched.
    "token_normalize_func": lambda token: token.lower() if isinstance(token, str) else token,
    "show_default": True,
}

In [None]:
# @click.group()
# @click.option('--debug/--no-debug', default=False)
# @click.pass_context
# def cli(ctx, debug):
#     # ensure that ctx.obj exists and is a dict (in case `cli()` is called
#     # by means other than the `if` block below)
#     ctx.ensure_object(dict)

#     ctx.obj['DEBUG'] = debug

# @cli.command()
# @click.pass_context
# def sync(ctx):
#     click.echo(f"Debug is {'on' if ctx.obj['DEBUG'] else 'off'}")

# if __name__ == '__main__':
#     cli(obj={})

In [None]:

# option_dict = {
#     'db_path': click.option("-d", "--db_path", default=cfg.db.db_path, help="Database path"),
#     'library': click.option("-l", "--library", default=cfg.db.db_path, help="Library name"),
# }

# # Custom decorator to apply options based on a list of names
# def apply_options(option_names):
#     def decorator(f):
#         for option_name in reversed(option_names):
#             f = option_dict[option_name](f)
#         return f
#     return decorator

In [None]:
# | export
class Options:
    """Reusable `click.option` decorators, one attribute per option.

    Each attribute is a decorator that can be applied to a command callback,
    either directly or through `apply_options`.
    """

    def __init__(self) -> None:
        self.db_path = click.option("-d", "--db_path", default=cfg.db.db_path, help="Database path")
        # The default must be the configured library name (cfg.db.library);
        # using cfg.db.db_path here made `--library` default to the database path.
        self.library = click.option("-l", "--library", default=cfg.db.library, help="Library name")

def apply_options(options: list):
    """Combine several decorators into a single decorator.

    The decorators are applied last-to-first, so the result matches writing
    them as a decorator stack in the given order (first entry outermost).
    """

    def decorator(f):
        wrapped = f
        for apply_one in reversed(options):
            wrapped = apply_one(wrapped)
        return wrapped

    return decorator

# Root click group; the commands below attach to it via `@arctic.command()`.
# Documented with a `#` comment because click would show a docstring as help text.
@click.group()
def arctic():
    pass

# Single shared instance holding the reusable option decorators.
O = Options()
# Subcommand of `arctic` that takes the shared `--db_path` option.
@arctic.command()
@apply_options([O.db_path])
def initdb(db_path):
    # Only prints a message containing the path; nothing else is done here.
    print(f'Initialized the database {db_path}')
    # click.echo(f'Initialized the database {db_path}')

# Subcommand of `arctic` that takes both shared options (`--db_path`, `--library`)
# and prints the values it received.
@arctic.command()
@apply_options([O.db_path, O.library])
def use_both(db_path, library):
    print(f'Initialized the database {db_path} {library}')
    # click.echo(f'Initialized the database {db_path}')

# Subcommand of `arctic` with no options; it only echoes a message.
@arctic.command()
def dropdb():
    click.echo('Dropped the database')

In [None]:
# | export
# Option decorators built once at module level so commands can reuse them.
# Both defaults are read from the loaded configuration (`cfg.db`).
click_db_path = click.option("-d", "--db_path", default=cfg.db.db_path, help="database path")
click_library = click.option("-l", "--library", default=cfg.db.library, help="library name")

# Second click group; commands attach to it via `@cool.command()`.
@click.group()
def cool():
    pass

# Subcommand of `cool` using the module-level `click_db_path` option decorator.
@cool.command()
@click_db_path
def initdb(db_path):
    # Only prints a message containing the path; nothing else is done here.
    print(f'Initialized the database {db_path}')
    # click.echo(f'Initialized the database {db_path}')

# Subcommand of `cool` with no options; it only echoes a message.
@cool.command()
def dropdb():
    click.echo('Dropped the database')

In [None]:
# | export
@click.group()
@click.option("-d", "--db_path", default=cfg.db.db_path, help="database path")
@click.option("-l", "--library", default=cfg.db.library, help="library name")
@click.pass_context
def nic(ctx, db_path, library):
    # Group callback: store the library name and an Arctic connection on
    # ctx.obj so that subcommands can read them from the context instead of
    # declaring --db_path/--library themselves.
    ctx.ensure_object(dict)
    ctx.obj["library"] = library
    ctx.obj["arctic"] = Arctic(f"lmdb://{db_path}")

@nic.command()
@click.option("-t", "--ticker", default="AMZN", help="ticker")
@click.pass_context
def read(ctx, ticker):
    # The connection and library name are read from ctx.obj.
    store = ctx.obj["arctic"]
    library_name = ctx.obj["library"]
    # Read the ticker's data and print its first rows.
    frame = store[library_name].read(ticker).data
    print(f'df.head() {frame.head()}')

# Subcommand of `nic`; receives the click context but only echoes a message.
@nic.command()
@click.pass_context
def dropdb(ctx):
    click.echo('Dropped the database')

In [None]:
# | export
def get_arctic_library(db_path, library):
    """Connect to the Arctic store at `lmdb://{db_path}` and return its `library`."""
    return Arctic(f"lmdb://{db_path}")[library]

The following code passed `library` to every command. It may be nicer to use the click context instead and do something like
`arctic --library=testa --db_path=sth NEXT_COMMAND`. This would make some of the decorators I had less relevant, as I would no longer be specifying `library` and `db_path` all over the place.

In [None]:
# # | export
# # TODO: csv_path vs csv_files_path. think if this is a problem...maybe unify?
# class Options:
#     def __init__(self) -> None:
#         self.db_path = click.option("-d", "--db_path", default=cfg.db.db_path, help="Database path")
#         self.library = click.option("-l", "--library", default=cfg.db.library, help="Library name")
#         self.ticker = click.option("-t", "--ticker", required=True, help="ticker to print")
#         self.start_date = click.option("-s", "--start_date", default=None, help="start date")
#         self.end_date = click.option("-e", "--end_date", default=None, help="end date")
#         self.csv_path = click.option("-c", "--csv_path", default=cfg.data_config.csv_files_path, help="csv files path")
#         self.etf = click.option("--etf", default=None, help="restrict to subset specified by ETF members")
#         self.zip_path = click.option("-z", "--zip_path", default="/nfs/lobster_data/lobster_raw/2016", help="zip files path")
#         self.tickers = click.option("--tickers", default=None, multiple=True, type=str, help="tickers to dump")
#         self.max_workers = click.option("-m", "--max_workers", default=20, help="max workers for parallelisation")
# O = Options()

# def apply_options(options: list):
#     def decorator(f):
#         for option in reversed(options):
#             f = option(f)
#         return f
#     return decorator

# def inherit_docstring_from(source_fn):
#     def decorator(target_fn):
#         target_fn.__doc__ = source_fn.__doc__
#         return target_fn
#     return decorator

# def infer_options(func) -> list[Callable]:
#     """Works together with the `auto_apply` to automatically infer arguments.
    
#     Used together this looks like:
#     @auto_apply(infer_options)
#     """
#     sig = signature(func)
#     param_names = [
#         param.name
#         for param in sig.parameters.values()
#         if param.kind == param.POSITIONAL_OR_KEYWORD
#     ]
#     options_list = [getattr(O, name) for name in param_names]
#     return options_list

# def inherits_from(func):
#     """Inherit docstring and options from `func`. Actually this wasn't the best idea. Keep separate"""
#     options_list = infer_options(func)

#     # still stlightly confused about the order of the decorators, oh well
#     def decorator(target_fn):
#         @inherit_docstring_from(func)
#         @apply_options(options_list)
#         @wraps(target_fn)
#         def wrapper(*args, **kwargs):
#             return target_fn(*args, **kwargs)
#         return wrapper
#     return decorator

# # def simple_inherits_from(func):
# #     """Simple without using functools.wraps"""
# #     options_list = infer_options(func)
# #     def decorator(target_fn):
# #         decorated = apply_options(options_list)(target_fn)
# #         decorated.__doc__ = func.__doc__
# #         return decorated
# #     return decorator

# @click.group(context_settings=CONTEXT_SETTINGS)
# def arctic():
#     pass

# @arctic.command()
# @apply_options([O.db_path])
# def list_libraries(db_path) -> None:
#     """List arcticdb libraries"""
#     arctic = Arctic(f"lmdb://{db_path}")
#     print(arctic.list_libraries())

# @arctic.command()
# @apply_options([O.db_path, O.library])
# def list_symbols(db_path, library) -> None:
#     """List symbols in the arcticdb library."""
#     arctic = Arctic(f"lmdb://{db_path}")
#     print(arctic[library].list_symbols())

# @arctic.command()
# @apply_options([O.db_path, O.library])
# def create_library(db_path, library) -> None:
#     """Create a blank new arcticdb library."""
#     arctic = Arctic(f"lmdb://{db_path}")
#     arctic.create_library(library) 
#     print(arctic[library])

# @arctic.command()
# @apply_options([O.db_path, O.library])
# @click.confirmation_option(prompt='Are you sure you want to delete the entire library?')
# def delete_library(db_path, library) -> None:
#     """Delete entire arcticdb library"""
#     arctic = Arctic(f"lmdb://{db_path}")
#     arctic.delete_library(library) 

# @arctic.command()
# @apply_options([O.db_path, O.library, O.ticker, O.start_date, O.end_date])
# def read(db_path, library, ticker, start_date, end_date,
# ):
#     """Read ticker and print head and tail."""
#     arctic = Arctic(f"lmdb://{db_path}")

#     if start_date and end_date:
#         start_datetime = pd.Timestamp(f"{start_date}T{NASDAQExchange.exchange_open}")
#         end_datetime = pd.Timestamp(f"{end_date}T{NASDAQExchange.exchange_close}")
#         date_range = (start_datetime, end_datetime)
#         df = arctic[library].read(ticker, date_range=date_range).data
#     else:
#         print("not using start or end dates")
#         df = arctic[library].read(ticker).data
    
#     print(f"Printing df.head() and df.tail() for ticker {ticker}")
#     print(df.head())
#     print(df.tail())

# def _write(
#     db_path,
#     library,
#     csv_path,
#     ticker,
#     start_date,
#     end_date,
# ):
#     """Preprocess and write ticker to database."""
#     arctic = Arctic(f"lmdb://{db_path}")

#     date_range = (start_date, end_date)
#     data = Data(
#         directory_path=csv_path,
#         ticker=ticker,
#         date_range=date_range,
#         aggregate_duplicates=False,
#     )
#     lobster = Lobster(data=data)
#     df = pd.concat([lobster.messages, lobster.book], axis=1)
#     print(f"head of ticker {ticker}")
#     print(df.head())

#     arctic[library].write(symbol=ticker, data=df)

# @arctic.command()
# @apply_options([O.db_path, O.library, O.csv_path, O.ticker, O.start_date, O.end_date])
# def write(**kwargs):
#     _write(**kwargs)


# # if want to also access _say from other functions then need to do this.
# def _say(
#     db_path,
#     library,
# ):
#     """Print some really important information"""
#     print(db_path, library)


# # @arctic.command()
# # @apply_options(infer_options(_say))
# # @inherit_docstring_from(_say)
# # def say(**kwargs):
# #     _say(**kwargs)

# @arctic.command()
# @inherits_from(_say)
# def say(**kwargs):
#     _say(**kwargs)

# @arctic.command()
# @apply_options([O.db_path, O.library, O.csv_path, O.start_date, O.end_date])
# def generate_jobs(db_path, library, csv_path, start_date, end_date):
#     ticker_date_dict = infer_ticker_to_date_range(csv_path)
#     with open('arctic_commands.txt', 'w') as f:
#         for ticker, (inferred_start_date, inferred_end_date) in ticker_date_dict.items():
#             # if date is None use the inferred date, otherwise use the CLI argument
#             start_date = start_date or inferred_start_date
#             end_date = end_date or inferred_end_date
#             f.write(f"arctic write --csv_path={csv_path} --db_path={db_path} --library={library} --ticker={ticker} --start_date={start_date} --end_date={end_date} \n")

# def sleepy(csv_path, folder_info):
#     time.sleep(5)
#     print(csv_path, folder_info.full)

# def extract_7z(input_path, output_path):
#     try:
#         subprocess.run(["7z", "x", input_path, f"-o{output_path}"], check=True)
#     except subprocess.CalledProcessError as e:
#         print(f"An error occurred: {str(e)}")

# @arctic.command()
# @apply_options([O.zip_path, O.csv_path, O.etf, O.max_workers])
# def zip(zip_path, csv_path, etf, max_workers):
#     folder_infos = infer_ticker_dict(zip_path)

#     # filter first
#     if etf:
#         def in_etf(folder_info):
#             return folder_info.ticker in ETFMembers().mapping[etf] + [etf]
#         folder_infos = list(filter(in_etf, folder_infos))

#     # commands = [f"mkdir -p {csv_path}/{folder_info.ticker_till_end}\n"
#     #             for folder_info in folder_infos]

#     # with open("zip_commands.txt", "w") as f:
#     #     [f.write(command) for command in commands]
#     with ProcessPoolExecutor(max_workers=max_workers) as executor:
#         # outputs_dirs = [folder_info.ticker_till_end for folder_info in folder_infos]
#         futures = [
#             executor.submit(os.mkdir, path=f"{csv_path}/{folder_info.ticker_till_end}")
#             for folder_info in folder_infos
#         ]
#         wait(futures)
#         futures = [
#             executor.submit(extract_7z, input_path=folder_info.full, output_path=f"{csv_path}/{folder_info.ticker_till_end}")
#             for folder_info in folder_infos
#         ]


#         # for folder_info in folder_infos:
#         #     # print(folder_info.ticker)
#         #     f.write(f"examle: mkdir {csv_path}/{folder_info.ticker_till_end}\n")


#     # ticker_date_dict = infer_ticker_to_ticker_path(zip_path)
#     # print(ticker_date_dict)
#     # if etf:
#     #     print(ETFMembers().mapping[etf])
#     #     ticker_date_dict = {
#     #         ticker: ticker_path
#     #         for ticker, ticker_path in ticker_date_dict.items()
#     #         if ticker in ETFMembers().mapping[etf] + [etf]
#     #     }
#     # print(ticker_date_dict)
#     # ticker_dict = infer_ticker_dict(zip_path)
#     # with open("zip_commands.txt", "w") as f:
#     #     for ticker, dict_ in ticker_dict.items():
#     #         full = dict_["full"]
#     #         ticker_till_end = dict_["ticker_till_end"]
#     #         f.write(f"mkdir {csv_path}/{ticker_till_end}\n")
#     #         f.write(f"/nfs/home/nicolasp/usr/bin/7z x {full} -o{ticker_till_end}\n")


# @arctic.command()
# @apply_options([O.db_path, O.library, O.csv_path, O.tickers, O.max_workers])
# def dump(
#     db_path,
#     library,
#     csv_path,
#     tickers,
#     max_workers,
# ):
#     """Dump all csv to arctic_db inferring start and end date from folder."""
#     folder_infos = infer_ticker_dict(csv_path)
#     print("inferred from folder")
#     print(folder_infos)

#     if tickers:
#         folder_infos = [folder_info for folder_info in folder_infos if folder_info.ticker in tickers]

#     print("filtered folder_info after filtering for tickers.")
#     print(folder_infos)

#     with ProcessPoolExecutor(max_workers=max_workers) as executor:
#         # small job with only a few dates
#         # futures = [
#         #     executor.submit(write_, csv_path=csv_path, db_path=db_path, library=library, ticker=folder_info.ticker, start_date="2016-01-01", end_date="2016-01-04")
#         #     for folder_info in folder_infos
#         # ]
#         # full job with whole year
#         futures = [
#             executor.submit(_write, csv_path=csv_path, db_path=db_path, library=library, ticker=folder_info.ticker, start_date=folder_info.start_date, end_date=folder_info.end_date)
#             for folder_info in folder_infos
#         ]
#     print('done')


# @dataclass
# class ArcticDBInfo:
#     ticker: str
#     dates_ndarray: np.ndarray
#     dates_series: pd.Series

#     def __post_init__(self):
#         self.dates_list: list[str] = list(self.dates_ndarray)
#         self.start_date = min(self.dates_ndarray)
#         self.end_date = max(self.dates_ndarray)

# def _info(db_path, library) -> list[ArcticDBInfo]:
#     """Return information about ticker info in database."""
#     arctic = Arctic(f"lmdb://{db_path}")

#     arcticdb_infos: list[ArcticDBInfo] = []
#     for ticker in arctic[library].list_symbols():
#         q = QueryBuilder()
#         # there is one auction each morning
#         q = q[q.event == Event.CROSS_TRADE.value]
#         df = arctic[library].read(symbol=ticker, query_builder=q).data

#         dates_series: pd.Series = df.index.date
#         dates_ndarray: np.ndarray = df.index.to_series().dt.strftime("%Y-%m-%d").values
#         arcticdb_infos.append(
#             ArcticDBInfo(ticker=ticker, dates_ndarray=dates_ndarray, dates_series=dates_series)
#         )
#     return arcticdb_infos


# @arctic.group()
# @inherits_from(_info)
# def info(**kwargs):
#     pass

# @info.command()
# @apply_options(infer_options(_info))
# def tickers(db_path, library):
#     """Print tickers in db."""
#     arctic = Arctic(f"lmdb://{db_path}")
#     print(arctic[library].list_symbols())

# @info.command()
# @apply_options([O.db_path, O.library])
# def versions(db_path, library):
#     arctic = Arctic(f"lmdb://{db_path}")
#     print(arctic[library].list_versions())

# @info.command()
# @apply_options(infer_options(_info))
# def dates(**kwargs):
#     """Print ticker information"""
#     arcticdb_infos = _info(**kwargs)
#     print({x.ticker: (x.start_date, x.end_date) for x in arcticdb_infos})


In [None]:
# # | export
# # TODO: csv_path vs csv_files_path. think if this is a problem...maybe unify?
# class Options:
#     def __init__(self) -> None:
#         self.db_path = click.option("-d", "--db_path", default=cfg.db.db_path, help="Database path")
#         self.library = click.option("-l", "--library", default=cfg.db.library, help="Library name")
#         self.ticker = click.option("-t", "--ticker", required=True, help="ticker to print")
#         self.start_date = click.option("-s", "--start_date", default=None, help="start date")
#         self.end_date = click.option("-e", "--end_date", default=None, help="end date")
#         self.csv_path = click.option("-c", "--csv_path", default=cfg.data_config.csv_files_path, help="csv files path")
#         self.etf = click.option("--etf", default=None, help="restrict to subset specified by ETF members")
#         self.zip_path = click.option("-z", "--zip_path", default="/nfs/lobster_data/lobster_raw/2016", help="zip files path")
#         self.tickers = click.option("--tickers", default=None, multiple=True, type=str, help="tickers to dump")
#         self.max_workers = click.option("-m", "--max_workers", default=20, help="max workers for parallelisation")
# O = Options()

# def apply_options(options: list):
#     def decorator(f):
#         for option in reversed(options):
#             f = option(f)
#         return f
#     return decorator

# def inherit_docstring_from(source_fn):
#     def decorator(target_fn):
#         target_fn.__doc__ = source_fn.__doc__
#         return target_fn
#     return decorator

# def infer_options(func) -> list[Callable]:
#     """Works together with the `auto_apply` to automatically infer arguments.
    
#     Used together this looks like:
#     @auto_apply(infer_options)
#     """
#     sig = signature(func)
#     param_names = [
#         param.name
#         for param in sig.parameters.values()
#         if param.kind == param.POSITIONAL_OR_KEYWORD
#     ]
#     options_list = [getattr(O, name) for name in param_names]
#     return options_list

# def inherits_from(func):
#     """Inherit docstring and options from `func`. Actually this wasn't the best idea. Keep separate"""
#     options_list = infer_options(func)

#     # still stlightly confused about the order of the decorators, oh well
#     def decorator(target_fn):
#         @inherit_docstring_from(func)
#         @apply_options(options_list)
#         @wraps(target_fn)
#         def wrapper(*args, **kwargs):
#             return target_fn(*args, **kwargs)
#         return wrapper
#     return decorator

# # def simple_inherits_from(func):
# #     """Simple without using functools.wraps"""
# #     options_list = infer_options(func)
# #     def decorator(target_fn):
# #         decorated = apply_options(options_list)(target_fn)
# #         decorated.__doc__ = func.__doc__
# #         return decorated
# #     return decorator

# @click.group(context_settings=CONTEXT_SETTINGS)
# def arctic():
#     pass

# @arctic.command()
# @apply_options([O.db_path])
# def list_libraries(db_path) -> None:
#     """List arcticdb libraries"""
#     arctic = Arctic(f"lmdb://{db_path}")
#     print(arctic.list_libraries())

# @arctic.command()
# @apply_options([O.db_path, O.library])
# def list_symbols(db_path, library) -> None:
#     """List symbols in the arcticdb library."""
#     arctic = Arctic(f"lmdb://{db_path}")
#     print(arctic[library].list_symbols())

# @arctic.command()
# @apply_options([O.db_path, O.library])
# def create_library(db_path, library) -> None:
#     """Create a blank new arcticdb library."""
#     arctic = Arctic(f"lmdb://{db_path}")
#     arctic.create_library(library) 
#     print(arctic[library])

# @arctic.command()
# @apply_options([O.db_path, O.library])
# @click.confirmation_option(prompt='Are you sure you want to delete the entire library?')
# def delete_library(db_path, library) -> None:
#     """Delete entire arcticdb library"""
#     arctic = Arctic(f"lmdb://{db_path}")
#     arctic.delete_library(library) 

# @arctic.command()
# @apply_options([O.db_path, O.library, O.ticker, O.start_date, O.end_date])
# def read(db_path, library, ticker, start_date, end_date,
# ):
#     """Read ticker and print head and tail."""
#     arctic = Arctic(f"lmdb://{db_path}")

#     if start_date and end_date:
#         start_datetime = pd.Timestamp(f"{start_date}T{NASDAQExchange.exchange_open}")
#         end_datetime = pd.Timestamp(f"{end_date}T{NASDAQExchange.exchange_close}")
#         date_range = (start_datetime, end_datetime)
#         df = arctic[library].read(ticker, date_range=date_range).data
#     else:
#         print("not using start or end dates")
#         df = arctic[library].read(ticker).data
    
#     print(f"Printing df.head() and df.tail() for ticker {ticker}")
#     print(df.head())
#     print(df.tail())

# def _write(
#     db_path,
#     library,
#     csv_path,
#     ticker,
#     start_date,
#     end_date,
# ):
#     """Preprocess and write ticker to database."""
#     arctic = Arctic(f"lmdb://{db_path}")

#     date_range = (start_date, end_date)
#     data = Data(
#         directory_path=csv_path,
#         ticker=ticker,
#         date_range=date_range,
#         aggregate_duplicates=False,
#     )
#     lobster = Lobster(data=data)
#     df = pd.concat([lobster.messages, lobster.book], axis=1)
#     print(f"head of ticker {ticker}")
#     print(df.head())

#     arctic[library].write(symbol=ticker, data=df)

# @arctic.command()
# @apply_options([O.db_path, O.library, O.csv_path, O.ticker, O.start_date, O.end_date])
# def write(**kwargs):
#     _write(**kwargs)


# # if want to also access _say from other functions then need to do this.
# def _say(
#     db_path,
#     library,
# ):
#     """Print some really important information"""
#     print(db_path, library)


# # @arctic.command()
# # @apply_options(infer_options(_say))
# # @inherit_docstring_from(_say)
# # def say(**kwargs):
# #     _say(**kwargs)

# @arctic.command()
# @inherits_from(_say)
# def say(**kwargs):
#     _say(**kwargs)

# @arctic.command()
# @apply_options([O.db_path, O.library, O.csv_path, O.start_date, O.end_date])
# def generate_jobs(db_path, library, csv_path, start_date, end_date):
#     ticker_date_dict = infer_ticker_to_date_range(csv_path)
#     with open('arctic_commands.txt', 'w') as f:
#         for ticker, (inferred_start_date, inferred_end_date) in ticker_date_dict.items():
#             # if date is None use the inferred date, otherwise use the CLI argument
#             start_date = start_date or inferred_start_date
#             end_date = end_date or inferred_end_date
#             f.write(f"arctic write --csv_path={csv_path} --db_path={db_path} --library={library} --ticker={ticker} --start_date={start_date} --end_date={end_date} \n")

# def sleepy(csv_path, folder_info):
#     time.sleep(5)
#     print(csv_path, folder_info.full)

# def extract_7z(input_path, output_path):
#     try:
#         subprocess.run(["7z", "x", input_path, f"-o{output_path}"], check=True)
#     except subprocess.CalledProcessError as e:
#         print(f"An error occurred: {str(e)}")

# @arctic.command()
# @apply_options([O.zip_path, O.csv_path, O.etf, O.max_workers])
# def zip(zip_path, csv_path, etf, max_workers):
#     folder_infos = infer_ticker_dict(zip_path)

#     # filter first
#     if etf:
#         def in_etf(folder_info):
#             return folder_info.ticker in ETFMembers().mapping[etf] + [etf]
#         folder_infos = list(filter(in_etf, folder_infos))

#     # commands = [f"mkdir -p {csv_path}/{folder_info.ticker_till_end}\n"
#     #             for folder_info in folder_infos]

#     # with open("zip_commands.txt", "w") as f:
#     #     [f.write(command) for command in commands]
#     with ProcessPoolExecutor(max_workers=max_workers) as executor:
#         # outputs_dirs = [folder_info.ticker_till_end for folder_info in folder_infos]
#         futures = [
#             executor.submit(os.mkdir, path=f"{csv_path}/{folder_info.ticker_till_end}")
#             for folder_info in folder_infos
#         ]
#         wait(futures)
#         futures = [
#             executor.submit(extract_7z, input_path=folder_info.full, output_path=f"{csv_path}/{folder_info.ticker_till_end}")
#             for folder_info in folder_infos
#         ]


#         # for folder_info in folder_infos:
#         #     # print(folder_info.ticker)
#         #     f.write(f"examle: mkdir {csv_path}/{folder_info.ticker_till_end}\n")


#     # ticker_date_dict = infer_ticker_to_ticker_path(zip_path)
#     # print(ticker_date_dict)
#     # if etf:
#     #     print(ETFMembers().mapping[etf])
#     #     ticker_date_dict = {
#     #         ticker: ticker_path
#     #         for ticker, ticker_path in ticker_date_dict.items()
#     #         if ticker in ETFMembers().mapping[etf] + [etf]
#     #     }
#     # print(ticker_date_dict)
#     # ticker_dict = infer_ticker_dict(zip_path)
#     # with open("zip_commands.txt", "w") as f:
#     #     for ticker, dict_ in ticker_dict.items():
#     #         full = dict_["full"]
#     #         ticker_till_end = dict_["ticker_till_end"]
#     #         f.write(f"mkdir {csv_path}/{ticker_till_end}\n")
#     #         f.write(f"/nfs/home/nicolasp/usr/bin/7z x {full} -o{ticker_till_end}\n")


# @arctic.command()
# @apply_options([O.db_path, O.library, O.csv_path, O.tickers, O.max_workers])
# def dump(
#     db_path,
#     library,
#     csv_path,
#     tickers,
#     max_workers,
# ):
#     """Dump all csv to arctic_db inferring start and end date from folder."""
#     folder_infos = infer_ticker_dict(csv_path)
#     print("inferred from folder")
#     print(folder_infos)

#     if tickers:
#         folder_infos = [folder_info for folder_info in folder_infos if folder_info.ticker in tickers]

#     print("filtered folder_info after filtering for tickers.")
#     print(folder_infos)

#     with ProcessPoolExecutor(max_workers=max_workers) as executor:
#         # small job with only a few dates
#         # futures = [
#         #     executor.submit(write_, csv_path=csv_path, db_path=db_path, library=library, ticker=folder_info.ticker, start_date="2016-01-01", end_date="2016-01-04")
#         #     for folder_info in folder_infos
#         # ]
#         # full job with whole year
#         futures = [
#             executor.submit(_write, csv_path=csv_path, db_path=db_path, library=library, ticker=folder_info.ticker, start_date=folder_info.start_date, end_date=folder_info.end_date)
#             for folder_info in folder_infos
#         ]
#     print('done')


# @dataclass
# class ArcticLibraryInfo:
#     ticker: str
#     dates_ndarray: np.ndarray
#     dates_series: pd.Series

#     def __post_init__(self):
#         self.dates_list: list[str] = list(self.dates_ndarray)
#         self.start_date = min(self.dates_ndarray)
#         self.end_date = max(self.dates_ndarray)

# def _info(db_path, library) -> list[ArcticLibraryInfo]:
#     """Return information about ticker info in database."""
#     arctic = Arctic(f"lmdb://{db_path}")

#     arcticdb_infos: list[ArcticLibraryInfo] = []
#     for ticker in arctic[library].list_symbols():
#         q = QueryBuilder()
#         # there is one auction each morning
#         q = q[q.event == Event.CROSS_TRADE.value]
#         df = arctic[library].read(symbol=ticker, query_builder=q).data

#         dates_series: pd.Series = df.index.date
#         dates_ndarray: np.ndarray = df.index.to_series().dt.strftime("%Y-%m-%d").values
#         arcticdb_infos.append(
#             ArcticLibraryInfo(ticker=ticker, dates_ndarray=dates_ndarray, dates_series=dates_series)
#         )
#     return arcticdb_infos


# @arctic.group()
# @inherits_from(_info)
# def info(**kwargs):
#     pass

# @info.command()
# @apply_options(infer_options(_info))
# def tickers(db_path, library):
#     """Print tickers in db."""
#     arctic = Arctic(f"lmdb://{db_path}")
#     print(arctic[library].list_symbols())

# @info.command()
# @apply_options([O.db_path, O.library])
# def versions(db_path, library):
#     arctic = Arctic(f"lmdb://{db_path}")
#     print(arctic[library].list_versions())

# @info.command()
# @apply_options(infer_options(_info))
# def dates(**kwargs):
#     """Print ticker information"""
#     arcticdb_infos = _info(**kwargs)
#     print({x.ticker: (x.start_date, x.end_date) for x in arcticdb_infos})


Refactor so that `arctic` creates the `Arctic()` connection and passes it down to the subcommands.

In [None]:
# | export
@dataclass
class ArcticLibraryInfo:
    """Dates available for one ticker in an arcticdb library.

    After construction the instance also exposes `dates_list` (the entries of
    `dates_ndarray` as a list) and `start_date` / `end_date` (their minimum
    and maximum).
    """

    ticker: str
    dates_ndarray: np.ndarray
    dates_series: pd.Series

    def __post_init__(self):
        # Derived attributes, all computed from `dates_ndarray`.
        dates = self.dates_ndarray
        self.dates_list: list[str] = [*dates]
        self.start_date = min(dates)
        self.end_date = max(dates)

In [None]:
# | export
# Keyword settings intended for click's `context_settings=` argument.
CONTEXT_SETTINGS = {
    # Accept the short `-h` flag in addition to `--help`.
    "help_option_names": ["-h", "--help"],
    # Lower-case string tokens; any non-string value is passed through untouched.
    "token_normalize_func": lambda token: token.lower() if isinstance(token, str) else token,
    "show_default": True,
    # Prefix click uses when deriving environment-variable names for options.
    "auto_envvar_prefix": "ARCTIC",
}

In [None]:
# | export
# REFACTORINOOOOO


def get_library_info(
    arctic_library: Library,  # arcticdb library
    tickers: list[str] | None = None,  # tickers to filter on
) -> list[ArcticLibraryInfo]:
    """Return date information for tickers stored in the library.

    If `tickers` is empty or None, every symbol in the library is used.
    One `ArcticLibraryInfo` is returned per ticker, in the order given.

    Raises `ValueError` if any requested ticker is not a symbol of the library.
    """
    arctic_symbols = arctic_library.list_symbols()
    if tickers:
        # Compute the offending tickers once; reused for the check and the message.
        unknown_tickers = set(tickers) - set(arctic_symbols)
        if unknown_tickers:
            raise ValueError(
                f"Some of the tickers specified were not in the database. The invalid tickers were {unknown_tickers}"
            )
    else:
        tickers = arctic_symbols

    arctic_library_infos: list[ArcticLibraryInfo] = []
    for ticker in tickers:
        # Read only CROSS_TRADE rows: there is one auction each morning, so
        # these rows are used to find the dates present for the ticker.
        q = QueryBuilder()
        q = q[q.event == Event.CROSS_TRADE.value]
        df = arctic_library.read(symbol=ticker, query_builder=q).data

        # `df.index.date` is an array of `datetime.date` objects, not a Series.
        # It is still passed as `dates_series` to keep the dataclass interface.
        dates: np.ndarray = df.index.date
        # The same dates formatted as "YYYY-MM-DD" strings.
        dates_ndarray: np.ndarray = df.index.to_series().dt.strftime("%Y-%m-%d").values
        arctic_library_infos.append(
            ArcticLibraryInfo(
                ticker=ticker, dates_ndarray=dates_ndarray, dates_series=dates
            )
        )
    return arctic_library_infos


class Options:
    """Reusable `click.option` decorators, one attribute per CLI option."""

    def __init__(self) -> None:
        option = click.option

        # database location
        self.db_path = option(
            "-d", "--db_path", default=cfg.db.db_path, help="Database path"
        )
        self.library = option(
            "-l", "--library", default=cfg.db.library, help="Library name"
        )

        # symbol and date selection
        self.ticker = option("-t", "--ticker", required=True, help="ticker to print")
        self.start_date = option("-s", "--start_date", default=None, help="start date")
        self.end_date = option("-e", "--end_date", default=None, help="end date")

        # raw data locations
        self.csv_path = option(
            "-c",
            "--csv_path",
            default=cfg.data_config.csv_files_path,
            help="csv files path",
        )
        self.etf = option(
            "--etf", default=None, help="restrict to subset specified by ETF members"
        )
        self.zip_path = option(
            "-z",
            "--zip_path",
            default="/nfs/lobster_data/lobster_raw/2016",
            help="zip files path",
        )

        # bulk operations
        self.tickers = option(
            "--tickers", default=None, multiple=True, type=str, help="tickers to dump"
        )
        self.max_workers = option(
            "-m", "--max_workers", default=20, help="max workers for parallelisation"
        )


# Module-level instance so the decorators can be referenced as `O.<name>`.
O = Options()


class Notify:
    """Print coloured, bold severity labels through `click.secho`.

    Both members are properties, so merely evaluating `N.warn` or `N.info`
    prints the label; the property value itself is `None`.
    """

    @staticmethod
    def _label(text, colour):
        # Shared styling for every label: bold text in the given colour.
        click.secho(text, fg=colour, bold=True)

    @property
    def warn(self):
        self._label("WARNING:", "red")

    @property
    def info(self):
        self._label("INFO:", "yellow")


# Module-level instance so labels can be printed with `N.warn` / `N.info`.
N = Notify()

def apply_options(options: list):
    """Build a decorator that applies every decorator in `options`.

    The decorators are applied last-to-first, so the result is the same as
    stacking them above a function in the order they appear in `options`.
    """

    def decorator(f):
        wrapped = f
        for decorate in reversed(options):
            wrapped = decorate(wrapped)
        return wrapped

    return decorator


class _ClickCtxObjRequired(TypedDict):
    """Keys that the `arctic` group callback always stores on `ctx.obj`."""

    library: str
    db_path: str
    arctic: Arctic


class ClickCtxObj(_ClickCtxObjRequired, total=False):
    """Purely for type hinting.

    `arctic_library` is declared as an optional key because the group callback
    only sets it when the requested library exists.
    """

    arctic_library: Library


class ClickCtx(Protocol):
    """Structural stand-in for a click context, typing only its `obj` attribute."""

    obj: ClickCtxObj


@click.group(context_settings=CONTEXT_SETTINGS)
@click.option(
    "-d", "--db_path", default=cfg.db.db_path, envvar="DB_PATH", help="Database path"
)
@click.option(
    "-l", "--library", default=cfg.db.library, envvar="LIBRARY", help="Library name"
)
@click.option("--debug", is_flag=True, default=False, help="print logging info")
@click.pass_context
def arctic(ctx, db_path, library, debug=False):
    """Command line interface for an arcticdb LMDB database.

    Opens the database at `db_path` and stores the connection, the library
    name and the database path on `ctx.obj` for the subcommands. The library
    handle is stored as well when the library already exists.
    """
    # click passes every declared option to the callback, so `debug` must be a
    # parameter here; without it each invocation fails with a TypeError.
    if debug:
        logging.basicConfig(level=logging.DEBUG)

    ctx.ensure_object(dict)
    store = Arctic(f"lmdb://{db_path}")
    ctx.obj.update(
        {
            "arctic": store,
            "library": library,
            "db_path": db_path,
        }
    )
    try:
        ctx.obj["arctic_library"] = store[library]
    except LibraryNotFound:
        # Deliberately ignored: subcommands such as `create` run before the
        # library exists, so "arctic_library" is simply left unset.
        pass

@arctic.command()
@click.pass_context
def echo(ctx: ClickCtx) -> None:
    """Echo back inputs."""
    # Pretty-print everything currently stored on the click context object.
    formatted = pformat(ctx.obj)
    click.echo(formatted)


# not a good idea ! monkey patch click.echo to accept a color
# def new_echo(text, file=None, nl=True, err=False, color=None):
#     if color:
#         text = click.style(text, fg=color)
#     click.echo.original(text, file=file, nl=nl, err=err)

# def warn(msg):
#     return click.style(msg, fg="red", bold=True, blink=True)

@arctic.command()
def init_autocomplete():
    """Initialise autocomplete for arctic CLI."""
    # TODO: improve performance of CLI
    # Have click generate the bash completion script for the `arctic` program.
    os.system("_ARCTIC_COMPLETE=bash_source arctic > ~/.arctic-complete.bash")

    bashrc_path = os.path.expanduser("~/.bashrc")
    start_marker = "# >>> arctic init_autocomplete >>>"

    # Only append the sourcing block once, so running the command repeatedly
    # does not keep growing ~/.bashrc.
    try:
        with open(bashrc_path) as f:
            already_installed = start_marker in f.read()
    except FileNotFoundError:
        already_installed = False

    if not already_installed:
        with open(bashrc_path, "a") as f:
            f.write(
                textwrap.dedent(
                    """\
                    # >>> arctic init_autocomplete >>>
                    # Contents within this block were generated by arctic init_autocomplete
                    . ~/.arctic-complete.bash
                    # <<< arctic init_autocomplete <<<
                    """
                )
            )

    click.echo(
        "Autocomplete initialized. Please restart your shell or run `source ~/.bashrc`."
    )


@arctic.command()
@click.pass_context
def create(ctx: ClickCtx) -> None:
    """Create a blank new arcticdb library."""
    store, library_name = ctx.obj["arctic"], ctx.obj["library"]
    store.create_library(library_name)
    # Look the new library up again and print it as confirmation.
    click.echo(store[library_name])


@arctic.group()
@click.pass_context
def ls(ctx: ClickCtx):
    """List information about database."""
    # NOTE: the group is called `ls` because the word `list` clashed with
    # python type hints. The body is intentionally empty; the subcommands
    # registered on this group do the work.


@ls.command()
@click.pass_context
def libraries(ctx: ClickCtx):
    # Print what the database connection reports from `list_libraries()`.
    click.echo(ctx.obj["arctic"].list_libraries())


@ls.command()
@click.pass_context
def symbols(ctx: ClickCtx):
    """List the symbols stored in the selected library."""
    try:
        arctic_library = ctx.obj["arctic_library"]
    except KeyError:
        # The group callback leaves this key unset when the library does not
        # exist; report that cleanly instead of showing a KeyError traceback.
        raise click.ClickException(
            f"Library {ctx.obj['library']!r} not found."
        ) from None
    click.echo(arctic_library.list_symbols())


@ls.command()
@click.pass_context
def versions(ctx: ClickCtx):
    """List every stored version of each ticker with its creation time."""
    try:
        arctic_library = ctx.obj["arctic_library"]
    except KeyError:
        # The group callback leaves this key unset when the library does not
        # exist; report that cleanly instead of showing a KeyError traceback.
        raise click.ClickException(
            f"Library {ctx.obj['library']!r} not found."
        ) from None

    version_info = arctic_library.list_versions()
    if not version_info:
        # An empty frame has no columns 1 and 2 to drop, so stop early.
        click.echo("No versions found.")
        return

    click.echo(
        (
            pd.DataFrame(version_info)
            .transpose()
            # keep only column 0, which is renamed to the creation time below
            .drop(columns=[1, 2])
            .rename(columns={0: "created_on"})
            .assign(
                created_on=lambda df: df["created_on"].dt.strftime("%Y-%m-%d %H:%M:%S")
            )
            .rename_axis(["ticker", "version"])
            # tickers alphabetically, newest version first within a ticker
            .sort_index(level=[0, 1], ascending=[True, False])
        )
    )


def parse_comma_separated(ctx, param, value: str):
    """Split a comma and/or space separated option value into a list.

    Has the `(ctx, param, value)` signature of a click option callback.
    Returns `None` when `value` is `None`; empty pieces are dropped.
    """
    if value is None:
        return None
    # Every single space or comma is a delimiter, so ", " yields an empty
    # piece between the two characters; those empty pieces are filtered out.
    return [piece for piece in re.split(r"[ ,]", value) if piece]


@ls.command()
# @click.option("-t", "--tickers", callback=parse_comma_separated , help="Comma or space separated tickers")
@click.option(
    "-t", "--tickers", multiple=True, type=str, help="Provide ticker(s) to filter on"
)
@click.option(
    "-a",
    "--all",
    is_flag=True,
    default=False,
    help="print all dates not just start and end",
)
@click.pass_context
def dates(ctx: ClickCtx, tickers, all):
    """Show the dates available for each ticker in the selected library."""
    try:
        arctic_library = ctx.obj["arctic_library"]
    except KeyError:
        # The group callback leaves this key unset when the library does not
        # exist; report that cleanly instead of showing a KeyError traceback.
        raise click.ClickException(
            f"Library {ctx.obj['library']!r} not found."
        ) from None

    try:
        arctic_library_infos = get_library_info(arctic_library, tickers=tickers)
    except ValueError as err:
        # e.g. tickers that are not symbols of the library
        raise click.ClickException(str(err)) from err

    if all:
        click.echo(pformat({x.ticker: x.dates_list for x in arctic_library_infos}))
    else:
        click.echo(
            pformat(
                {x.ticker: (x.start_date, x.end_date) for x in arctic_library_infos}
            )
        )


@arctic.group()
@click.pass_context
def rm(ctx: ClickCtx):
    """Remove items from the database."""
    # NOTE: Using word del clashed with python!
    pass


@rm.command()
@click.pass_context
@click.option(
    "-l", "--library", required=True, type=str, help="Library to permanently delete"
)
@click.option(
    "-s",
    "--simple",
    is_flag=True,
    default=False,
    help="For dealing with 'Error removing LMDB tree at path=...'",
)
def library(ctx: ClickCtx, library, simple):
    """Permanently delete a library after asking for confirmation.

    By default the library is opened first so that its description and tickers
    can be shown before confirming. With `--simple` the library is never
    opened; only its existence is checked.
    """
    arctic = ctx.obj["arctic"]

    # The group callback may have cached an open library handle on the context
    # (possibly to this very library); drop it before deleting.
    # NOTE(review): presumed to contribute to 'Error removing LMDB tree' when a
    # handle is still alive — confirm against arcticdb behaviour.
    ctx.obj.pop("arctic_library", None)

    if simple:
        if not arctic.has_library(library):
            click.echo("No library found to delete.")
            return
    else:
        try:
            arctic_library = arctic[library]
        except LibraryNotFound:
            click.echo("No library found to delete.")
            return
        # Built line by line rather than dedenting an f-string, so a
        # multi-line library description cannot upset the layout.
        click.echo(
            "Library information:\n"
            f"{arctic_library}\n"
            "\n"
            "Tickers in this library:\n"
            f"{arctic_library.list_symbols()}"
        )
        # Release the handle before the library is deleted.
        del arctic_library
        N.warn  # property access prints the red "WARNING:" label

    if click.confirm("Are you sure you want to permanently delete the library?"):
        # i'm not sure if the below will free the connection 100% of the time.
        # use --simple if encountering problems
        gc.collect()
        arctic.delete_library(library)

NameError: name 'Library' is not defined

In [None]:
# Scratch check of the comma/space splitting used for ticker options.
tickers = "COP, APA, MRO"  # a space separated value such as "COP APA" also works
# Split on every space or comma and drop the empty pieces in between.
tickers = [piece for piece in re.split(r"[ ,]", tickers) if piece]

print(tickers)

['COP', 'APA', 'MRO']


In [None]:
" APA, ".strip().strip(",")

'APA '

In [None]:
tickers

['COP', 'APA']

In [None]:
# #bizzare stuff

# @list.command()
# @click.pass_context
# def versions(ctx: ClickCtx):
#     arctic = ctx.obj["arctic"]
#     library = ctx.obj["library"]
#     print(arctic[library].list_versions())

#     # grouped information
#     print("grouped info \n")
#     # data_ = arctic[library].list_versions()
#     # print(data_)
#     # absolutely bizzare behaviour of click
#     # logger.info(list(data_.items()))
#     # print(list(data_.items()), flush=True)
#     df = pd.DataFrame(arctic[library].list_versions())
#     print(df)
#     # df = pd.DataFrame(list(data.items()), columns=["Key", "Date"])
#     # print(df)
#     # print(df)
#     # print(df)
#     # df['Ticker'] = df['Key'].str.split('_').str[0]
#     # df['Version'] = df['Key'].str.split('_').str[1]

#     # grouped = df.groupby('Ticker').apply(lambda x: x[['Version', 'Date']].to_dict(orient='records')).to_dict()
#     # print(grouped)

In [None]:
# | eval: false
# Scratch cell: open the configured library and print the type of the handle.
db_path = cfg.db.db_path
library = cfg.db.library
arctic = Arctic(f"lmdb://{db_path}")
arctic_library = arctic[library]
print(type(arctic_library))

# Looking up a library name that is not expected to exist; the except branch
# prints a marker when arcticdb raises LibraryNotFound.
try:
    arctic["not_there_library"]
except LibraryNotFound:
    print('yooo')

<class 'arcticdb.version_store.library.Library'>
yooo


In [None]:
# | eval: false
# Scratch cell: fetch the raw `list_versions()` result of the configured library.
db_path = cfg.db.db_path
library = cfg.db.library
arctic = Arctic(f"lmdb://{db_path}")

# Kept in `v` so that a later cell can turn it into a DataFrame.
v = arctic[library].list_versions()
v

{XOM_v2: (date=2023-09-27 02:26:44.171414752+00:00),
 XOM_v1: (date=2023-09-27 02:23:08.300722194+00:00),
 XOM_v0: (date=2023-09-27 02:21:24.583990502+00:00),
 WMB_v1: (date=2023-09-27 02:26:37.634894225+00:00),
 WMB_v0: (date=2023-09-27 02:23:01.919661098+00:00),
 SLB_v2: (date=2023-09-27 02:52:34.397110262+00:00),
 SLB_v1: (date=2023-09-27 02:26:40.570974982+00:00),
 SLB_v0: (date=2023-09-27 02:23:04.757768323+00:00),
 PXD_v2: (date=2023-09-27 02:36:05.408726228+00:00),
 PXD_v1: (date=2023-09-27 02:26:33.943345840+00:00),
 PXD_v0: (date=2023-09-27 02:22:58.175370382+00:00),
 PSX_v2: (date=2023-09-27 02:42:50.364743713+00:00),
 PSX_v1: (date=2023-09-27 02:26:35.778975550+00:00),
 PSX_v0: (date=2023-09-27 02:23:00.022232307+00:00),
 OKE_v2: (date=2023-09-27 02:42:27.439229401+00:00),
 OKE_v1: (date=2023-09-27 02:26:35.475166884+00:00),
 OKE_v0: (date=2023-09-27 02:22:59.797585503+00:00),
 MRO_v1: (date=2023-09-27 02:26:38.848555778+00:00),
 MRO_v0: (date=2023-09-27 02:23:03.547991502+0

In [None]:
# | eval: false
# Scratch cell: tabulate the `list_versions()` result held in `v`.
df = (
    pd.DataFrame(v)
    .transpose()
    # keep only column 0 and call it "datetime"
    .drop(columns=[1, 2])
    .rename(columns={0: "datetime"})
    .rename_axis(['ticker','version'])
    # tickers ascending, versions descending within each ticker
    .sort_index(level=[0,1], ascending=[True, False])
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,datetime
ticker,version,Unnamed: 2_level_1
COP,1,2023-09-27 02:26:45.534948670+00:00
COP,0,2023-09-27 02:23:10.412711442+00:00
CVX,2,2023-09-27 02:26:43.056291198+00:00
CVX,1,2023-09-27 02:23:07.055408249+00:00
CVX,0,2023-09-27 02:21:23.782315110+00:00
DVN,1,2023-09-27 02:26:37.936182529+00:00
DVN,0,2023-09-27 02:23:02.413986022+00:00
FANG,2,2023-09-27 02:35:11.487607770+00:00
FANG,1,2023-09-27 02:26:33.445289575+00:00
FANG,0,2023-09-27 02:22:57.693123744+00:00


In [None]:
# | eval: false
# cfg = get_config()
# Scratch cell: print the dates for which one ticker has cross-trade events.
db_path = cfg.db.db_path
library = cfg.db.library
# NOTE: `tickers` is only referenced by the commented-out line further down.
tickers = ['PXD', 'WMB', 'MRO', 'COP', 'SLB', 'KMI', 'XOM', 'DVN', 'PSX', 'OKE', 'FANG', 'CVX', 'HAL']
ticker = "PXD"
arctic = Arctic(f"lmdb://{db_path}")

# Restrict the read to rows whose `event` equals the CROSS_TRADE value.
q = QueryBuilder()
q = q[q.event == Event.CROSS_TRADE.value]
df = arctic[library].read(symbol=ticker, query_builder=q).data
# The same dates twice: via `index.date` and as "YYYY-MM-DD" strings.
dates = df.index.date
dates_str = df.index.to_series().dt.strftime("%Y-%m-%d").values
print(dates_str)


# print({ticker:arctic[library].read(ticker).version for ticker in tickers})
# df = arctic[library].read(ticker).data
# display(df)
# display(df.head())
# display(df.tail())

In [None]:
# | exports
# | code-fold: true
@click.command(context_settings=CONTEXT_SETTINGS)
@click.option("-d", "--db_path", default=cfg.db.db_path, help="database path")
@click.option("-l", "--library", default=cfg.db.library, help="library name")
def arctic_list_symbols(db_path, library) -> None:
    """List symbols in the arcticdb library."""
    lib = get_arctic_library(db_path=db_path, library=library)
    # Header line first, then the symbols reported by the library.
    header = f"Symbols in library {library}"
    print(header)
    print(lib.list_symbols())

In [None]:
# | exports
# | code-fold: true
@click.command(context_settings=CONTEXT_SETTINGS)
@click.option("-d", "--db_path", default=cfg.db.db_path, help="database path")
@click.option("-l", "--library", default=cfg.db.library, help="library name")
def arctic_create_new_library(db_path, library) -> None:
    """Create a blank new arcticdb library."""
    store = Arctic(f"lmdb://{db_path}")
    store.create_library(library)
    # Look the new library up again and print it as confirmation.
    print(store[library])

In [None]:
# | exports
# | code-fold: true
@click.command(context_settings=CONTEXT_SETTINGS)
@click.option("-d", "--db_path", default=cfg.db.db_path, help="database path")
def arctic_list_libraries(db_path) -> None:
    """List arcticdb libraries"""
    # Connect to the LMDB database at `db_path` and print its libraries.
    store = Arctic(f"lmdb://{db_path}")
    print(store.list_libraries())

In [None]:
# | exports
# | code-fold: true
@click.command(context_settings=CONTEXT_SETTINGS)
@click.option("-d", "--db_path", default=cfg.db.db_path, help="database path")
@click.option("-l", "--library", default=cfg.db.library, help="library name")
def arctic_delete_library(db_path, library) -> None:
    """Delete arcticdb library"""
    # Deleting is irreversible and both options have config defaults, so show
    # exactly what is about to be removed before asking.
    print(f"Library to delete: {library} (database: {db_path})")

    # Surrounding whitespace is ignored so that e.g. "y " counts as "y".
    answer = input("Proceed by deleting this entire library? (y/n): ").strip().lower()
    if answer == "n":
        sys.exit(0)
    if answer != "y":
        # anything other than y/n is treated as an error
        sys.exit(1)

    arctic = Arctic(f"lmdb://{db_path}")
    arctic.delete_library(library)

In [None]:
# | exports
# | code-fold: true
@click.command(context_settings=CONTEXT_SETTINGS)
@click.option("-d", "--db_path", default=cfg.db.db_path, help="database path")
@click.option("-l", "--library", default=cfg.db.library, help="library name")
@click.option("-t", "--ticker", required=True, help="ticker to print")
@click.option("-s", "--start_date", default=None, help="start date")
@click.option("-e", "--end_date", default=None, help="end date")
def arctic_read_symbol(db_path, library, ticker, start_date, end_date,
):
    """Print df.head() and df.tail() for ticker in arcticdb library."""
    arctic_library = get_arctic_library(db_path=db_path, library=library)

    # A bound is only built for the dates that were given. Previously a single
    # bound was silently ignored and the whole symbol was read.
    start_datetime = (
        pd.Timestamp(f"{start_date}T{NASDAQExchange.exchange_open}")
        if start_date
        else None
    )
    end_datetime = (
        pd.Timestamp(f"{end_date}T{NASDAQExchange.exchange_close}")
        if end_date
        else None
    )

    if start_datetime is None and end_datetime is None:
        df = arctic_library.read(ticker).data
    else:
        # arcticdb treats a `None` bound as open-ended on that side
        date_range = (start_datetime, end_datetime)
        df = arctic_library.read(ticker, date_range=date_range).data

    print(f"Printing df.head() and df.tail() for ticker {ticker}")
    print(df.head())
    print(df.tail())

Write to arctic again

In [None]:
# | exports
# | code-fold: true
@click.command(context_settings=CONTEXT_SETTINGS)
@click.option(
    "-c", "--csv_path", default=cfg.data_config.csv_files_path, help="csv files path"
)
@click.option("-d", "--db_path", default=cfg.db.db_path, help="database path")
@click.option("-l", "--library", default=cfg.db.library, help="library name")
@click.option("-t", "--ticker", required=True, help="ticker to write to db")
@click.option("-s", "--start_date", default="2020-01-01", help="start date")
@click.option("-e", "--end_date", default="2020-02-01", help="end date")
def arctic_write_symbol(
    db_path,
    library,
    csv_path,
    ticker,
    start_date,
    end_date,
):
    # Writes the messages and order book of one ticker to the library.
    # NOTE: no confirmation is requested when the symbol already has data; an
    # interactive y/n prompt for that case was previously commented out here.
    target_library = get_arctic_library(db_path=db_path, library=library)

    lobster = Lobster(
        data=Data(
            directory_path=csv_path,
            ticker=ticker,
            date_range=(start_date, end_date),
            aggregate_duplicates=False,
        )
    )
    # Messages and book are placed side by side, column-wise.
    combined = pd.concat([lobster.messages, lobster.book], axis=1)

    target_library.write(symbol=ticker, data=combined)

In [None]:
# | exports
# | code-fold: true
@click.command(context_settings=CONTEXT_SETTINGS)
@click.option(
    "-c", "--csv_path", default=cfg.data_config.csv_files_path, help="csv files path"
)
@click.option("-d", "--db_path", default=cfg.db.db_path, help="database path")
@click.option("-l", "--library", default=cfg.db.library, help="library name")
@click.option("-s", "--start_date", default=None, help="start date")
@click.option("-e", "--end_date", default=None, help="end date")
def arctic_generate_jobs(csv_path, db_path, library, start_date, end_date):
    """Write one `arctic_write_symbol` command per ticker to arctic_commands.txt.

    Tickers and their date ranges are inferred from `csv_path`. A start or end
    date given on the command line overrides the inferred one for every ticker.
    """
    ticker_date_dict = infer_ticker_to_date_range(csv_path)
    with open('arctic_commands.txt', 'w') as f:
        for ticker, (inferred_start_date, inferred_end_date) in ticker_date_dict.items():
            # if date is None use the inferred date, otherwise use the CLI argument.
            # Per-ticker names are used on purpose: assigning back to
            # `start_date`/`end_date` would make every later ticker reuse the
            # dates inferred for the first one.
            ticker_start_date = start_date or inferred_start_date
            ticker_end_date = end_date or inferred_end_date
            f.write(f"arctic_write_symbol --csv_path={csv_path} --db_path={db_path} --library={library} --ticker={ticker} --start_date={ticker_start_date} --end_date={ticker_end_date} \n")

In [None]:
# | exports
# | code-fold: true
@click.command(context_settings=CONTEXT_SETTINGS)
@click.option(
    "-z",
    "--zip_path",
    default="/nfs/lobster_data/lobster_raw/2016",
    help="zip files path",
)
@click.option(
    "-c", "--csv_path", default=cfg.data_config.csv_files_path, help="csv files path"
)
@click.option(
    "-e", "--etf", default=None, help="restrict to subset specified by ETF members"
)
def zip_generate_jobs(zip_path, csv_path, etf):
    # Writes a `mkdir` and a 7z extract command per ticker to zip_commands.txt.
    # NOTE(review): `etf` is accepted but currently unused; a filter on
    # `ETFMembers().mapping[etf] + [etf]` was previously commented out here.
    # NOTE(review): `mkdir` targets `{csv_path}/{ticker_till_end}` whereas 7z
    # extracts to the relative `{ticker_till_end}` — confirm this is intended.
    ticker_dict = infer_ticker_dict(zip_path)
    with open("zip_commands.txt", "w") as f:
        for paths in ticker_dict.values():
            archive = paths["full"]
            target = paths["ticker_till_end"]
            f.write(
                f"mkdir {csv_path}/{target}\n"
                f"/nfs/home/nicolasp/usr/bin/7z x {archive} -o{target}\n"
            )

In [None]:
# | exports
# | code-fold: true
@click.command(context_settings=CONTEXT_SETTINGS)
@click.option(
    "-c", "--csv_path", default=cfg.data_config.csv_files_path, help="csv files path"
)
@click.option("-d", "--db_path", default=cfg.db.db_path, help="database path")
@click.option("-l", "--library", default=cfg.db.library, help="library name")
@click.option("-t", "--ticker", required=True, help="ticker to write to db")
@click.option("-s", "--start_date", default="2020-01-01", help="start date")
@click.option("-e", "--end_date", default="2020-02-01", help="end date")
def arctic_dump_all(
    db_path,
    library,
    csv_path,
    ticker,
    start_date,
    end_date,
):
    """Append csv data for one ticker to the arcticdb library.

    Asks for confirmation first when the ticker already has data. Answering
    "n" exits with status 0; any answer other than "y"/"n" exits with status 1.
    """
    arctic_library = get_arctic_library(db_path=db_path, library=library)

    if ticker in arctic_library.list_symbols():
        print("warning - there is already data for this ticker")
        # Surrounding whitespace is ignored so that e.g. "y " counts as "y".
        answer = input("Proceed by adding data to this symbol? (y/n): ").strip().lower()
        if answer == "n":
            sys.exit(0)
        if answer != "y":
            sys.exit(1)

    date_range = (start_date, end_date)
    data = Data(
        directory_path=csv_path,
        ticker=ticker,
        date_range=date_range,
        aggregate_duplicates=False,
    )
    lobster = Lobster(data=data)
    # Messages and book are placed side by side, column-wise.
    df = pd.concat([lobster.messages, lobster.book], axis=1)
    print(df)

    arctic_library.append(symbol=ticker, data=df)

In [None]:
# | hide
import nbdev

nbdev.nbdev_export()