# Description

This notebook contains examples of data from http://www.kibot.com/
- The data is loaded using code from `im/kibot/data/`

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import logging
import os

import pandas as pd
import requests
import requests.adapters as radapt
import requests.packages.urllib3.util as rpuuti
import tqdm

import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hio as hio
import helpers.hprint as hprint
import im.kibot.data.config as imkidacon
import im.kibot.data.extract.download as imkdaexdo
import im.kibot.metadata.load.kibot_metadata as imkmlkime

  from tqdm.autonotebook import tqdm


In [2]:
hdbg.init_logger(verbosity=logging.INFO)

_LOG = logging.getLogger(__name__)

_LOG.info("%s", henv.get_system_signature()[0])

hprint.config_notebook()

[0m[36mINFO[0m: > cmd='/venv/lib/python3.8/site-packages/ipykernel_launcher.py -f /home/.local/share/jupyter/runtime/kernel-c96d9ca5-0cea-4a77-bacb-b233dde0605d.json'
[31m-----------------------------------------------------------------------------
This code is not in sync with the container:
code_version='1.4.2' != container_version='1.4.0'
-----------------------------------------------------------------------------
You need to:
- merge origin/master into your branch with `invoke git_merge_master`
- pull the latest container with `invoke docker_pull`[0m
INFO  # Git
  branch_name='CMTask3743_kibot_gallery'
  hash='e34617faf'
  # Last commits:
    * e34617faf Daniil Tikhomirov CMTask3743: Add notebook                                          ( 2 minutes ago) Sun Feb 5 21:44:26 2023  (HEAD -> CMTask3743_kibot_gallery, origin/CMTask3743_kibot_gallery)
    *   006f59faa Daniil Tikhomirov CMTask3551: Merge                                                 (    4 days ago) Wed Feb 1 15:1

# Metadata

- Load Kibot Metadata to estimate contracts and equities available

## Continuous contracts

In [3]:
kibot_metadata = imkmlkime.KibotMetadata()

In [4]:
# Get available contract types based on `KibotMetadata` documentation.
contract_types = ["1min", "daily", "tick-bid-ask"]

In [5]:
min_metadata = kibot_metadata.get_metadata("1min")
print(min_metadata.shape)
display(min_metadata.head(3))
display(min_metadata.tail(3))

(252, 12)


Unnamed: 0,Kibot_symbol,Description,StartDate,Exchange,Exchange_group,Exchange_abbreviation,Exchange_symbol,num_contracts,min_contract,max_contract,num_expiries,expiries
0,AC,CONTINUOUS ETHANOL CONTRACT,2009-09-28,Chicago Board Of Trade (CBOT GLOBEX),CME,CBOT,EH,122,2009-10-01,2019-12-01,12,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]"
1,AD,CONTINUOUS AUSTRALIAN DOLLAR CONTRACT,2009-09-27,Chicago Mercantile Exchange (CME GLOBEX),,,,65,2009-12-01,2020-12-01,12,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]"
2,AE,CONTINUOUS BLOOMBERG COMMODITY INDEX CONTRACT,NaT,,CME,CBOT,AW,38,2010-06-01,2019-09-01,4,"[3, 6, 9, 12]"


Unnamed: 0,Kibot_symbol,Description,StartDate,Exchange,Exchange_group,Exchange_abbreviation,Exchange_symbol,num_contracts,min_contract,max_contract,num_expiries,expiries
249,ZLT,CONTINUOUS SOYBEAN OIL TAS CONTRACT,NaT,,CME,CBOT,ZLT,36,2015-07-01,2019-12-01,8,"[1, 3, 5, 7, 8, 9, 10, 12]"
250,ZMT,CONTINUOUS SOYBEAN MEAL TAS CONTRACT,NaT,,CME,CBOT,ZMT,37,2015-07-01,2019-12-01,8,"[1, 3, 5, 7, 8, 9, 10, 12]"
251,ZWT,CONTINUOUS WHEAT TAS CONTRACT,NaT,,CME,CBOT,ZWT,25,2015-07-01,2020-07-01,5,"[3, 5, 7, 9, 12]"


In [6]:
daily_metadata = kibot_metadata.get_metadata("daily")
print(daily_metadata.shape)
display(daily_metadata.head(3))
display(daily_metadata.tail(3))

(252, 12)


Unnamed: 0,Kibot_symbol,Description,StartDate,Exchange,Exchange_group,Exchange_abbreviation,Exchange_symbol,num_contracts,min_contract,max_contract,num_expiries,expiries
0,AC,CONTINUOUS ETHANOL CONTRACT,2009-09-28,Chicago Board Of Trade (CBOT GLOBEX),CME,CBOT,EH,122,2009-10-01,2019-12-01,12,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]"
1,AD,CONTINUOUS AUSTRALIAN DOLLAR CONTRACT,2009-09-27,Chicago Mercantile Exchange (CME GLOBEX),,,,65,2009-12-01,2020-12-01,12,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]"
2,AE,CONTINUOUS BLOOMBERG COMMODITY INDEX CONTRACT,NaT,,CME,CBOT,AW,38,2010-06-01,2019-09-01,4,"[3, 6, 9, 12]"


Unnamed: 0,Kibot_symbol,Description,StartDate,Exchange,Exchange_group,Exchange_abbreviation,Exchange_symbol,num_contracts,min_contract,max_contract,num_expiries,expiries
249,ZLT,CONTINUOUS SOYBEAN OIL TAS CONTRACT,NaT,,CME,CBOT,ZLT,36,2015-07-01,2019-12-01,8,"[1, 3, 5, 7, 8, 9, 10, 12]"
250,ZMT,CONTINUOUS SOYBEAN MEAL TAS CONTRACT,NaT,,CME,CBOT,ZMT,37,2015-07-01,2019-12-01,8,"[1, 3, 5, 7, 8, 9, 10, 12]"
251,ZWT,CONTINUOUS WHEAT TAS CONTRACT,NaT,,CME,CBOT,ZWT,25,2015-07-01,2020-07-01,5,"[3, 5, 7, 9, 12]"


In [7]:
tick_metadata = kibot_metadata.get_metadata("tick-bid-ask")
print(tick_metadata.shape)
display(tick_metadata.head(3))
display(tick_metadata.tail(3))

(128, 12)


Unnamed: 0,Kibot_symbol,Description,StartDate,Exchange,Exchange_group,Exchange_abbreviation,Exchange_symbol,num_contracts,min_contract,max_contract,num_expiries,expiries
0,AC,CONTINUOUS ETHANOL CONTRACT,2009-09-28,Chicago Board Of Trade (CBOT GLOBEX),CME,CBOT,EH,116.0,2010-04-01,2019-12-01,12.0,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]"
1,AD,CONTINUOUS AUSTRALIAN DOLLAR CONTRACT,2009-09-27,Chicago Mercantile Exchange (CME GLOBEX),,,,63.0,2010-06-01,2020-12-01,12.0,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]"
2,AJY,CONTINUOUS AUSTRALIAN $/JAPANESE YEN CONTRACT,2009-10-21,Chicago Mercantile Exchange (CME GLOBEX),,,,39.0,2010-06-01,2019-12-01,4.0,"[3, 6, 9, 12]"


Unnamed: 0,Kibot_symbol,Description,StartDate,Exchange,Exchange_group,Exchange_abbreviation,Exchange_symbol,num_contracts,min_contract,max_contract,num_expiries,expiries
125,ZLT,,NaT,,CME,CBOT,ZLT,,NaT,NaT,,
126,ZMT,,NaT,,CME,CBOT,ZMT,,NaT,NaT,,
127,ZWT,,NaT,,CME,CBOT,ZWT,,NaT,NaT,,


# Downloading data

- An example of data downloaded from Kibot
- `im/kibot/data/extract/download.py` is a script that puts compressed data to S3
   - Examples of metadata of already downloaded datasets is provided above
- Here we download a small dataset to provide an example of the data

## Downloading continuous futures dataset

- On the example of `all_futures_continuous_contracts_daily`
- This section follows the `im/kibot/data/extract/download.py` script, focusing on saving the output locally and demonstrating intermediary parsing results.

In [8]:
# Create directories.
source_dir = "source_data"
hio.create_dir(source_dir, incremental=False)
converted_dir = "converted_data"
hio.create_dir(converted_dir, incremental=False)

In [9]:
# Load local login information.
with open("kibot_login.txt", "r") as f:
    file = f.read()
    username = file.split("\n")[0]
    password = file.split("\n")[1]

In [10]:
username

'Aben@comcast.net'

In [11]:
# Log in.
requests_session = requests.Session()
requests_retry = rpuuti.Retry(
    total=12,
    backoff_factor=2,
    status_forcelist=[104, 403, 500, 501, 502, 503, 504],
)
requests_session.mount("http://", radapt.HTTPAdapter(max_retries=requests_retry))
requests_session.mount("https://", radapt.HTTPAdapter(max_retries=requests_retry))
kibot_account = imkidacon.ENDPOINT + "account.aspx"
login_result = imkdaexdo._log_in(
    kibot_account, username, str(password), requests_session
)

In [12]:
my_account_file = os.path.join(source_dir, "my_account.html")
# Download my account html page.
if not os.path.exists(my_account_file):
    _LOG.warning("Missing '%s': downloading it", my_account_file)
    imkdaexdo._download_page(my_account_file, kibot_account, requests_session)
dataset_links_csv_file = os.path.join(converted_dir, "dataset_links.csv")
# Extract available datasets.
dle = imkdaexdo.DatasetListExtractor()
dataset_links_df = dle.extract_dataset_links(
    os.path.join(source_dir, "my_account.html")
)
dataset_links_df.head(3)

INFO  Requesting page 'http://www.kibot.com/account.aspx'
INFO  Storing page to 'source_data/my_account.html'


Unnamed: 0,dataset,link
0,all_stocks_1min,"/download.aspx?product=0,All_Stocks_1min"
1,all_stocks_unadjusted_1min,"/download.aspx?product=0,All_Stocks_unadjusted..."
2,all_stocks_daily,"/download.aspx?product=0,All_Stocks_daily"


In [13]:
# Create a directory for target dataset.
dataset = "all_futures_continuous_contracts_daily"
dataset_dir = os.path.join(converted_dir, dataset)
hio.create_dir(dataset_dir, incremental=True)
# Get specific payload addresses.
de = imkdaexdo.DatasetExtractor(dataset, requests_session)
to_download = de.get_dataset_payloads_to_download(
    dataset_links_df,
    source_dir,
    converted_dir,
)
to_download.head(3)

INFO  Saving to S3 in 's3://cryptokaizen-data/data/kibot/all_futures_continuous_contracts_daily'
INFO  Requesting page 'http://www.kibot.com/download.aspx?product=4,All_Futures_Continuous_Contracts_daily'
INFO  Storing page to 'source_data/all_futures_continuous_contracts_daily.html'
INFO  Number of files to download: 234:
0  # Symbol                                               Link                                 Description
1  1     JY  http://api.kibot.com/?action=download&link=151...            CONTINUOUS JAPANESE YEN CONTRACT
2  2     TY  http://api.kibot.com/?action=download&link=151...  CONTINUOUS 10 YR US TREASURY NOTE CONTRACT
3  3     FV  http://api.kibot.com/?action=download&link=iui...   CONTINUOUS 5 YR US TREASURY NOTE CONTRACT
4  4     ES  http://api.kibot.com/?action=download&link=t1t...          CONTINUOUS E-MINI S&P 500 CONTRACT
5  5     EU  http://api.kibot.com/?action=download&link=v3v...                 CONTINUOUS EURO FX CONTRACT


Unnamed: 0,#,Symbol,Link,Description
1,1,JY,http://api.kibot.com/?action=download&link=151...,CONTINUOUS JAPANESE YEN CONTRACT
2,2,TY,http://api.kibot.com/?action=download&link=151...,CONTINUOUS 10 YR US TREASURY NOTE CONTRACT
3,3,FV,http://api.kibot.com/?action=download&link=iui...,CONTINUOUS 5 YR US TREASURY NOTE CONTRACT


In [14]:
# Download payloads.
func = lambda row: de.download_payload_page(
    dataset_dir,
    row,
    **{
        "download_compressed": True,
        "skip_if_exists": False,
        "clean_up_artifacts": False,
    },
)
# Download a single payload from Kibot.
tqdm_ = tqdm.tqdm(
    to_download.iloc[1:2].iterrows(),
    total=len(to_download),
    desc="Downloading Kibot data",
)

Downloading Kibot data:   0%|                           | 0/234 [00:00<?, ?it/s]

In [15]:
for _, row in tqdm_:
    func(row)
# Show downloaded files.
print(os.listdir(dataset_dir))

Downloading Kibot data:   0%|                   | 1/234 [00:02<10:52,  2.80s/it]

['TY.csv.gz']





In [16]:
# Example of output data.
df = pd.read_csv(os.path.join(dataset_dir, "TY.csv.gz"))
display(df.head(5))
display(df.tail(5))

Unnamed: 0,05/03/1982,64.1875,64.3125,63.9375,63.9375.1,0
0,05/04/1982,63.8125,64.5,63.7188,64.4375,0
1,05/05/1982,64.593803,64.8125,64.375,64.531197,0
2,05/06/1982,64.9375,65.406197,64.906197,65.375,0
3,05/07/1982,65.656197,65.75,65.3125,65.6875,0
4,05/10/1982,65.406197,65.656197,65.3125,65.468803,0


Unnamed: 0,05/03/1982,64.1875,64.3125,63.9375,63.9375.1,0
8548,01/30/2023,114.65625,114.78125,114.171875,114.28125,1185210
8549,01/31/2023,114.40625,114.8125,114.296875,114.515625,2074922
8550,02/01/2023,114.640625,115.59375,114.546875,115.484375,2129670
8551,02/02/2023,115.3125,116.0,115.3125,115.546875,1955041
8552,02/03/2023,115.5,115.703125,114.25,114.421875,2445886
