# 1. Import packages

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 2. Data loading

## 2.1. Dataset description

### A toy dataset
- **Triangular** currency pairs: `gbpusd`, `usdjpy`, **`jpygbp`**
  - $\frac{USD}{GBP}\times\frac{JPY}{USD}\times\frac{GBP}{JPY}=1$. Therefore, the following holds assuming a frictionless market condition:
  - $\log{\frac{USD}{GBP}}-\log{\frac{JPY}{USD}}-\log{\frac{GBP}{JPY}}=0.$
- Period: One day. May 1st, 2019.
- Frequency: 1 minute (=1440 data points per day per currency pair)

In [4]:
toy_path = '../dataset/'
toy_folders = ['toyset']

# csv files have no explicit column header, but column names come in this order.
# 'timestamp',  'opening', 'high', 'low', 'close', 'volume'
# As we are only interested in timestamp and close prices, we set `usecols` = [0,4], a list of indices
col_names = ['timestamp',  'opening', 'high', 'low', 'close', 'volume']
usecols = [0, 4]

df = {}
f = pd.DataFrame(columns=col_names)
print("Loading...")
for folder in toy_folders:
    files = os.listdir(toy_path+folder)
    for file in files:
        if file.endswith(".csv"):
            print(file)
            tmp = pd.read_csv(os.path.join(toy_path, folder, file),
                              delimiter=';', header=0, names=col_names, usecols=usecols)
            df[file[:6]] = tmp.copy()
print("Complted.")

Loading...
gbpjpy_DAT_ASCII_GBPJPY_M1_201905.csv
gbpusd_DAT_ASCII_GBPUSD_M1_201905.csv
usdjpy_DAT_ASCII_USDJPY_M1_201905.csv
Complted.


In [5]:
df.keys()

dict_keys(['gbpjpy', 'gbpusd', 'usdjpy'])

#### Conversion: gbpjpy -> jpygbp
- In the raw data set, only gbpjpy exists but we need `jpygbp`
  - Very easy to convert: the reciprocal of gbpjpy is `jpygbp`

In [6]:
df['jpygbp'] = df['gbpjpy'].copy()
df['jpygbp'].close = df['jpygbp'].close.apply(lambda x: 1.0/x)
df.pop('gbpjpy', None)

Unnamed: 0,timestamp,close
0,20190501 000100,145.451
1,20190501 000200,145.465
2,20190501 000300,145.459
3,20190501 000400,145.453
4,20190501 000500,145.450
...,...,...
31945,20190531 165400,136.769
31946,20190531 165500,136.765
31947,20190531 165600,136.772
31948,20190531 165700,136.776


In [7]:
df.keys()

dict_keys(['gbpusd', 'usdjpy', 'jpygbp'])