In [1]:
# Standard library
import glob
import os
from datetime import datetime

# Third-party
import numpy as np
import pandas as pd
import plotly
import plotly.graph_objs as go

# Initialise plotly's offline notebook mode once, before any figure is drawn
# with plotly.offline.iplot further down.
plotly.offline.init_notebook_mode(connected=True)

# Load the Data

In [2]:
# Market to process. Both values are substituted into the raw-data path
# (data/Raw/<exchange>/<currency>USD/...), so they must match the directory
# and file names on disk.
exchange, currency = 'Bitfinex', 'BTC'

# Period to load. year_month_to_load selects the month directory;
# day_to_load selects one daily file and is ignored in whole-month mode.
year_month_to_load, day_to_load = '2018_01', '01'

# Loading mode: exactly one of the two flags must be True.
load_whole_day = True     # read only the file for day_to_load
load_whole_month = False  # read every daily file of year_month_to_load

assert load_whole_month != load_whole_day, 'Can only load 1 month or 1 day'

In [4]:
# Read the raw trades into `df`: either every daily file of the selected
# month, or the single file for the selected day.
if load_whole_month:
    month_pattern = "data/Raw/%s/%sUSD/%s/*.csv.gz" % (exchange, currency, year_month_to_load)
    # glob.glob returns matches in arbitrary order; sort them so the daily
    # files are always concatenated in the same order. The single-day branch
    # below shows file names ending in a zero-padded date, so lexicographic
    # order should also be chronological (assuming every file in the
    # directory follows that naming).
    trade_days = sorted(glob.glob(month_pattern))
    if not trade_days:
        # Fail with the offending pattern instead of pandas' generic
        # "No objects to concatenate" error.
        raise ValueError('No trade files found matching %s' % month_pattern)

    df_list = [pd.read_csv(trade_day) for trade_day in trade_days]
    print('Read %d days of trading' % len(df_list))
    df = pd.concat(df_list)
else:
    filename = 'data/Raw/%s/%sUSD/%s/%s_%sUSD_trades_%s_%s.csv.gz'  % (
        exchange, currency, year_month_to_load,
        exchange, currency, year_month_to_load, day_to_load)
    print('Reading %s' % filename)
    df = pd.read_csv(filename)
# Displayed as the cell output: (number of trades, number of columns).
df.shape

Reading data/Raw/Bitfinex/BTCUSD/2018_01/Bitfinex_BTCUSD_trades_2018_01_01.csv.gz


(117998, 7)

In [5]:
# Interpret the raw 'date' values as milliseconds since the epoch and use the
# resulting timestamps as the frame's index (needed by resample() below).
trade_times = pd.to_datetime(df['date'], unit='ms')
df = df.assign(date=trade_times).set_index('date')
df.head()

Unnamed: 0_level_0,id,exchange,symbol,price,amount,sell
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-01 00:00:01,24774565,bf,btcusd,13763.0,0.1,True
2018-01-01 00:00:01,24774566,bf,btcusd,13766.0,0.01,False
2018-01-01 00:00:02,24774567,bf,btcusd,13766.0,0.025234,False
2018-01-01 00:00:02,24774568,bf,btcusd,13766.0,0.244811,False
2018-01-01 00:00:02,24774569,bf,btcusd,13767.0,0.105929,False


# OHLC

In [6]:
# Width of one bar, given as a pandas offset alias ('30Min' is an alternative).
ohlc_bar_len = '1H'

# Group the trade prices into bars of that width, then reduce every bar to
# its open / high / low / close price.
price_by_bar = df['price'].resample(ohlc_bar_len)
df_ohlc = price_by_bar.ohlc()
df_ohlc.head()

Unnamed: 0_level_0,open,high,low,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-01 00:00:00,13763.0,13788.0,13505.0,13607.0
2018-01-01 01:00:00,13621.0,13655.0,13251.0,13281.0
2018-01-01 02:00:00,13280.0,13440.0,13214.0,13362.0
2018-01-01 03:00:00,13363.0,13591.0,13310.0,13394.0
2018-01-01 04:00:00,13393.0,13584.0,13262.0,13584.0


## DUMP

In [7]:
def _create_dir(path):
    if not os.path.exists(path):
        os.makedirs(path)

In [8]:
# Write the OHLC bars below data/processed/<exchange>/<currency>USD/<year_month>.
target_dir = 'data/processed/%s/%sUSD/%s' % (exchange, currency, year_month_to_load)
_create_dir(target_dir)

if load_whole_month:
    ohlc_name = 'ohlc_%s_%sUSD_%s.csv' % (exchange, currency, year_month_to_load)
    ohlc_compression = None
else:
    ohlc_name = 'ohlc_%s_%sUSD_trades_%s_%s.csv.gz' % (exchange, currency, year_month_to_load, day_to_load)
    # Request gzip explicitly so the content always matches the '.gz' suffix
    # instead of depending on to_csv inferring it from the file name.
    ohlc_compression = 'gzip'

# NOTE(review): index=False omits df_ohlc's index (the bar timestamps) from
# the file, so only open/high/low/close are stored. Confirm that downstream
# readers do not need the timestamps.
df_ohlc.to_csv(target_dir + '/' + ohlc_name, index=False, compression=ohlc_compression)

## PLOT

In [9]:
# Build one candlestick trace from the four price columns of df_ohlc, using
# the frame's index as the x axis, and render it inline in the notebook.
candle_fields = {col: df_ohlc[col] for col in ('open', 'high', 'low', 'close')}
ohlcs = go.Candlestick(x=df_ohlc.index, **candle_fields)
plotly.offline.iplot([ohlcs])