In [22]:
# Test loading data from MongoDB into clean pandas DataFrames
    # Pipeline: MongoDB -> pandas DataFrame -> CSV

In [1]:
# Imports
import json
import os

import numpy as np
import pandas as pd

In [2]:
# Pymongo import for connection to local client DB
import pymongo
from pymongo import MongoClient

# Connect to the local MongoDB server, then select the database and the
# collection the rest of the notebook reads from.
mongo_client = MongoClient('mongodb://localhost:27017/')
db = mongo_client['btcusd_db']  # database handle (subscript form of attribute access)
input_data = db['btcusd_collection']  # collection handle inside that database

# Pull every document of the collection into memory and build one dataframe.
data = pd.DataFrame([document for document in input_data.find()])

In [3]:
# Verify that data was loaded from mongo into dataframe
# by previewing the first 10 rows of the combined frame.
data.head(10)

Unnamed: 0,_id,asks,bids,changes,channels,maker_order_id,message,price,product_id,reason,sequence,side,size,taker_order_id,time,trade_id,type
0,5ad3dc2ee3ae711d3ca939a4,"[[8337.67, 35.73324449], [8337.7, 1], [8337.71...","[[8337.66, 1.92259307], [8337, 0.001], [8336, ...",,,,,,BTC-USD,,,,,,,,snapshot
1,5ad3dc2ee3ae711d3ca939a5,,,,,97e82bf6-c071-48d7-9810-0cf52e9a3041,,8337.66,BTC-USD,,5686020000.0,buy,0.2359079,f87435c9-9d7d-485b-b52a-ca76b0bec88d,2018-04-15T23:11:40.934000Z,41700565.0,last_match
2,5ad3dc2ee3ae711d3ca939a6,,,,"[{'name': 'level2', 'product_ids': ['BTC-USD']...",,,,,,,,,,,,subscriptions
3,5ad3dc2ee3ae711d3ca939a7,,,"[[buy, 8300.00000000, 33.48472626]]",,,,,BTC-USD,,,,,,2018-04-15T23:11:43.047Z,,l2update
4,5ad3dc2ee3ae711d3ca939a8,,,"[[sell, 8353.46000000, 0.046]]",,,,,BTC-USD,,,,,,2018-04-15T23:11:43.060Z,,l2update
5,5ad3dc2ee3ae711d3ca939a9,,,"[[buy, 8310.42000000, 0]]",,,,,BTC-USD,,,,,,2018-04-15T23:11:43.062Z,,l2update
6,5ad3dc2ee3ae711d3ca939aa,,,"[[sell, 8359.00000000, 6.151]]",,,,,BTC-USD,,,,,,2018-04-15T23:11:43.065Z,,l2update
7,5ad3dc2ee3ae711d3ca939ab,,,,,,Failed to unsubscribe,,,You need to specify at least one product ID fo...,,,,,,,error
8,5ad3dc2ee3ae711d3ca939ac,,,"[[buy, 8300.40000000, 2]]",,,,,BTC-USD,,,,,,2018-04-15T23:11:43.071Z,,l2update
9,5ad3dc2ee3ae711d3ca939ad,,,"[[buy, 8300.40000000, 0]]",,,,,BTC-USD,,,,,,2018-04-15T23:11:43.074Z,,l2update


In [4]:
# Show information about test dataset from mongodb -> dataframe:
# per-column non-null counts and dtypes, plus the frame's memory usage.
data.info(verbose=True,  memory_usage=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4003 entries, 0 to 4002
Data columns (total 17 columns):
_id               4003 non-null object
asks              1 non-null object
bids              1 non-null object
changes           3946 non-null object
channels          1 non-null object
maker_order_id    54 non-null object
message           1 non-null object
price             54 non-null object
product_id        4001 non-null object
reason            1 non-null object
sequence          54 non-null float64
side              54 non-null object
size              54 non-null object
taker_order_id    54 non-null object
time              4000 non-null object
trade_id          54 non-null float64
type              4003 non-null object
dtypes: float64(2), object(15)
memory usage: 531.7+ KB


In [5]:
# Display unique response types from 'type' column
# (these are the values used to split the data into per-type dataframes in the next cell).
data['type'].unique()

array(['snapshot', 'last_match', 'subscriptions', 'l2update', 'error',
       'match'], dtype=object)

In [6]:
# Build one dataframe per response type, each from its own query on the collection.
def frame_for_type(response_type):
    """Return a DataFrame of all documents in `input_data` whose 'type' equals `response_type`."""
    matching_documents = input_data.find({'type': response_type})
    return pd.DataFrame(list(matching_documents))

# Main relevant response types: snapshot, l2update, match, last_match
snapshot = frame_for_type('snapshot')
l2update = frame_for_type('l2update')
match = frame_for_type('match')

# Remaining response types present in the collection.
last_match = frame_for_type('last_match')
subscriptions = frame_for_type('subscriptions')
# Error for unsubscribe message is issue with GDAX python API websocket call
error = frame_for_type('error')

In [7]:
# Preview the snapshot-type rows.
snapshot.head()

Unnamed: 0,_id,asks,bids,product_id,type
0,5ad3dc2ee3ae711d3ca939a4,"[[8337.67, 35.73324449], [8337.7, 1], [8337.71...","[[8337.66, 1.92259307], [8337, 0.001], [8336, ...",BTC-USD,snapshot


In [8]:
# Preview the l2update-type rows.
l2update.head()

Unnamed: 0,_id,changes,product_id,time,type
0,5ad3dc2ee3ae711d3ca939a7,"[[buy, 8300.00000000, 33.48472626]]",BTC-USD,2018-04-15T23:11:43.047Z,l2update
1,5ad3dc2ee3ae711d3ca939a8,"[[sell, 8353.46000000, 0.046]]",BTC-USD,2018-04-15T23:11:43.060Z,l2update
2,5ad3dc2ee3ae711d3ca939a9,"[[buy, 8310.42000000, 0]]",BTC-USD,2018-04-15T23:11:43.062Z,l2update
3,5ad3dc2ee3ae711d3ca939aa,"[[sell, 8359.00000000, 6.151]]",BTC-USD,2018-04-15T23:11:43.065Z,l2update
4,5ad3dc2ee3ae711d3ca939ac,"[[buy, 8300.40000000, 2]]",BTC-USD,2018-04-15T23:11:43.071Z,l2update


In [9]:
# Preview the match-type rows.
match.head()

Unnamed: 0,_id,maker_order_id,price,product_id,sequence,side,size,taker_order_id,time,trade_id,type
0,5ad3dc30e3ae711d3ca93aac,ce82a35b-53cd-4985-8f1d-5da12aa58f48,8337.67,BTC-USD,5686020611,sell,1.08279483,e65b1a76-cc10-489a-adfe-856f106304cf,2018-04-15T23:11:45.752000Z,41700566,match
1,5ad3dc33e3ae711d3ca93b91,97e82bf6-c071-48d7-9810-0cf52e9a3041,8337.66,BTC-USD,5686020956,buy,1.7527921,77393178-0068-4a3a-94e9-0e0f8b593251,2018-04-15T23:11:49.142000Z,41700567,match
2,5ad3dc33e3ae711d3ca93b93,f1ba4337-522e-4238-9d8f-83359339aa3b,8337.66,BTC-USD,5686020958,buy,0.1,77393178-0068-4a3a-94e9-0e0f8b593251,2018-04-15T23:11:49.142000Z,41700568,match
3,5ad3dc33e3ae711d3ca93b95,1839d6c8-ac26-4192-ac9d-b5a588173c08,8337.66,BTC-USD,5686020960,buy,0.06850097,77393178-0068-4a3a-94e9-0e0f8b593251,2018-04-15T23:11:49.142000Z,41700569,match
4,5ad3dc33e3ae711d3ca93b97,112560eb-aa03-4d67-9e45-801a1531f888,8337.66,BTC-USD,5686020962,buy,0.0013,77393178-0068-4a3a-94e9-0e0f8b593251,2018-04-15T23:11:49.142000Z,41700570,match


In [10]:
# Preview the last_match-type rows.
last_match.head()

Unnamed: 0,_id,maker_order_id,price,product_id,sequence,side,size,taker_order_id,time,trade_id,type
0,5ad3dc2ee3ae711d3ca939a5,97e82bf6-c071-48d7-9810-0cf52e9a3041,8337.66,BTC-USD,5686020020,buy,0.2359079,f87435c9-9d7d-485b-b52a-ca76b0bec88d,2018-04-15T23:11:40.934000Z,41700565,last_match


In [11]:
# Inspect the 'channels' value stored in the first subscriptions row.
subscriptions['channels'][0]

[{'name': 'level2', 'product_ids': ['BTC-USD']},
 {'name': 'matches', 'product_ids': ['BTC-USD']}]

In [12]:
# Format snapshot dataframe (snapshot is initial snapshot of L2 orderbook state)
# This shows the frame before reshaping: 'asks' and 'bids' each hold one nested list per row.
snapshot.head()

Unnamed: 0,_id,asks,bids,product_id,type
0,5ad3dc2ee3ae711d3ca939a4,"[[8337.67, 35.73324449], [8337.7, 1], [8337.71...","[[8337.66, 1.92259307], [8337, 0.001], [8336, ...",BTC-USD,snapshot


In [35]:
# Extract the nested asks/bids lists from the first snapshot row.
#   .iloc[0] selects that row by position, so the lookup does not depend on
#   the frame's index labels.
snapshot_asks = snapshot['asks'].iloc[0]
snapshot_bids = snapshot['bids'].iloc[0]

# Convert each list of [price, size] pairs into a two-column dataframe.
#   snapshot array format: [price, size]
#   Ask = sell price, bid = buy price
# Naming the columns at construction avoids an in-place rename (the cell can be
# re-run safely) and still yields 'price'/'size' columns when a side is empty.
snapshot_asks_df = pd.DataFrame(snapshot_asks, columns=['price', 'size'])
snapshot_bids_df = pd.DataFrame(snapshot_bids, columns=['price', 'size'])

In [36]:
# Preview the ask side of the snapshot as price/size columns.
snapshot_asks_df.head()

Unnamed: 0,price,size
0,8337.67,35.73324449
1,8337.7,1.0
2,8337.71,1.19825
3,8339.77,0.0486
4,8339.8,0.0985


In [37]:
# Preview the bid side of the snapshot as price/size columns.
snapshot_bids_df.head()

Unnamed: 0,price,size
0,8337.66,1.92259307
1,8337.0,0.001
2,8336.0,0.001
3,8335.83,0.9
4,8335.65,0.01


In [13]:
# Keep only the document id, the change payload and the timestamp of each l2update row.
l2update_columns = ['_id', 'changes', 'time']
l2update = l2update.loc[:, l2update_columns]
l2update.head()

Unnamed: 0,_id,changes,time
0,5ad3dc2ee3ae711d3ca939a7,"[[buy, 8300.00000000, 33.48472626]]",2018-04-15T23:11:43.047Z
1,5ad3dc2ee3ae711d3ca939a8,"[[sell, 8353.46000000, 0.046]]",2018-04-15T23:11:43.060Z
2,5ad3dc2ee3ae711d3ca939a9,"[[buy, 8310.42000000, 0]]",2018-04-15T23:11:43.062Z
3,5ad3dc2ee3ae711d3ca939aa,"[[sell, 8359.00000000, 6.151]]",2018-04-15T23:11:43.065Z
4,5ad3dc2ee3ae711d3ca939ac,"[[buy, 8300.40000000, 2]]",2018-04-15T23:11:43.071Z


In [14]:
# Preview the match frame again (same call as the earlier match.head() cell).
match.head()

Unnamed: 0,_id,maker_order_id,price,product_id,sequence,side,size,taker_order_id,time,trade_id,type
0,5ad3dc30e3ae711d3ca93aac,ce82a35b-53cd-4985-8f1d-5da12aa58f48,8337.67,BTC-USD,5686020611,sell,1.08279483,e65b1a76-cc10-489a-adfe-856f106304cf,2018-04-15T23:11:45.752000Z,41700566,match
1,5ad3dc33e3ae711d3ca93b91,97e82bf6-c071-48d7-9810-0cf52e9a3041,8337.66,BTC-USD,5686020956,buy,1.7527921,77393178-0068-4a3a-94e9-0e0f8b593251,2018-04-15T23:11:49.142000Z,41700567,match
2,5ad3dc33e3ae711d3ca93b93,f1ba4337-522e-4238-9d8f-83359339aa3b,8337.66,BTC-USD,5686020958,buy,0.1,77393178-0068-4a3a-94e9-0e0f8b593251,2018-04-15T23:11:49.142000Z,41700568,match
3,5ad3dc33e3ae711d3ca93b95,1839d6c8-ac26-4192-ac9d-b5a588173c08,8337.66,BTC-USD,5686020960,buy,0.06850097,77393178-0068-4a3a-94e9-0e0f8b593251,2018-04-15T23:11:49.142000Z,41700569,match
4,5ad3dc33e3ae711d3ca93b97,112560eb-aa03-4d67-9e45-801a1531f888,8337.66,BTC-USD,5686020962,buy,0.0013,77393178-0068-4a3a-94e9-0e0f8b593251,2018-04-15T23:11:49.142000Z,41700570,match


In [41]:
# Save test data to .csv format
#   https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html
# to_csv does not create missing directories, so make sure the output folder
# exists first; otherwise this cell fails on a fresh checkout.
os.makedirs("test_data", exist_ok=True)

# (dataframe, destination) pairs, listed in the order the files are written.
# The `error` frame is not exported.
csv_exports = [
    (data, "test_data/raw_mongo_input.csv"),
    (l2update, "test_data/l2update.csv"),
    (snapshot_asks_df, "test_data/snapshot_asks.csv"),
    (snapshot_bids_df, "test_data/snapshot_bids.csv"),
    (last_match, "test_data/last_match.csv"),
    (match, "test_data/match.csv"),
    (subscriptions, "test_data/subscriptions.csv"),
]

# Every file is written with identical options. index=False now also applies to
# last_match.csv, which previously gained an extra unnamed index column.
for frame, csv_path in csv_exports:
    frame.to_csv(csv_path, header=True, encoding='utf-8', index=False)

In [42]:
# Preview l2update again before inspecting its nested 'changes' values in the cells below.
l2update.head()

Unnamed: 0,_id,changes,time
0,5ad3dc2ee3ae711d3ca939a7,"[[buy, 8300.00000000, 33.48472626]]",2018-04-15T23:11:43.047Z
1,5ad3dc2ee3ae711d3ca939a8,"[[sell, 8353.46000000, 0.046]]",2018-04-15T23:11:43.060Z
2,5ad3dc2ee3ae711d3ca939a9,"[[buy, 8310.42000000, 0]]",2018-04-15T23:11:43.062Z
3,5ad3dc2ee3ae711d3ca939aa,"[[sell, 8359.00000000, 6.151]]",2018-04-15T23:11:43.065Z
4,5ad3dc2ee3ae711d3ca939ac,"[[buy, 8300.40000000, 2]]",2018-04-15T23:11:43.071Z


In [43]:
# 'changes' value of the row labelled 0: a list that wraps the individual change entries.
l2update['changes'][0]

[['buy', '8300.00000000', '33.48472626']]

In [47]:
# First change entry of that row; note that its elements are strings, not numbers.
l2update['changes'][0][0]

['buy', '8300.00000000', '33.48472626']

In [50]:
# Index the three fields of the first change entry one by one.
# Only the last expression's value is echoed by the notebook; the first two
# lines are evaluated but not displayed.
# Fields look like [side, price, size] — TODO confirm against the feed documentation.
l2update['changes'][0][0][0]
l2update['changes'][0][0][1]
l2update['changes'][0][0][2]

'33.48472626'