In [1]:
#Checking to see if cloud instance is working :) 
print('Hello')

Hello


In [3]:
from datetime import datetime, timedelta
import time
import pandas as pd
import numpy as np

## Data Processing and Mining with Big Query

In [4]:
#Creating a credential object from a specific key_path where the credential json file is located
from google.cloud import bigquery
from google.oauth2 import service_account
key_path='../credential/Xpring Dev Sandbox-edb2a98acf17.json'
credentials = service_account.Credentials.from_service_account_file(
    key_path,
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)

In [5]:
key_path

'../credential/Xpring Dev Sandbox-edb2a98acf17.json'

In [6]:
#Checking to see if credential file exists
import os.path
from os import path

path.exists(key_path)

True

In [7]:
def gbq_query(query,cred, query_params=None):
    """
    Run a query against Google Big Query, returning a pandas dataframe of the result.

    Parameters
    ----------
    query: str
        The query string
    cred: obj
        Credential class instance
    
    query_params: list, optional
        The query parameters to pass into the query string
    """
    client = bigquery.Client(credentials=cred)
    job_config = bigquery.QueryJobConfig()
    job_config.query_parameters = query_params
    return client.query(query, job_config=job_config).to_dataframe()

In [8]:
#Dates that query data will fall between
start_date='2019-12-15'
end_date='2019-12-27'

In [9]:
#Query and query_parameters
query = """
Select 
  TIMESTAMP(l.CloseTime) as `TimeStamp`,
  t.Account,t.Destination,t.Fee,t.TxnSignature,t.AmountXRP,t.LedgerIndex
FROM
  `xrpledgerdata.fullhistory.transactions`t
JOIN
  `xrpledgerdata.fullhistory.ledgers` l
  on t.LedgerIndex=l.LedgerIndex
where t.TransactionResult = "tesSUCCESS"
AND TxnSignature IS NOT NULL
AND Destination IS NOT NULL
AND AmountXRP  IS NOT NULL
AND TIMESTAMP(l.CloseTime) >=TIMESTAMP(@start_date)
AND TIMESTAMP(l.CloseTime) <=TIMESTAMP(@end_date)


ORDER BY TimeStamp
LIMIT 1000;
"""
query_params = [
    bigquery.ScalarQueryParameter("start_date", "STRING", start_date),
    bigquery.ScalarQueryParameter("end_date", "STRING", end_date)
]


In [10]:
xrp = gbq_query(query,credentials,query_params)
xrp

Unnamed: 0,TimeStamp,Account,Destination,Fee,TxnSignature,AmountXRP,LedgerIndex
0,2019-12-15 00:00:00+00:00,rPsmHDMkheWZvbAkTA8A9bVnUdadPn7XBK,rUMhAy8zECaY9u6ntqTAXFu1t8gixCeHka,10,304502210089A1985CB7AF1F667C61405FB4A609F06DEB...,48560300000,52058308
1,2019-12-15 00:00:01+00:00,rEvoRfi83Hc5gxqpwRtc9jh7Z4C2hECCWY,rnuPTVikw8HKK4hBGCtnq2J2433VYaZPZQ,12,304402203E8C80A7E4F36EEB754BCA9EF3CB2D5B2BFA08...,999999,52058309
2,2019-12-15 00:00:01+00:00,rw2ciyaNshpHe7bCHo4bRWq6pqqynnWKQg,rLNaPoKeeBjZe2qs6x52yVPZpZ8td4dc6w,40,3045022100AEF609203571841221F6D13CD5991EE66AAF...,50000083642,52058309
3,2019-12-15 00:00:10+00:00,ryBANkk28Mj71jRKAkt13U1X9ubztsGWZ,rwU8rAiE2eyEPz3sikfbHuqCuiAtdXqa2v,7707,304402203D711CFEB876FFC22375F0C8C3CC3B1DB8952D...,15728776000,52058310
4,2019-12-15 00:00:11+00:00,rLNaPoKeeBjZe2qs6x52yVPZpZ8td4dc6w,rwpMvfxoodXggJ1g4qv6MWAPQqWDwQyHUW,40,3045022100F4BEBE96EF6610512BD07D789C1E2629342F...,1162250116,52058311
...,...,...,...,...,...,...,...
995,2019-12-15 01:20:40+00:00,rJb5KsHsDHF1YS5B5DU6QCkH5NsPaKQTcy,rU7xJs7QmjbiyxpEozNYUFQxaRD5kueY7z,200000,3044022042E953DD34BC42DFCB67071FF8A5572F1A9307...,96790306,52059536
996,2019-12-15 01:20:40+00:00,rJb5KsHsDHF1YS5B5DU6QCkH5NsPaKQTcy,rMdG3ju8pgyVh29ELPWaDuA74CpWW6Fxns,200000,3044022015EE58FA708018EF55241EDD23281D95989382...,82766900,52059536
997,2019-12-15 01:20:40+00:00,rJb5KsHsDHF1YS5B5DU6QCkH5NsPaKQTcy,rDsbeomae4FXwgQTJp9Rs64Qg9vDiTCdBv,200000,3044022043BB41D7229D7433B594C3DD866F46BC6FA951...,4636750000,52059536
998,2019-12-15 01:20:40+00:00,rJb5KsHsDHF1YS5B5DU6QCkH5NsPaKQTcy,rQrQMKhcw3WnptGeWiYSwX5Tz3otyJqPnq,200000,3044022019F15D308AE593BF1EE0E5D7DC89924E605CD1...,82966700,52059536


In [11]:
def datetotime(col):
    #returns a list of datetime objects
    #takes in a df column
    x=[]
    for entry in col:
        x.append(datetime.strptime(entry,'%Y-%m-%d %H:%M:%S'))
    return x

In [12]:
#Replaces xrp timestamp list with a list of strings
xrp['TimeStamp']= [str(x)[:19] for x in xrp['TimeStamp']]

In [13]:
xrp['TimeStamp'][1]

'2019-12-15 00:00:01'

In [14]:
#Converts strings into datetime objects using strptime
xrp['TimeStamp']=datetotime(xrp['TimeStamp'])

In [18]:
xrp['LedgerIndex'][0]

52058308

In [30]:
xrp.loc(0)[4]

TimeStamp                                     2019-12-15 00:00:11
Account                        rLNaPoKeeBjZe2qs6x52yVPZpZ8td4dc6w
Destination                    rwpMvfxoodXggJ1g4qv6MWAPQqWDwQyHUW
Fee                                                            40
TxnSignature    3045022100F4BEBE96EF6610512BD07D789C1E2629342F...
AmountXRP                                              1162250116
LedgerIndex                                              52058311
Name: 4, dtype: object

## CloseTime Distribution

In [38]:
def plotclosetimedist(data):
    UniqueLedgerIndex=np.unique(data['LedgerIndex'])
    closetimezeros=[0]*(len(UniqueLedgerIndex)-1)
    dicCloseTime = {k:v for k,v in zip(UniqueLedgerIndex,closetimezeros)}
    
    curledger=data['LedgerIndex'][0]
    curtime=data['TimeStamp'][0]
    for index,row in data.iterrows():
        if row['LedgerIndex']!=curledger:
            dicCloseTime[row['LedgerIndex']]=row['TimeStamp']-curtime
            curledger=row['LedgerIndex']
            curtime=row['TimeStamp']
        else:
            None
    return dicCloseTime


        
        
    

In [39]:
plotclosetimedist(xrp)

{52058308: 0,
 52058309: Timedelta('0 days 00:00:01'),
 52058310: Timedelta('0 days 00:00:09'),
 52058311: Timedelta('0 days 00:00:01'),
 52058312: Timedelta('0 days 00:00:01'),
 52058313: Timedelta('0 days 00:00:08'),
 52058314: Timedelta('0 days 00:00:01'),
 52058315: Timedelta('0 days 00:00:09'),
 52058316: Timedelta('0 days 00:00:01'),
 52058317: Timedelta('0 days 00:00:01'),
 52058320: Timedelta('0 days 00:00:18'),
 52058321: Timedelta('0 days 00:00:01'),
 52058322: Timedelta('0 days 00:00:01'),
 52058323: Timedelta('0 days 00:00:08'),
 52058325: Timedelta('0 days 00:00:10'),
 52058326: Timedelta('0 days 00:00:01'),
 52058327: Timedelta('0 days 00:00:01'),
 52058328: Timedelta('0 days 00:00:08'),
 52058329: Timedelta('0 days 00:00:01'),
 52058330: Timedelta('0 days 00:00:09'),
 52058331: Timedelta('0 days 00:00:01'),
 52058336: Timedelta('0 days 00:00:19'),
 52058337: Timedelta('0 days 00:00:01'),
 52058338: Timedelta('0 days 00:00:09'),
 52058340: Timedelta('0 days 00:00:02'),
 5

## Calculating Sliding Window Account Balances

In [15]:
#Creating a merged a numpy array of unique account and destination hashes
UniqueAccount=np.unique(xrp['Account'])
UniqueDestination=np.unique(xrp['Destination'])
UniqueHash=np.unique(np.concatenate([UniqueDestination, UniqueAccount]))

In [16]:
#Creating a dictionary with account hashes as keys and account balances initialized at 0 as the values
#The value is an array of length 24 for each increment of time window that will be computed ()
UniqueHashList=UniqueHash.tolist()
listofzeros = [[0] *24 ]*len(UniqueHashList)
dicAccBal = {k:v for k,v in zip(UniqueHashList,listofzeros)}



In [112]:
#for each unique account hash
for each in UniqueHashList:
    #looking at 1 hour time windows from 0-23 hours from startime (which is the first rows time, currently static)
    for i in range(1,25):
        starttime = xrp['TimeStamp'][0]
        endtime = timedelta(hours = i+1 ) + starttime
        #iterate through each transaction
        for index, row in xrp.iterrows():
            #Checking if the transaction's timestamp is within time window specified by starttime and endtime
            if (row['TimeStamp'] < endtime) & (row['TimeStamp'] > starttime):
                #Adding account balances to respective dictionary balance
                dicAccBal[row['Account']][i] += (-1) * (row['AmountXRP'])
                dicAccBal[row['Destination']][i] += (1) * (row['AmountXRP'])

KeyboardInterrupt: 