In [None]:
import urllib.request
import pandas as pd
import json
import numpy as np
import datetime
import time
import os

# --- Remote API configuration ---
# CryptoCompare endpoint used to fetch the per-minute price history.
BASE_URL         = "https://min-api.cryptocompare.com/data/histominute"
# Quote currency passed as `tsym` in every request.
CURRENCY         = "USD"
# Application name passed as `extraParams` in every request.
APP_NAME         = "HES_SO_master_crypto_analysis"

# --- Symbol selection ---
# The symbol is read from current_crypto.txt. Surrounding whitespace (typically a
# trailing newline) is stripped because the value is embedded in URLs and file paths.
# When the file is missing or empty the notebook falls back to the default symbol.
DEFAULT_CRYPTO_SYMBOL = 'ETH'
try:
    with open("current_crypto.txt", "r") as symbol_file:
        CRYPTO_SYMBOL = symbol_file.read().strip()
except FileNotFoundError:
    CRYPTO_SYMBOL = ''
if not CRYPTO_SYMBOL:
    CRYPTO_SYMBOL = DEFAULT_CRYPTO_SYMBOL
print(CRYPTO_SYMBOL)
# Upper bound on the `limit` sent with a single API request.
MAX_LIMIT        = 2000

## personal config
# Folder holding the CSV files of the selected symbol.
FILE_PATH        = f"data/crypto/{CRYPTO_SYMBOL}"
# Separator placed between the "from" and "to" timestamps in CSV file names.
SEP_CHAR         = '~'
# Column names of the bookkeeping table data/crypto/var_crypto.csv.
ENVS             = ['CRYPTO', 'MOST_RECENT_FILE', 'MOST_RECENT_FILE_LINE_COUNT', 'MOST_RECENT_TS']
# Maximum number of rows stored in one CSV file before a new file is started.
MAX_ROW_PER_FILE = 10000

In [None]:
def get_data_minutely(toTs, limit, crypto_symbol):
    """Fetch per-minute price history for one symbol from the remote API.

    Parameters
    ----------
    toTs : int
        Value sent as the `toTs` query parameter (callers in this notebook pass
        -1 or the oldest timestamp of the previous batch).
    limit : int
        Value sent as the `limit` query parameter.
    crypto_symbol : str
        Value sent as the `fsym` query parameter.

    Returns
    -------
    pandas.DataFrame
        The records found under the 'Data' key of the JSON response, without
        the 'volumefrom' and 'volumeto' columns. An empty frame is returned
        unchanged when the response holds no records.

    Raises
    ------
    urllib.error.URLError
        If the HTTP request fails.
    KeyError
        If the response has no 'Data' key, or a non-empty response lacks the
        volume columns.
    """
    url = f"{BASE_URL}?fsym={crypto_symbol}&tsym={CURRENCY}&limit={limit}&toTs={toTs}&extraParams={APP_NAME}"
    # The context manager closes the HTTP response even when read() raises,
    # so repeated calls in a download loop do not leak connections.
    with urllib.request.urlopen(url) as response:
        contents = response.read()
    json_string = contents.decode("utf-8")
    obj = json.loads(json_string)
    df = pd.DataFrame.from_dict(obj['Data'])
    if not df.empty:
        return df.drop(['volumefrom', 'volumeto'], axis=1)
    return df

def get_var(key, crypto):
    """Return one bookkeeping value for a symbol.

    Reads data/crypto/var_crypto.csv and returns the value stored in column
    `key` on the first row whose 'CRYPTO' column equals `crypto`.

    Raises IndexError when no row matches `crypto`.
    """
    integer_columns = {'MOST_RECENT_TS': np.int32, 'MOST_RECENT_FILE_LINE_COUNT': np.int32}
    table = pd.read_csv("data/crypto/var_crypto.csv", sep=',', dtype=integer_columns)
    row_matches = table['CRYPTO'] == crypto
    requested_column = table[key]
    matching_values = requested_column.loc[row_matches].values
    return matching_values[0]

def update_var(key, value, crypto):
    """Store one bookkeeping value for a symbol.

    Reads data/crypto/var_crypto.csv, sets column `key` to `str(value)` on
    every row whose 'CRYPTO' column equals `crypto`, and writes the table back
    to the same file.

    Parameters
    ----------
    key : str
        Name of the column to update.
    value : object
        New value; it is stored as its string representation.
    crypto : str
        Symbol identifying the row(s) to update.
    """
    df_var = pd.read_csv("data/crypto/var_crypto.csv", sep=',',
                         dtype={'MOST_RECENT_TS': np.int32, 'MOST_RECENT_FILE_LINE_COUNT': np.int32})
    row_matches = df_var['CRYPTO'] == crypto
    # The column may have been parsed as int32 or as all-NaN float; cast it to
    # object first so a string can be stored without a dtype conflict.
    df_var[key] = df_var[key].astype(object)
    # A single .loc assignment writes into the frame itself. The previous chained
    # form (df_var[key].loc[mask] = ...) could assign to a temporary copy and
    # leave the frame, and therefore the CSV, unchanged.
    df_var.loc[row_matches, key] = str(value)
    df_var.to_csv("data/crypto/var_crypto.csv", index=False)
    
def add_new_crypto(crypto):
    """Register a symbol in data/crypto/var_crypto.csv if it is not there yet.

    A new row is appended with no recorded file, a line count of 0 and a
    timestamp of -1 (the download cell treats a timestamp that is not > 0 as
    "nothing downloaded yet"). The table is left untouched when a row for
    `crypto` already exists, so the call is safe to repeat.

    Parameters
    ----------
    crypto : str
        Symbol to register.
    """
    df_var = pd.read_csv("data/crypto/var_crypto.csv", sep=',',
                         dtype={'MOST_RECENT_TS': np.int32, 'MOST_RECENT_FILE_LINE_COUNT': np.int32})
    if (df_var['CRYPTO'] == crypto).any():
        return
    # Defaults are keyed by column name. The previous positional list put -1 in
    # the file column and an empty string in the line-count column, which made
    # the next int32 read of the table fail.
    new_line = pd.DataFrame([{
        'CRYPTO': crypto,
        'MOST_RECENT_FILE': "",
        'MOST_RECENT_FILE_LINE_COUNT': 0,
        'MOST_RECENT_TS': -1,
    }]).reindex(columns=df_var.columns)
    # DataFrame.append was removed in pandas 2.0; concat is the supported form.
    df_var = pd.concat([df_var, new_line], ignore_index=True)
    df_var.to_csv("data/crypto/var_crypto.csv", index=False)

In [None]:
# Make sure the selected symbol has a row in data/crypto/var_crypto.csv
# before the download cell reads its bookkeeping values.
add_new_crypto(CRYPTO_SYMBOL)

In [None]:
# Quick sanity check of the API: request a small sample and show its time span.
df_histo = get_data_minutely(-1, 10, CRYPTO_SYMBOL)
sample_times = df_histo['time']
print(sample_times.iloc[-1])
# First and last timestamps of the sample, rendered as local date-times.
for sample_position in (0, -1):
    sample_moment = datetime.datetime.fromtimestamp(int(sample_times.iloc[sample_position]))
    print(sample_moment.strftime('%Y-%m-%d %H:%M:%S'))
print(df_histo.shape)
df_histo.head(3)

In [None]:
# --- Download the missing minute candles and store them in rolling CSV files ---
# First run: fetch one week of minute data. Later runs: fetch only the minutes
# elapsed since the timestamp recorded in var_crypto.csv.
total_wished = 60 * 24 * 7
toTs = -1  # sent as-is for the first request; later requests use the oldest time seen
df_historical = pd.DataFrame()
last_ts = int(get_var(ENVS[3], CRYPTO_SYMBOL))
last_file_line_cnt = 0
last_file = None
if last_ts > 0:
    last_file = get_var(ENVS[1], CRYPTO_SYMBOL)
    last_file_line_cnt = int(get_var(ENVS[2], CRYPTO_SYMBOL))
    now = int(time.time())
    total_wished = int((now - last_ts)/60)
    # If the recorded file is missing (or the value is not a path at all),
    # appending would create a header-less CSV: start a fresh file instead.
    if not isinstance(last_file, str) or not os.path.isfile(last_file):
        print('recorded file not found, starting a new one:', last_file)
        last_file = None
        last_file_line_cnt = 0

# Walk backwards in time, one request per batch, collecting the batches in a
# list (DataFrame.append was removed in pandas 2.0 and was quadratic anyway).
batches = []
while total_wished > 0:
    limit = min(total_wished, MAX_LIMIT)
    df = get_data_minutely(toTs, limit, CRYPTO_SYMBOL)
    if df.empty:
        # Nothing (more) available: stop instead of failing on df['time'].
        print('no data returned, stopping download')
        break
    batches.append(df)
    min_time = int(df['time'].iloc[0])
    max_time = int(df['time'].iloc[-1])
    print('range=', datetime.datetime.fromtimestamp(min_time).strftime('%Y-%m-%d %H:%M:%S'),
         datetime.datetime.fromtimestamp(max_time).strftime('%Y-%m-%d %H:%M:%S'))
    toTs = min_time
    total_wished = total_wished - df.shape[0]

if batches:
    df_historical = pd.concat(batches, ignore_index=True)
    # Consecutive batches can share their boundary row because the next request
    # ends at the previous batch's oldest timestamp: keep one row per timestamp.
    df_historical = df_historical.drop_duplicates(subset=['time']).sort_values(by=['time'])
    if last_ts > 0:
        # Rows up to last_ts were stored by a previous run.
        df_historical = df_historical[df_historical['time'] > last_ts]

if not df_historical.empty:

    # to_csv does not create missing folders.
    os.makedirs(FILE_PATH, exist_ok=True)

    print(last_file_line_cnt)
    # Clamp at 0: a negative count would make head() return "all but the last n" rows.
    available_lines = max(0, MAX_ROW_PER_FILE - last_file_line_cnt)
    df_to_fulfill = df_historical.head(available_lines)
    df_remaining  = df_historical.iloc[df_to_fulfill.shape[0]:]

    print('df_to_fulfill', df_to_fulfill.shape)
    print('df_remaining', df_remaining.shape)

    # `filename` always names the most recently written file; initialising it
    # here keeps it defined when everything fits in a single new file.
    filename = last_file

    if not df_to_fulfill.empty:
        from_str = datetime.datetime.fromtimestamp(int(df_to_fulfill['time'].iloc[0])).strftime('%Y-%m-%d %H-%M-%S')
        max_time = int(df_to_fulfill['time'].iloc[-1])
        to_str = datetime.datetime.fromtimestamp(max_time).strftime('%Y-%m-%d %H-%M-%S')
        if last_file is None:
            filename = f"{FILE_PATH}/{from_str}{SEP_CHAR}{to_str}.csv"
            df_to_fulfill.to_csv(filename, mode='w', header=True, index=False)
        else:
            # Top up the existing file, then rename it so that the part after
            # SEP_CHAR reflects the new last timestamp.
            df_to_fulfill.to_csv(last_file, mode='a', header=False, index=False)
            filename = f"{last_file.split(SEP_CHAR)[0]}{SEP_CHAR}{to_str}.csv"
            os.rename(last_file, filename)
        last_file = filename
        last_file_line_cnt = last_file_line_cnt + df_to_fulfill.shape[0]

    # Whatever did not fit is written to new files of at most MAX_ROW_PER_FILE rows.
    list_df_remaining = [df_remaining[i:i+MAX_ROW_PER_FILE] for i in range(0, df_remaining.shape[0], MAX_ROW_PER_FILE)]
    for df_remain in list_df_remaining:
        from_str = datetime.datetime.fromtimestamp(int(df_remain['time'].iloc[0])).strftime('%Y-%m-%d %H-%M-%S')
        max_time = int(df_remain['time'].iloc[-1])
        to_str = datetime.datetime.fromtimestamp(max_time).strftime('%Y-%m-%d %H-%M-%S')
        filename = f"{FILE_PATH}/{from_str}{SEP_CHAR}{to_str}.csv"
        df_remain.to_csv(filename, index = False)
        last_file_line_cnt = df_remain.shape[0]

    # Record where the next run has to resume.
    update_var(ENVS[3], max_time, CRYPTO_SYMBOL)
    update_var(ENVS[1], filename, CRYPTO_SYMBOL)
    update_var(ENVS[2], last_file_line_cnt, CRYPTO_SYMBOL)

print('done')

In [None]:
# Plotly's offline helpers are used to render figures inside the notebook.
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
# NOTE(review): connected=True presumably loads plotly.js from the network
# rather than embedding it in the notebook; confirm against the plotly docs.
init_notebook_mode(connected=True)

# glob is used below to list the CSV files of a symbol.
import glob

# Number of most recent rows read back from the CSV files for plotting.
MAX_ROW = 500 

In [None]:
def get_most_recent_data_from_csv(folder, n_rows):
    """Load the `n_rows` most recent rows from the CSV files of a folder.

    Files are visited in reverse lexicographic order of their names and
    reading stops as soon as enough rows have been collected. Each visited
    path is printed.

    Parameters
    ----------
    folder : str
        Folder containing the `*.csv` files.
    n_rows : int
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        At most `n_rows` rows sorted by the 'time' column in ascending order.
        An empty frame is returned when the folder holds no CSV file.
    """
    frames = []
    loaded_rows = 0
    for csv_path in reversed(sorted(glob.glob(f"{folder}/*.csv"))):
        print(csv_path)
        frame = pd.read_csv(csv_path)
        frames.append(frame)
        loaded_rows += frame.shape[0]
        if loaded_rows >= n_rows:
            break
    if not frames:
        # Nothing to sort: an empty frame has no 'time' column.
        return pd.DataFrame()
    # One concat at the end replaces repeated DataFrame.append calls
    # (removed in pandas 2.0).
    return pd.concat(frames).sort_values(by=['time']).tail(n_rows)

In [None]:
# Load the latest MAX_ROW rows for the selected symbol and report their time span.
df_hist_minutes = get_most_recent_data_from_csv(FILE_PATH, MAX_ROW)
print('df shape', df_hist_minutes.shape)


def _render_epoch(epoch_seconds, pattern):
    """Format an epoch timestamp as local time using `pattern`."""
    return datetime.datetime.fromtimestamp(epoch_seconds).strftime(pattern)


def _render_epoch_for_axis(epoch_seconds):
    """Format an epoch timestamp the way the chart's time axis expects it."""
    return _render_epoch(epoch_seconds, '%Y-%m-%d %H:%M:%S')


history_times = df_hist_minutes['time']
from_str = _render_epoch(history_times.iloc[0], '%Y-%m-%d %H-%M-%S')
to_str = _render_epoch(history_times.iloc[-1], '%Y-%m-%d %H-%M-%S')
print('from', from_str, 'to', to_str)
# `df` is the same object as df_hist_minutes, so the 'time' column of both
# names ends up holding formatted strings.
df = df_hist_minutes
df['time'] = history_times.apply(_render_epoch_for_axis)
df.head(5)

In [None]:
# Candlestick chart of the loaded rows: one candle per row of `df`.
trace = go.Candlestick(
    x=df['time'],
    open=df['open'],
    high=df['high'],
    low=df['low'],
    close=df['close'],
)
data = [trace]
layout = dict(
    title=f"{CRYPTO_SYMBOL} currency",
    yaxis=dict(title='Price USD'),
)
fig = {'data': data, 'layout': layout}
iplot(fig)