In [51]:
import json
from functools import lru_cache
from operator import itemgetter
from typing import List
import boto3
import pandas as pd
import requests
from datetime import datetime
from pydantic import BaseModel, confloat, AnyUrl,conint
from ratelimit import RateLimitException, limits
from tenacity import *
from yarl import URL
import awswrangler as wr
import cryptocompare

In [52]:
crypto_compare_key = "6188db038cad874de5eb7da3821490d45967ae07911bb365904d2b1759400ea4"
cryptocompare.cryptocompare._set_api_key_parameter(crypto_compare_key)

'&api_key=6188db038cad874de5eb7da3821490d45967ae07911bb365904d2b1759400ea4'

In [53]:
session = boto3.Session(profile_name='master')
dynamo = session.resource('dynamodb')
s3_client = session.client('s3')

In [54]:
list_of_coins = [
    "BTC",
    "ETH",
    "BUSD",
    "USDT",
    "XRP",
    "SOL",
    "BNB",
    "DOT",
    "SHIB",
    "LTC"
  ]
list_of_currencies = ['USD']

In [55]:
df = wr.s3.read_csv('s3://kosmobiker-masterproject/data/coin_list/table/', boto3_session=session)


In [58]:
tmp_df = df[df['Name'].isin(list_of_coins)][['Name', 'ContentCreatedOn']]
created_on = dict(zip(tmp_df.Name,tmp_df.ContentCreatedOn))
created_on

{'BTC': 1417635237,
 'DOT': 1593791455,
 'USDT': 1499420073,
 'BUSD': 1569227610,
 'BNB': 1501700700,
 'SHIB': 1618843684,
 'SOL': 1586353077,
 'XRP': 1428671962,
 'LTC': 1426004323,
 'ETH': 1438259812}

In [154]:
def get_historical_data(coin:str,
                        cur:str,
                        created_on:dict,
                        ts=int(datetime.now().timestamp()),
                        limit=2000):
    data = []
    done = False
    while not done:
        try:
            tmp_json = cryptocompare.get_historical_price_hour(coin, cur, limit=limit, toTs=ts)
            if ts < created_on[coin]:
                done = True
            else:
                data.extend(tmp_json)
                ts -= 3600000
                done = False
        except Exception as err:
            print(err)
            done = True
    df = pd.DataFrame(data)
    df['coin'] = coin
    df['currency'] = cur
    return df 

In [155]:
df = get_historical_data('ETH', "USD", created_on)

In [156]:
df['date_time'] = pd.to_datetime(df['time'], unit='s', errors='raise')
df['year'] = df['date_time'].dt.year
df['month'] = df['date_time'].dt.month
df['day'] = df['date_time'].dt.day
df['hour'] = df['date_time'].dt.hour
df['minute'] = df['date_time'].dt.minute
df['delta'] = (df['close'] - df['open'])*100/df['open']
df['partition_col'] = df['date_time'].dt.strftime('%Y')
df.rename(mapper={
        'time' : 'time_stamp',
        'volumefrom' : 'volume_fsym',
        'volumeto' : 'volume_tsym',
        'coin' : 'ticker'
            }, axis=1, inplace=True)

df

Unnamed: 0,time_stamp,high,low,open,volume_fsym,volume_tsym,close,conversionType,conversionSymbol,ticker,currency,date_time,year,month,day,hour,minute,delta,partition_col
0,1653206400,1983.220,1971.020,1973.320,2852.21,5639338.82,1981.820,direct,,ETH,USD,2022-05-22 08:00:00,2022,5,22,8,0,0.430746,2022
1,1653210000,2030.460,1981.820,1981.820,13354.09,26804068.70,2026.460,direct,,ETH,USD,2022-05-22 09:00:00,2022,5,22,9,0,2.252475,2022
2,1653213600,2034.570,2023.560,2026.460,6404.42,12993160.61,2025.670,direct,,ETH,USD,2022-05-22 10:00:00,2022,5,22,10,0,-0.038984,2022
3,1653217200,2032.720,2010.070,2025.670,7515.08,15180390.62,2014.780,direct,,ETH,USD,2022-05-22 11:00:00,2022,5,22,11,0,-0.537600,2022
4,1653220800,2029.800,2014.600,2014.780,4776.57,9667772.35,2025.410,direct,,ETH,USD,2022-05-22 12:00:00,2022,5,22,12,0,0.527601,2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124057,1440792000,1.204,1.198,1.192,15173.67,18234.33,1.202,multiply,BTC,ETH,USD,2015-08-28 20:00:00,2015,8,28,20,0,0.838926,2015
124058,1440795600,1.205,1.201,1.202,9948.66,11973.10,1.203,multiply,BTC,ETH,USD,2015-08-28 21:00:00,2015,8,28,21,0,0.083195,2015
124059,1440799200,1.199,1.190,1.203,17182.73,20510.67,1.194,multiply,BTC,ETH,USD,2015-08-28 22:00:00,2015,8,28,22,0,-0.748130,2015
124060,1440802800,1.194,1.188,1.194,13116.15,15627.61,1.191,multiply,BTC,ETH,USD,2015-08-28 23:00:00,2015,8,28,23,0,-0.251256,2015


In [163]:
def load(pandas_df: pd.DataFrame, path_table, database_name, table_name, col_partition):
    # try:
        wr.s3.to_parquet(
            df=pandas_df,
            index=False,
            path=path_table,
            dataset=True,
            database=database_name,
            compression='snappy',
            table=table_name,
            mode="append",
            partition_cols=col_partition,
            use_threads=True,
            concurrent_partitioning=True,
            boto3_session=session
        )
    # except Exception as err:
    #     print(err)

In [165]:
load(df, "s3://kosmobiker-masterproject/data/ohlc_data", "darhevich_data_lake", "ohlc_data", ['partition_col'])