In [1]:
import json
from functools import lru_cache
from operator import itemgetter
from typing import List
import boto3
import pandas as pd
import requests
from datetime import datetime
from pydantic import BaseModel, confloat, AnyUrl,conint
from ratelimit import RateLimitException, limits
from tenacity import *
from yarl import URL
import awswrangler as wr

In [2]:
# Shared AWS handles for the notebook. Everything is created from one boto3
# session bound to the local 'master' credentials profile.
session = boto3.Session(profile_name='master')
dynamo = session.resource('dynamodb')  # DynamoDB resource built from the session above
s3_client = session.client('s3')  # S3 client built from the session above

In [3]:
# Base URL of the CoinGecko v3 API; request URLs are built by appending path
# segments with the `/` operator (e.g. API_ENDPOINT / "coins" / "bitcoin").
API_ENDPOINT = URL("https://api.coingecko.com/api/v3")
@retry(
    retry=retry_if_exception_type(requests.exceptions.RequestException),
    wait=wait_exponential(max=60),
    stop=stop_after_attempt(3),
)
@retry(
    retry=retry_if_exception_type(RateLimitException),
    wait=wait_fixed(60),
    stop=stop_after_delay(360),
)
@limits(calls=50, period=60)
def call_get(url: URL, params: dict, timeout: float = 30.0) -> str:
    """Issue a rate-limited, retried HTTP GET and return the response body.

    Decorators, innermost first:
      * ``limits`` allows 50 calls per 60 seconds.
      * the inner ``retry`` reacts to ``RateLimitException`` by waiting a fixed
        60 seconds, giving up after 360 seconds;
      * the outer ``retry`` reacts to ``requests`` exceptions with exponential
        back-off (capped at 60 seconds), giving up after 3 attempts.

    Args:
        url: Endpoint URL; ``params`` are merged into its query string.
        params: Query parameters to add to ``url``.
        timeout: Seconds to wait for the server before aborting the request.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        The response body as text.

    Raises:
        tenacity.RetryError: when one of the retry policies gives up.
    """
    response = requests.get(url.update_query(params), timeout=timeout)
    # Surface 4xx/5xx as HTTPError (a RequestException) so the outer retry
    # policy handles them instead of returning an error payload as data.
    response.raise_for_status()
    return response.text

class MarketDataInfo(BaseModel):
    """Pydantic model with a coin's id, symbol, current price and market cap.

    NOTE(review): no visible cell instantiates this model; presumably it is
    meant to validate CoinGecko market records -- confirm before relying on it.
    """
    id: str
    symbol: str
    current_price: float
    market_cap: int

In [4]:
# Query parameters for the OHLC request: USD as quote currency, days="1".
params = {
    'vs_currency' : "usd",
    'days' : "1"
}
# Fetch the bitcoin OHLC endpoint and parse the JSON body; the rows are loaded
# into a DataFrame with columns timestamp/open/high/low/close in a later cell.
res = json.loads(call_get(API_ENDPOINT / "coins" / 'bitcoin' / 'ohlc', params))

In [9]:
# Build the OHLC frame in one chain: raw rows first, then the derived columns
# (millisecond timestamp -> datetime, constant coin label, YYYY-MM-DD string).
df = pd.DataFrame(
    res,
    columns=['timestamp', 'open', 'high', 'low', 'close'],
).assign(
    datetime=lambda ohlc: pd.to_datetime(ohlc['timestamp'], unit='ms'),
    coin='bitcoin',
    formated_date=lambda ohlc: ohlc['datetime'].dt.strftime('%Y-%m-%d'),
)

In [10]:
# Fetch the bitcoin market_chart endpoint in USD with days="max" and parse the JSON body.
# NOTE(review): presumably the payload has 'prices', 'market_caps' and
# 'total_volumes' series as consumed by transform_json_dataframe -- confirm.
r2 = json.loads(call_get(API_ENDPOINT / "coins" / 'bitcoin' / "market_chart", {"vs_currency" : "usd", "days" : "max"}))

In [107]:
def transform_json_dataframe(data: dict, coin: str = 'bitcoin', currency: str = 'usd') -> pd.DataFrame:
    """Flatten a market-chart style payload into a tidy DataFrame.

    Args:
        data: Mapping with the keys 'prices', 'market_caps' and
            'total_volumes'; each value is a sequence of ``[timestamp_ms, value]``
            pairs. Timestamps are taken from the 'prices' series.
        coin: Label written to the ``coin`` column (defaults to 'bitcoin', the
            value that used to be hard-coded).
        currency: Label written to the ``currency`` column (defaults to 'usd').

    Returns:
        DataFrame with the columns timestamp, prices, market_caps,
        total_volumes, formated_date, coin, currency.
    """
    ts = [point[0] for point in data['prices']]
    frame = pd.DataFrame({
        "timestamp": ts,
        "prices": [point[1] for point in data['prices']],
        "market_caps": [point[1] for point in data['market_caps']],
        "total_volumes": [point[1] for point in data['total_volumes']],
        # Convert in UTC so the calendar date does not depend on the timezone
        # of the machine running the notebook.
        "formated_date": pd.to_datetime(ts, unit='ms', utc=True).strftime('%Y-%m-%d').tolist(),
    })
    frame['coin'] = coin
    frame['currency'] = currency
    return frame


In [226]:
from boto3.dynamodb.conditions import Key

def convert_lists(response):
    """Turn an iterable of items into ``[PathS3, TokenName]`` pairs.

    The input is traversed exactly once, so one-shot iterables (generators,
    iterators) work as well as lists.

    Args:
        response: Iterable of mappings that each contain the keys 'PathS3'
            and 'TokenName'.

    Returns:
        A list of two-element lists ``[path, token]`` in input order.

    Raises:
        KeyError: if an item lacks 'PathS3' or 'TokenName'.
    """
    return [[item['PathS3'], item['TokenName']] for item in response]


def scan_paths_table(type, dynamodb=None):
    """Scan the 'Pathes' table for records of one type.

    Every page of the scan is fetched (following ``LastEvaluatedKey``) and the
    items are converted with ``convert_lists``.

    Args:
        type: Value the 'TypeOfRecord' attribute must equal.
        dynamodb: DynamoDB resource to use; a default ``boto3`` resource is
            created when omitted.

    Returns:
        List of ``[PathS3, TokenName]`` pairs for all matching items.
    """
    resource = dynamodb if dynamodb else boto3.resource('dynamodb')
    paths_table = resource.Table('Pathes')

    # NOTE(review): boto3's documentation uses Attr(...) for filter
    # expressions; Key(...) is kept here to leave behavior untouched -- confirm.
    request = {
        'FilterExpression': Key('TypeOfRecord').eq(type),
        'ProjectionExpression': "#pth, #tp, #tkn",
        'ExpressionAttributeNames': {"#pth": "PathS3", "#tp": "TypeOfRecord", "#tkn" : "TokenName"}
    }

    collected = []
    while True:
        page = paths_table.scan(**request)
        collected.extend(convert_lists(page.get('Items', [])))

        next_key = page.get('LastEvaluatedKey', None)
        if next_key is None:
            # No continuation key: the scan is complete.
            break
        if next_key:
            request['ExclusiveStartKey'] = next_key

    return collected


In [189]:
def get_json_s3(bucket, key, s3=None):
    """
    Download an object from S3 and parse it as JSON.

    bucket -- name of the bucket holding the object
    key    -- key of the object inside the bucket
    s3     -- S3 client to use; a default boto3 client is created when omitted

    Returns the decoded JSON document (body is read as UTF-8).
    """
    client = s3 or boto3.client('s3')
    body = client.get_object(Bucket=bucket, Key=key)['Body']
    return json.loads(body.read().decode('utf-8'))

In [190]:
# Load the stored historical prices for baby-samo-coin and flatten them.
ddd = get_json_s3('kosmobiker-masterproject', 'data/raw/baby-samo-coin_historical_prices.json', s3_client)
# Set the coin label explicitly: the file holds baby-samo-coin data, and `coin`
# is the partition column, so the rows must not carry another coin's name.
df = transform_json_dataframe(ddd).assign(coin='baby-samo-coin')

In [7]:
# wr.catalog.tables(database="master_project", boto3_session=session)
# wr.catalog.databases(boto3_session=session)
# wr.catalog.table(database="master_project", table="historical_data", boto3_session=session)
# for table in wr.catalog.get_tables(database="master_project", boto3_session=session):
#     wr.catalog.delete_table_if_exists(database="master_project", table=table["Name"], boto3_session=session)

Unnamed: 0,Column Name,Type,Partition,Comment
0,timestamp,bigint,False,
1,prices,double,False,
2,market_caps,double,False,
3,total_volumes,double,False,
4,formated_date,string,False,
5,currency,string,False,
6,coin,string,True,


In [224]:
# client = session.client('glue')

# response = client.get_tables(
#     DatabaseName='master_project'
# )
# response

In [11]:
# Ask awswrangler to repair the Athena table 'historical_data' in database
# 'master_project' using the notebook's boto3 session.
# NOTE(review): presumably run after new partitions were written to S3 so that
# Athena registers them -- confirm.
wr.athena.repair_table('historical_data', 'master_project', boto3_session=session)

'SUCCEEDED'

In [7]:
# NOTE(review): `dfs` is not assigned in any visible cell, so this display
# relies on leftover kernel state and will fail on Restart & Run All -- add
# the cell that builds it or remove this one.
dfs

Unnamed: 0,coin,timestamp,prices,market_caps,total_volumes,formated_date,currency
0,blink,1606176000000,0.013330,0.0,281865.620149,2020-11-24,usd
1,blink,1606262400000,0.013330,0.0,281865.620149,2020-11-25,usd
2,blink,1606348800000,0.016512,0.0,366310.890302,2020-11-26,usd
3,blink,1606435200000,0.012259,0.0,209744.470799,2020-11-27,usd
4,blink,1606521600000,0.012538,0.0,114575.793299,2020-11-28,usd
...,...,...,...,...,...,...,...
500,blink,1649376000000,0.005348,0.0,471.902531,2022-04-08,usd
501,blink,1649462400000,0.004900,0.0,3011.241105,2022-04-09,usd
502,blink,1649548800000,0.004849,0.0,788.728846,2022-04-10,usd
503,blink,1649635200000,0.004794,0.0,1433.303927,2022-04-11,usd


In [12]:
def currencies_info(data: dict, list_of_keys: List[str]) -> pd.DataFrame:
    """Build a one-row DataFrame from selected keys of a coin record.

    Args:
        data: Mapping holding the coin's attributes.
        list_of_keys: Keys of ``data`` to keep.

    Returns:
        DataFrame produced by ``pd.json_normalize`` on the selected keys, plus
        a ``timestamp`` column with the current time as a POSIX timestamp.

    Raises:
        KeyError: if a requested key is missing from ``data``.
    """
    filtered_json = {key: data[key] for key in list_of_keys}
    temp_df = pd.json_normalize(filtered_json)
    temp_df['timestamp'] = datetime.now().timestamp()
    # The frame was previously built and then discarded; hand it back.
    return temp_df


In [93]:
# Keys selected from a coin record.
# NOTE(review): presumably passed to currencies_info as its key list -- confirm.
list_of_keys = [
    'id',
    'symbol',
    'name',
    'hashing_algorithm',
    'categories',
    'genesis_date',
    'sentiment_votes_up_percentage',
    'sentiment_votes_down_percentage',
    'market_cap_rank',
    'coingecko_rank',
    'coingecko_score',
    'developer_score',
    'community_score',
    'liquidity_score',
    'public_interest_score',
    'last_updated',
]