In [0]:
import json
import time
from datetime import date
from io import BytesIO

import boto3
import pandas as pd
import pyspark.sql.functions as F
import requests
from botocore.client import Config
from botocore.exceptions import BotoCoreError, ClientError
from pyspark.sql import SparkSession
# Wildcard import kept for backward compatibility; it stays last so it shadows
# exactly the same earlier names as before. Prefer the explicit `F.` prefix.
from pyspark.sql.functions import *

In [0]:
 def extract_traders_data():
    """Fetch the latest trades from CoinAPI and store them as today's bronze JSON object in S3.

    The API key is read from the Databricks secret scope ``crypto_key``. The
    JSON payload returned by ``/v1/trades/latest`` is uploaded unchanged to
    ``crypto/data/bronze/<today>_crypto_.json`` in the project bucket.

    Raises:
        requests.RequestException: on connection problems, timeout or a
            4xx/5xx answer from CoinAPI.
        botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError,
        ValueError: if the upload fails; re-raised after the failure message
            is printed so the following pipeline steps do not run on missing data.
    """
    url = "https://rest.coinapi.io/v1/trades/latest"
    headers = {"X-CoinAPI-Key": dbutils.secrets.get(scope="crypto_key", key="key_api")}
    # requests applies no timeout by default; without one a stalled connection
    # would block the notebook indefinitely.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail fast on HTTP errors so an error payload is never persisted as trade data.
    response.raise_for_status()
    data = response.json()

    bucket_name = "crypto-currency-data-prosimplee"
    file_name = "crypto/data/bronze/" + str(date.today()) + "_crypto_.json"
    s3 = boto3.resource("s3")
    try:
        s3.Bucket(bucket_name).put_object(Key=file_name, Body=json.dumps(data), ACL='private')
        print("Success")
    except (BotoCoreError, ClientError, ValueError):
        # boto3 reports upload problems as BotoCoreError/ClientError, not ValueError.
        print("Extract Traders Data: FAILED!")
        raise
    
# Run the bronze step: upload today's raw trades JSON that silver_crypto() reads back.
extract_traders_data()

In [0]:
def silver_crypto():
    """Convert today's bronze trades JSON into the silver parquet object in S3.

    Reads ``crypto/data/bronze/<today>_crypto_.json``, keeps five fields of
    every trade (``uuid`` as ``user_id``, ``symbol_id``, ``taker_side`` as
    ``action``, ``size`` and ``time_exchange``) and writes the result to
    ``crypto/data/silver/<today>_crypto_.parquet``.

    Raises:
        botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError:
            if the bronze object cannot be read or the silver object cannot be
            written (re-raised after the failure message is printed).
        ValueError: if the bronze payload is not valid JSON or the parquet
            serialization fails.
        KeyError: if a trade record lacks one of the expected fields.
    """
    bucket_name = "crypto-currency-data-prosimplee"
    # Resolve the date once so the bronze and silver keys always refer to the same day.
    today = str(date.today())
    bronze_key = "crypto/data/bronze/" + today + "_crypto_.json"
    silver_key = "crypto/data/silver/" + today + "_crypto_.parquet"
    s3 = boto3.resource("s3")

    try:
        file_content = s3.Object(bucket_name, bronze_key).get()["Body"].read().decode("utf-8")
    except (BotoCoreError, ClientError):
        # Only genuine S3 failures are reported as connection problems.
        print("Connection to S3 (traders data): FAILED!")
        raise

    json_content = json.loads(file_content)
    crypto_silver_data = [
        {
            "user_id": crypto_raw["uuid"],
            "symbol_id": crypto_raw["symbol_id"],
            "action": crypto_raw["taker_side"],
            "size": crypto_raw["size"],
            "time_exchange": crypto_raw["time_exchange"],
        }
        for crypto_raw in json_content
    ]
    crypto_silver_df = pd.DataFrame(crypto_silver_data)

    out_buffer = BytesIO()
    try:
        crypto_silver_df.to_parquet(out_buffer, index=False)
        s3.Bucket(bucket_name).put_object(Key=silver_key, Body=out_buffer.getvalue(), ACL="private")
    except (BotoCoreError, ClientError, ValueError):
        # Re-raise: the next cell reads this object, so a failed write must stop the run.
        print("Parquet traders data (silver) values into S3: FAILED!")
        raise
        
# Run the silver step: writes today's parquet object under crypto/data/silver/.
silver_crypto()

In [0]:
# Obtain the Spark session used by the Spark cells below (returns the existing one or creates it).
spark = SparkSession.builder.getOrCreate()

In [0]:
# Register today's silver parquet as the temp view "crypto_data".
# createOrReplaceTempView() returns None, so its result is deliberately not
# bound to a name; the DataFrame is obtained from the view further down.
# No schema option is needed: parquet files carry their own schema.
spark.read \
    .parquet("s3a://crypto-currency-data-prosimplee/crypto/data/silver/" + str(date.today()) + "_crypto_.parquet") \
    .createOrReplaceTempView("crypto_data")

# Switch the Databricks IO cache off for this session.
spark.conf.set("spark.databricks.io.cache.enabled", False)

golden_crypto_table = spark.table("crypto_data")

# Quick visual check of the loaded rows.
golden_crypto_table.limit(10).show()

In [0]:
def extract_symbol_from(column):
    """Return the third ``_``-separated field of a symbol id (the "from" asset).

    Example: ``"OKEX_SPOT_DMD_USDT"`` -> ``"DMD"``.

    Args:
        column: symbol id string; ``None`` is accepted because Spark hands
            null cells to a Python UDF as ``None``.

    Returns:
        The third field, or ``None`` when the value is null or has fewer than
        three fields, so one malformed row cannot abort the whole UDF stage.
    """
    if column is None:
        return None
    parts = column.split("_")
    return parts[2] if len(parts) > 2 else None

# Spark UDF wrapper around extract_symbol_from (no explicit return type is passed to F.udf).
sym_from_udf = F.udf(extract_symbol_from)

def extract_symbol_to(column):
    """Return the fourth ``_``-separated field of a symbol id (the "to" asset).

    Example: ``"OKEX_SPOT_DMD_USDT"`` -> ``"USDT"``.

    Args:
        column: symbol id string; ``None`` is accepted because Spark hands
            null cells to a Python UDF as ``None``.

    Returns:
        The fourth field, or ``None`` when the value is null or has fewer than
        four fields, so one malformed row cannot abort the whole UDF stage.
    """
    if column is None:
        return None
    parts = column.split("_")
    return parts[3] if len(parts) > 3 else None

# Spark UDF wrapper around extract_symbol_to (no explicit return type is passed to F.udf).
sym_to_udf = F.udf(extract_symbol_to)
    

In [0]:
# Gold frame: split symbol_id into the traded pair and give time_exchange a timestamp type.
golden_crypto = (
    golden_crypto_table
    .withColumn("symbol_from", sym_from_udf(F.col("symbol_id")))
    .withColumn("symbol_to", sym_to_udf(F.col("symbol_id")))
    # A single cast is sufficient; wrapping an already-cast timestamp column in
    # to_timestamp() adds nothing and needed the wildcard import to resolve.
    .withColumn("time_exchange", F.col("time_exchange").cast("timestamp"))
)


In [0]:
# Preview ten rows. Limit in Spark first so only those rows are collected to
# the driver instead of converting the entire DataFrame to pandas.
golden_crypto.limit(10).toPandas()

Unnamed: 0,user_id,symbol_id,action,size,time_exchange,symbol_from,symbol_to
0,94781a16-3837-49d2-b7de-8c13d9448f41,OKEX_SPOT_DMD_USDT,SELL,1.556053,2022-08-03 13:52:46.987,DMD,USDT
1,08840555-90b2-4bfa-a371-2b17a536b9e0,PHEMEX_PERP_AAVE_USD,BUY,167.0,2022-08-03 13:52:47.017,AAVE,USD
2,a6ff2d37-e945-49a6-b231-3836828613fe,CRYPTOCOM_SPOT_SRM_USDC,BUY,0.06,2022-08-03 13:52:46.945,SRM,USDC
3,28f7a3a0-568f-44b8-a975-49c1bb34bd87,CRYPTOCOM_SPOT_DOT_BTC,BUY,0.052,2022-08-03 13:52:47.003,DOT,BTC
4,f777c40b-1583-4031-bf20-fe1c5fed2c29,OKEX_SPOT_STX_USDT,SELL,127.348611,2022-08-03 13:52:46.979,STX,USDT
5,a9b6a9d0-3e01-4c09-8e17-d95990d97c8b,BINANCEFTS_PERP_FIL_USDT,BUY,2.4,2022-08-03 13:52:46.865,FIL,USDT
6,c28ae5d9-03a5-4cd3-87ec-f2c8768aea7f,BINANCEFTS_PERP_UNFI_USDT,BUY,30.7,2022-08-03 13:52:47.010,UNFI,USDT
7,74e0bc7a-df44-4e92-8460-bb189dcc5843,CRYPTOCOM_SPOT_STX_USDT,BUY,0.03,2022-08-03 13:52:46.893,STX,USDT
8,49d7ffc4-017d-4a3a-800a-c3aadc14bcfd,DIGIFINEX_SPOT_COMET_USDT_566C69,SELL,1688.5046,2022-08-03 13:13:52.000,COMET,USDT
9,7c36b583-7338-4bb3-b222-88f46ae64a7b,DIGIFINEX_SPOT_COMET_USDT_566C69,SELL,1078.2297,2022-08-03 13:16:28.000,COMET,USDT


In [0]:
# Columns persisted to SQL Server; symbol_id is left out, the derived pair columns are kept.
db_crypto_data = golden_crypto.select(
    "user_id",
    "action",
    "size",
    "symbol_from",
    "symbol_to",
    "time_exchange",
)

# Connection settings are read from Databricks secret scopes rather than hard-coded.
database = dbutils.secrets.get(scope="database", key="name")
table = "dbo.crypto_data"
user = dbutils.secrets.get(scope="username", key="usr")
password = dbutils.secrets.get(scope="mssql", key="password")
server_name = dbutils.secrets.get(scope="server", key="name")

# Append the selected rows to the target table through the SQL Server JDBC driver.
(
    db_crypto_data.write
    .format("jdbc")
    .mode("append")
    .options(
        url=f"jdbc:sqlserver://{server_name};databaseName={database};",
        dbtable=table,
        user=user,
        password=password,
        driver="com.microsoft.sqlserver.jdbc.SQLServerDriver",
    )
    .save()
)

In [0]:
def via_api_extract_crypto_names():
    """Fetch the CoinAPI asset catalog and stage it as a JSON object in S3.

    The API key is read from the Databricks secret scope ``crypto_key``. The
    JSON payload returned by ``/v1/assets`` is uploaded unchanged to
    ``crypto/catalog/stag/crypto_catalog.json`` in the project bucket.

    Raises:
        requests.RequestException: on connection problems, timeout or a
            4xx/5xx answer from CoinAPI.
        botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError,
        ValueError: if the upload fails; re-raised after the failure message
            is printed so the following steps do not run on a missing catalog.
    """
    url = "https://rest.coinapi.io/v1/assets"
    headers = {"X-CoinAPI-Key": dbutils.secrets.get(scope="crypto_key", key="key_api")}
    # requests applies no timeout by default; without one a stalled connection
    # would block the notebook indefinitely.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail fast on HTTP errors so an error payload is never staged as the catalog.
    response.raise_for_status()
    data = response.json()

    bucket_name = "crypto-currency-data-prosimplee"
    file_name = "crypto/catalog/stag/crypto_catalog.json"
    s3 = boto3.resource("s3")
    try:
        s3.Bucket(bucket_name).put_object(Key=file_name, Body=json.dumps(data), ACL="private")
        print("Success")
    except (BotoCoreError, ClientError, ValueError):
        # boto3 reports upload problems as BotoCoreError/ClientError, not ValueError.
        print("Extract Crypto Names via Api: FAILED!")
        raise
    
# Stage the asset catalog JSON that from_s3_extract_crypto_names() reads back.
via_api_extract_crypto_names()

In [0]:
def from_s3_extract_crypto_names():
    """Turn the staged asset catalog JSON into a two-column parquet lookup in S3.

    Reads ``crypto/catalog/stag/crypto_catalog.json``, keeps ``asset_id`` (as
    ``symbol_id``) and ``name`` (as ``symbol_name``) of every entry and writes
    the result to ``crypto/catalog/clean/crypto_catalog.parquet``. Entries
    lacking one of the two fields are reported and skipped.

    Raises:
        botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError:
            if the staged object cannot be read or the parquet object cannot be
            written (re-raised after the failure message is printed).
        ValueError: if the staged payload is not valid JSON or the parquet
            serialization fails.
    """
    bucket_name = "crypto-currency-data-prosimplee"
    s3 = boto3.resource("s3")

    try:
        content_object = s3.Object(bucket_name, "crypto/catalog/stag/crypto_catalog.json")
        file_content = content_object.get()["Body"].read().decode("utf-8")
    except (BotoCoreError, ClientError):
        # Only genuine S3 failures are reported as connection problems.
        print("Connection to S3 (crypto names): FAILED!")
        raise

    json_content = json.loads(file_content)
    crypto_names = []
    for cr_n in json_content:
        try:
            crypto_names.append({"symbol_id": cr_n["asset_id"],
                                 "symbol_name": cr_n["name"]})
        except KeyError as missing_field:
            # A missing dictionary key raises KeyError, never ValueError, so this
            # is the exception that must be caught to skip incomplete entries.
            print(f"Crypto Name skipped: missing field {missing_field}")

    crypto_name_dictionary = pd.DataFrame(crypto_names)
    file_name = "crypto/catalog/clean/crypto_catalog.parquet"
    out_buffer = BytesIO()
    try:
        crypto_name_dictionary.to_parquet(out_buffer, index=False)
        s3.Bucket(bucket_name).put_object(Key=file_name, Body=out_buffer.getvalue(), ACL="private")
    except (BotoCoreError, ClientError, ValueError):
        # Re-raise: the next cell reads this object, so a failed write must stop the run.
        print("Parquet Crypto Names (silver) values into S3: FAILED!")
        raise
        
# Build the clean parquet catalog (symbol_id -> symbol_name) from the staged JSON.
from_s3_extract_crypto_names()

In [0]:
# Register the clean asset catalog as the temp view "crypto_names".
# createOrReplaceTempView() returns None, so its result is deliberately not
# bound to a name; the DataFrame is obtained from the view below.
# No schema option is needed: parquet files carry their own schema.
spark.read \
    .parquet("s3a://crypto-currency-data-prosimplee/crypto/catalog/clean/crypto_catalog.parquet") \
    .createOrReplaceTempView("crypto_names")

cr_names_table = spark.table("crypto_names")

# Quick visual check of the catalog rows.
cr_names_table.limit(10).show()


In [0]:
# Mean trade size per (symbol_from, symbol_to, action) group, keeping only the SELL groups.
avg_size_sell = (
    golden_crypto
    .groupBy("symbol_from", "symbol_to", "action")
    .agg(F.avg("size").alias("avg_size"))
    .filter(F.col("action") == "SELL")
)

avg_size_sell.limit(10).show()

In [0]:
# Step 1: attach the catalog name of the "from" asset to every SELL aggregate row.
result_sell = (
    avg_size_sell
    .join(cr_names_table, avg_size_sell.symbol_from == cr_names_table.symbol_id, how="inner")
    .select(
        F.col("symbol_from"),
        F.col("symbol_name").alias("symbol_from_name"),
        F.col("symbol_to"),
        F.col("avg_size"),
    )
)

# Step 2: attach the catalog name of the "to" asset. The join key is taken from
# result_sell, the frame actually being joined, rather than from its ancestor
# avg_size_sell, so the condition does not depend on lineage-based resolution.
sell_df = (
    result_sell
    .join(cr_names_table, result_sell.symbol_to == cr_names_table.symbol_id, how="inner")
    .select(
        F.col("symbol_from"),
        F.col("symbol_from_name"),
        F.col("symbol_to"),
        F.col("symbol_name").alias("symbol_to_name"),
        F.col("avg_size"),
    )
)

# Inner joins drop pairs whose asset codes are not present in the catalog.
sell_df.limit(10).show()

In [0]:
# Persist the SELL result as today's gold parquet dataset in S3, overwriting an earlier run.
(
    sell_df.write
    .option("header","true")
    .mode("OVERWRITE")
    .parquet("s3a://crypto-currency-data-prosimplee/crypto/data/gold/sell_" + str(date.today()) + "_crypto_.parquet")
)

In [0]:
# Mean trade size per (symbol_from, symbol_to, action) group, keeping only the BUY groups.
avg_size_buy = (
    golden_crypto
    .groupBy("symbol_from", "symbol_to", "action")
    .agg(F.avg("size").alias("avg_size"))
    .filter(F.col("action") == "BUY")
)

avg_size_buy.limit(10).show()

In [0]:
# Step 1: attach the catalog name of the "from" asset.
# NOTE: avg_size_buy is rebound to the joined frame here, so every later
# reference to avg_size_buy sees the name-enriched version, not the raw aggregate.
avg_size_buy = (
    avg_size_buy
    .join(cr_names_table, avg_size_buy.symbol_from == cr_names_table.symbol_id, how="inner")
    .select(
        "symbol_from",
        F.col("symbol_name").alias("symbol_from_name"),
        "symbol_to",
        "avg_size",
    )
)

# Step 2: attach the catalog name of the "to" asset.
buy_df = (
    avg_size_buy
    .join(cr_names_table, avg_size_buy.symbol_to == cr_names_table.symbol_id, how="inner")
    .select(
        "symbol_from",
        "symbol_from_name",
        "symbol_to",
        F.col("symbol_name").alias("symbol_to_name"),
        "avg_size",
    )
)

buy_df.limit(10).show()

In [0]:
# Persist the BUY result as today's gold parquet dataset in S3, overwriting an earlier run.
# buy_df (both asset names attached) is written, mirroring the SELL export of
# sell_df; avg_size_buy is only the intermediate frame without symbol_to_name.
buy_df.write \
 .mode("OVERWRITE") \
 .option("header","true") \
 .parquet("s3a://crypto-currency-data-prosimplee/crypto/data/gold/buy_" + str(date.today()) + "_crypto_.parquet")