# **main.py**

Make Requests to CoinLore API, cleaned data, and exported into S3


In [1]:

from requests.api import get
from pandas_functions import *
from  spark_functions import *
import pandas as pd
from dotenv import load_dotenv
import config as c
import glob
from contextlib import redirect_stdout




def main():
    """[
        - Data pipelines to extract, tranform and load data into S3 bucket
        ]
    """
    
    load_dotenv()
    # file paths
    coin_path = 'data/coins_data.csv'
    top_coins_path = 'data/top_coins.csv'
    coin_exchange_path = 'data/coin_exchange_info.csv'
    historical_data = 'data/historical_data.csv'
    historical_data_path = "data/Downloaded_data/*.csv"
    final_historical = "data/final_historical_data.csv"
    # Exchange data pipeline
    try:
        GET_all_exchanges = 'https://api.coinlore.net/api/exchanges/'
        exchange_requests = get_request(GET_all_exchanges)
        exchange_data = exchange_data_filter(exchange_requests)
        export_csv(exchange_data,coin_exchange_path)
        print(f"\n\nSucessfully get data and export out to {coin_exchange_path}!\n\n")
    except Exception as e:
        print(f'\nFail to GET at {GET_all_exchanges}\n')
        print(e)
        
    # coins data pipeline 
    try:
        coin_requests= get_coin_request(start=0, limit=7000)
        export_csv(coin_requests, coin_path)
        print(f"\n\nSucessfully get data and export out to {coin_path}!\n\n")
    except Exception as e:
        print(f'\nFail to GET at {coin_path}\n')
        print(e)

    #top coins data pipeline
    try:
        top_coins_path = 'data/top_coins.csv'
        top_coins = top_rank_coins(HowMany=25)
        export_csv(top_coins, top_coins_path)
        print(f"\nSucessfully get data and export out to: {top_coins_path}!\n\n")
    except Exception as e:
        print(f'\nFail to GET at {top_coins_path} \n')
        print(e)

    # binance historical coins data clean and export to csv
    try:
        path = r'data/historical_data.csv'
        binance_coins_data(historical_data_path, historical_data)
        all_files = glob.glob(path + "/*.csv")
        li = []
        for filename in all_files:
            df = pd.read_csv(filename, index_col=None, header=0)
            li.append(df)
        frame = pd.concat(li, axis=0, ignore_index=True)
        frame.to_csv('data/final_historical_data.csv', index=False)
        print(f"\nSucessfully clean and export historical data to {historical_data}\n")
    except Exception as e:
        print(f'\nFail to clean and export {historical_data_path}\n')
        print(e) 
    
    # Stage historical 1 minutes data to s3
    try:
        upload_to_s3(bucketname=c.S3_BUCKET_NAME, local_file_path=final_historical, s3_file_path=final_historical)
        print(f"\n\nSuccessfully Uploaded {final_historical} to S3!\n\n")
    except Exception as e:
        print(f'\nFail to upload {final_historical}\n')
        print(e)
    
    # stage coins_data info to s3
    
    try:
        upload_to_s3(bucketname=c.S3_BUCKET_NAME, local_file_path=coin_path, s3_file_path=coin_path)
        print(f"\nSuccessfully Uploaded {coin_path} to S3!\n")
    except Exception as e:
        print(f'\nFail to upload {coin_path}\n')
        print(e)
    
    # stage coin market data into s3
    try:
        upload_to_s3(bucketname=c.S3_BUCKET_NAME, local_file_path=top_coins_path, s3_file_path=top_coins_path)
        print(f"\n\nSuccessfully Uploaded {top_coins_path} to S3!\n\n")
    except Exception as e:
        print(f'\nFail to upload {top_coins_path}\n')
        print(e)
    
    # stage coin exchange path into s3
    try:
        upload_to_s3(bucketname=c.S3_BUCKET_NAME, local_file_path=coin_exchange_path, s3_file_path=coin_exchange_path)
        print(f"\n\nSuccessfully Uploaded {coin_exchange_path} to S3!\n\n")
    except Exception as e:
        print(f'\nFail to upload {coin_exchange_path}\n')
        print(e)
        

    with open('database/data_result_output.py', 'w') as f:
        with redirect_stdout(f):
            print(f"exchange_data_s3_result={exchange_data.shape}")
            print(f"coin_requests_s3_result={coin_requests.shape}")
            print(f"top_coins_s3_result={top_coins.shape}")
            print(f"historical_s3_result={frame.shape}")
            
if __name__ == "__main__":
    main()

Python-dotenv could not parse statement starting at line 1
Python-dotenv could not parse statement starting at line 6
Python-dotenv could not parse statement starting at line 17
Python-dotenv could not parse statement starting at line 24
Python-dotenv could not parse statement starting at line 30
Python-dotenv could not parse statement starting at line 1
Python-dotenv could not parse statement starting at line 6
Python-dotenv could not parse statement starting at line 17
Python-dotenv could not parse statement starting at line 24
Python-dotenv could not parse statement starting at line 30


exported to data/coin_exchange_info.csv!


Sucessfully get data and export out to data/coin_exchange_info.csv!


Successfully request number 0
Successfully request number 100
Successfully request number 200
Successfully request number 300
Successfully request number 400
Successfully request number 500
Successfully request number 600
Successfully request number 700
Successfully request number 800
Successfully request number 900
Successfully request number 1000
Successfully request number 1100
Successfully request number 1200
Successfully request number 1300
Successfully request number 1400
Successfully request number 1500
Successfully request number 1600
Successfully request number 1700
Successfully request number 1800
Successfully request number 1900
Successfully request number 2000
Successfully request number 2100
Successfully request number 2200
Successfully request number 2300
Successfully request number 2400
Successfully request number 2500
Successfully request number 2600
Successf

22/01/04 20:56:02 WARN Utils: Your hostname, DESKTOP-7O0VB5H resolves to a loopback address: 127.0.1.1; using 172.21.154.35 instead (on interface eth0)
22/01/04 20:56:02 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
https://repos.spark-packages.org/ added as a remote repository with the name: repo-1


:: loading settings :: url = jar:file:/mnt/c/Users/tienl/Udacity_Courses/DE_capstone/capstone_venv/lib/python3.8/site-packages/pyspark/jars/ivy-2.5.0.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /root/.ivy2/cache
The jars for the packages stored in: /root/.ivy2/jars
saurfang#spark-sas7bdat added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-d359c65b-3225-45d4-a3d5-1701c8edddd0;1.0
	confs: [default]
	found saurfang#spark-sas7bdat;2.0.0-s_2.11 in spark-packages
	found com.epam#parso;2.0.8 in central
	found org.slf4j#slf4j-api;1.7.5 in central
	found org.apache.logging.log4j#log4j-api-scala_2.11;2.7 in central
	found org.scala-lang#scala-reflect;2.11.8 in central
:: resolution report :: resolve 226ms :: artifacts dl 9ms
	:: modules in use:
	com.epam#parso;2.0.8 from central in [default]
	org.apache.logging.log4j#log4j-api-scala_2.11;2.7 from central in [default]
	org.scala-lang#scala-reflect;2.11.8 from central in [default]
	org.slf4j#slf4j-api;1.7.5 from central in [default]
	saurfang#spark-sas7bdat;2.0.0-s_2.11 from spark-packages in [default]
	---------------------------------------------------------------------
	| 


Sucessfully export file to: data/historical_data.csv!



Sucessfully clean and export historical data to data/historical_data.csv



# Full view of log output from main.py

exported to data/coin_exchange_info.csv!


Sucessfully get data and export out to data/coin_exchange_info.csv!


Successfully request number 0
Successfully request number 100
Successfully request number 200
Successfully request number 300
Successfully request number 400
Successfully request number 500
Successfully request number 600
Successfully request number 700
Successfully request number 800
Successfully request number 900
Successfully request number 1000
Successfully request number 1100
Successfully request number 1200
Successfully request number 1300
Successfully request number 1400
Successfully request number 1500
Successfully request number 1600
Successfully request number 1700
Successfully request number 1800
Successfully request number 1900
Successfully request number 2000
Successfully request number 2100
Successfully request number 2200
Successfully request number 2300
Successfully request number 2400
Successfully request number 2500
Successfully request number 2600
Successfully request number 2700
Successfully request number 2800
Successfully request number 2900
Successfully request number 3000
Successfully request number 3100
Successfully request number 3200
Successfully request number 3300
Successfully request number 3400
Successfully request number 3500
Successfully request number 3600
Successfully request number 3700
Successfully request number 3800
Successfully request number 3900
Successfully request number 4000
Successfully request number 4100
Successfully request number 4200
Successfully request number 4300
Successfully request number 4400
Successfully request number 4500
Successfully request number 4600
Successfully request number 4700
Successfully request number 4800
Successfully request number 4900
Successfully request number 5000
Successfully request number 5100
Successfully request number 5200
Successfully request number 5300
Successfully request number 5400
Successfully request number 5500
Successfully request number 5600
Successfully request number 5700
Successfully request number 5800
Successfully request number 5900
Successfully request number 6000
Successfully request number 6100
Successfully request number 6200
Successfully request number 6300
Successfully request number 6400
Successfully request number 6500
Successfully request number 6600
Successfully request number 6700
Successfully request number 6800
Successfully request number 6900
exported to data/coins_data.csv!


Sucessfully get data and export out to data/coins_data.csv!


Sending request to: https://api.coinlore.net/api/coin/markets/?id=90

Request number: 0

Sending request to: https://api.coinlore.net/api/coin/markets/?id=80

Request number: 1

Sending request to: https://api.coinlore.net/api/coin/markets/?id=2710

Request number: 2

Sending request to: https://api.coinlore.net/api/coin/markets/?id=518

Request number: 3

Sending request to: https://api.coinlore.net/api/coin/markets/?id=48543

Request number: 4

Sending request to: https://api.coinlore.net/api/coin/markets/?id=257

Request number: 5

Sending request to: https://api.coinlore.net/api/coin/markets/?id=33285

Request number: 6

Sending request to: https://api.coinlore.net/api/coin/markets/?id=58

Request number: 7

Sending request to: https://api.coinlore.net/api/coin/markets/?id=48537

Request number: 8

Sending request to: https://api.coinlore.net/api/coin/markets/?id=45219

Request number: 9

Sending request to: https://api.coinlore.net/api/coin/markets/?id=44883

Request number: 10

Sending request to: https://api.coinlore.net/api/coin/markets/?id=2

Request number: 11

Sending request to: https://api.coinlore.net/api/coin/markets/?id=45088

Request number: 12

Sending request to: https://api.coinlore.net/api/coin/markets/?id=48591

Request number: 13

Sending request to: https://api.coinlore.net/api/coin/markets/?id=2751

Request number: 14

Sending request to: https://api.coinlore.net/api/coin/markets/?id=33422

Request number: 15

Sending request to: https://api.coinlore.net/api/coin/markets/?id=47305

Request number: 16

Sending request to: https://api.coinlore.net/api/coin/markets/?id=34406

Request number: 17

Sending request to: https://api.coinlore.net/api/coin/markets/?id=1

Request number: 18

Sending request to: https://api.coinlore.net/api/coin/markets/?id=48563

Request number: 19

Sending request to: https://api.coinlore.net/api/coin/markets/?id=33830

Request number: 20

Sending request to: https://api.coinlore.net/api/coin/markets/?id=2321

Request number: 21

Sending request to: https://api.coinlore.net/api/coin/markets/?id=48571

Request number: 22

Sending request to: https://api.coinlore.net/api/coin/markets/?id=33644

Request number: 23

Sending request to: https://api.coinlore.net/api/coin/markets/?id=89

Request number: 24

exported to data/top_coins.csv!

Sucessfully get data and export out to: data/top_coins.csv!




# **start_redshift.py** script

In [4]:
#!/mnt/c/Users/tienl/Udacity_Courses/DE_capstone/capstone_venv/bin/python3.8
import pandas as pd
import json
import time
from pandas_functions import prettyRedshiftProps
import config as c
from contextlib import redirect_stdout
import os

exe_path = '/mnt/c/Users/tienl/Udacity_Courses/DE_capstone'
os.chdir(exe_path)

# (DWH_DB_USER, DWH_DB_PASSWORD, DWH_DB)
print("\n\n")
print(">>> Starting Redshift Cluster!\n>>> This is the configuration of redshift cluster!")
print("\n")

print(pd.DataFrame({"Param":
                  ["DWH_CLUSTER_TYPE", "DWH_NUM_NODES", "DWH_NODE_TYPE", "DWH_CLUSTER_IDENTIFIER", "DWH_DB", "DWH_DB_USER", "DWH_DB_PASSWORD", "DWH_PORT", "DWH_IAM_ROLE_NAME"],
              "Value":
                  [c.DWH_CLUSTER_TYPE, c.DWH_NUM_NODES, c.DWH_NODE_TYPE, c.DWH_CLUSTER_IDENTIFIER, c.DWH_DB, c.DWH_DB_USER, c.DWH_DB_PASSWORD, c.DWH_PORT, c.DWH_IAM_ROLE_NAME]
             }))

print("\n\n")


ec2 = c.ec2
s3 = c.s3
iam = c.iam
redshift = c.redshift


try:
    print("1.1 Creating a new IAM Role") 
    dwhRole = iam.create_role(
        Path='/',
        RoleName=c.DWH_IAM_ROLE_NAME,
        Description = "Allows Redshift clusters to call AWS services on your behalf.",
        AssumeRolePolicyDocument=json.dumps(
            {'Statement': [{'Action': 'sts:AssumeRole',
               'Effect': 'Allow',
               'Principal': {'Service': 'redshift.amazonaws.com'}}],
             'Version': '2012-10-17'})
    )    
except Exception as e:
    print(e)
    
#1.1 Create the role, 
try:
    print("1.1 Creating a new IAM Role") 
    dwhRole = iam.create_role(
        Path='/',
        RoleName=c.DWH_IAM_ROLE_NAME,
        Description = "Allows Redshift clusters to call AWS services on your behalf.",
        AssumeRolePolicyDocument=json.dumps(
            {'Statement': [{'Action': 'sts:AssumeRole',
               'Effect': 'Allow',
               'Principal': {'Service': 'redshift.amazonaws.com'}}],
             'Version': '2012-10-17'})
    )    
except Exception as e:
    print(e)
    
print("1.2 Attaching Policy")

iam.attach_role_policy(RoleName=c.DWH_IAM_ROLE_NAME,
                       PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
                      )['ResponseMetadata']['HTTPStatusCode']

print("1.3 Get the IAM role ARN")
roleArn = iam.get_role(RoleName=c.DWH_IAM_ROLE_NAME)['Role']['Arn']

print(f"\n\n>>> {roleArn}\n\n")

try:
    response = redshift.create_cluster(        
        #HW
        ClusterType=c.DWH_CLUSTER_TYPE,
        NodeType=c.DWH_NODE_TYPE,
        NumberOfNodes=int(c.DWH_NUM_NODES),

        #Identifiers & Credentials
        DBName=c.DWH_DB,
        ClusterIdentifier=c.DWH_CLUSTER_IDENTIFIER,
        MasterUsername=c.DWH_DB_USER,
        MasterUserPassword=c.DWH_DB_PASSWORD,
        
        #Roles (for s3 access)
        IamRoles=[roleArn]  )
except Exception as e:
    print(f"\n >>> {e} \n\n")
    

myClusterProps = redshift.describe_clusters(ClusterIdentifier=c.DWH_CLUSTER_IDENTIFIER)['Clusters'][0]

cluster_df = prettyRedshiftProps(myClusterProps)

check_status = cluster_df['Value'][2]
status = 'available'


while True:
    time.sleep(5)
    myClusterProps = redshift.describe_clusters(ClusterIdentifier=c.DWH_CLUSTER_IDENTIFIER)['Clusters'][0]
    cluster_df = prettyRedshiftProps(myClusterProps)
    check_status = cluster_df['Value'][2]
    if check_status == status:
        DWH_ENDPOINT = myClusterProps['Endpoint']['Address']
        DWH_ROLE_ARN = myClusterProps['IamRoles'][0]['IamRoleArn']
        print("\n\n>>> Cluster successfully created!\n")
        print(f"\n>>> DWH_ENDPOINT :: {DWH_ENDPOINT}\n")
        print(f"\n>>> DWH_ROLE_ARN :: { DWH_ROLE_ARN}\n")
        break
    print('Cluster not up yet')


with open('database/.env', 'w') as file:
    with redirect_stdout(file):
        print(f'DWH_ENDPOINT={DWH_ENDPOINT}')
        print(f'DWH_ROLE_ARN={DWH_ROLE_ARN}')
     
try:
    vpc = ec2.Vpc(id=myClusterProps['VpcId'])
    defaultSg = list(vpc.security_groups.all())[0]
    print(defaultSg)
    defaultSg.authorize_ingress(
        GroupName=defaultSg.group_name,
        CidrIp='0.0.0.0/0',
        IpProtocol='TCP',
        FromPort=int(c.DWH_PORT),
        ToPort=int(c.DWH_PORT)
    )
except Exception as e:
    print(f"\n>>> {e}\n\n")
    





>>> Starting Redshift Cluster!
>>> This is the configuration of redshift cluster!


                    Param       Value
0  DWH_CLUSTER_TYPE        multi-node
1  DWH_NUM_NODES           4         
2  DWH_NODE_TYPE           dc2.large 
3  DWH_CLUSTER_IDENTIFIER  dwhCluster
4  DWH_DB                  dwh       
5  DWH_DB_USER             dwhuser   
6  DWH_DB_PASSWORD         Passw0rd  
7  DWH_PORT                5439      
8  DWH_IAM_ROLE_NAME       dwhRole   



1.1 Creating a new IAM Role
1.1 Creating a new IAM Role
An error occurred (EntityAlreadyExists) when calling the CreateRole operation: Role with name dwhRole already exists.
1.2 Attaching Policy
1.3 Get the IAM role ARN


>>> arn:aws:iam::539761204517:role/dwhRole




  pd.set_option('display.max_colwidth', -1)


Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet
Cluster not up yet


>>> Cluster successfully created!


>>> DWH_ENDPOINT :: dwhcluster.cmjnfq1cdb24.us-west-2.redshift.amazonaws.com


>>> DWH_ROLE_ARN :: arn:aws:iam::539761204517:role/dwhRole

ec2.SecurityGroup(id='sg-fa39a2f3')

>>> An error occurred (InvalidPermission.Duplicate) when calling the AuthorizeSecurityGroupIngress operation: the specified rule "peer: 0.0.0.0/0, TCP, from port: 5439, to port: 5439, ALLOW" already exists




# **db_main.py** script

In [1]:
#!/mnt/c/Users/tienl/Udacity_Courses/DE_capstone/capstone_venv/bin/python3.8
import psycopg2
from dotenv import load_dotenv
import os
from pathlib import Path
from create_table_sql import drop_table_queries, create_table_queries, copy_table_queries, insert_table_queries

exe_path = '/mnt/c/Users/tienl/Udacity_Courses/DE_capstone/database'
os.chdir(exe_path)
#Dynamic Environment variables
load_dotenv()
HOST = os.getenv('DWH_ENDPOINT')
DWH_ROLE_ARB=os.getenv('DWH_ROLE_ARB')

#Local and Static Environment Variables
my_path = Path('/mnt/c/Users/tienl/Udacity_Courses/DE_capstone/.env')
load_dotenv(my_path)
localhost=os.getenv('localhost')
local_dbname=os.getenv('local_dbname')
local_password=os.getenv('local_password')
local_username=os.getenv('local_username')
DB_NAME = os.getenv("DB_NAME")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_PORT = os.getenv("DWH_PORT")
S3_BUCKET_NAME=os.getenv('S3_BUCKET_NAME')


def drop_tables(cur, conn):
    """
    This function drop all tables if already exists
    """
    for query in drop_table_queries:
        cur.execute(query)
        print(f"\nSucessfully DROPPED \n{query}!\n")
        conn.commit()


def create_tables(cur, conn):
    """
    This function creates table if not exists
    """
    for query in create_table_queries:
        cur.execute(query)
        conn.commit()


def load_staging_tables(cur, conn):
    """
    This function load the staging tables into our DW
    """
    for query in copy_table_queries:
        cur.execute(query)
        conn.commit()
        
def insert_tables(cur, conn):
    """
    This function insert data into the tables in the DW
    """
    for query in insert_table_queries:
        print(f'TRYING INSERT AT: \n{query}')
        cur.execute(query)
        conn.commit()


def main():
    """
    Using another files with all connection string to connect into DW cluster
    """
    ### Redshift database connection
    conn = psycopg2.connect(f"host={HOST} dbname={DB_NAME} user={DB_USER} password={DB_PASSWORD} port={DB_PORT}")
    
    ### Local postgres database connection
    # conn = psycopg2.connect(host=f'{localhost}',database=f'{local_dbname}',user=f'{local_username}',password=f'{local_password}')
    cur = conn.cursor()
    try:
        drop_tables(cur, conn)
    except Exception as e:
        print(f'1: \n{e}')
    try:      
        create_tables(cur, conn)
    except Exception as e:
        print(f'2: \n{e}')
    try:        
        load_staging_tables(cur,conn)
        print("\n\nSucessfully COPY new tables!\n")
    except Exception as e:
        print(f'3: \n{e}')
    try:
        insert_tables(cur,conn)
        print("Sucessfully inserted!")
    except Exception as e:
        print(f'4: \n{e}')

    
    conn.close()
    
main()
# if __name__ == "__main__":
#     main()


Python-dotenv could not parse statement starting at line 1
Python-dotenv could not parse statement starting at line 6
Python-dotenv could not parse statement starting at line 17
Python-dotenv could not parse statement starting at line 24
Python-dotenv could not parse statement starting at line 30
Python-dotenv could not parse statement starting at line 1
Python-dotenv could not parse statement starting at line 6
Python-dotenv could not parse statement starting at line 17
Python-dotenv could not parse statement starting at line 24
Python-dotenv could not parse statement starting at line 30



Sucessfully DROPPED 
DROP TABLE IF EXISTS coins_data_table_stage!


Sucessfully DROPPED 
DROP TABLE IF EXISTS exchange_data_table_stage!


Sucessfully DROPPED 
DROP TABLE IF EXISTS top_coins_table_stage!


Sucessfully DROPPED 
DROP TABLE IF EXISTS historical_data_table_stage!


Sucessfully DROPPED 
DROP TABLE IF EXISTS exchange_data_table!


Sucessfully DROPPED 
DROP TABLE IF EXISTS coins_data_table!


Sucessfully DROPPED 
 DROP TABLE IF EXISTS historical_data_table!


Sucessfully DROPPED 
 DROP TABLE IF EXISTS top_coins_data_table!


Sucessfully DROPPED 
 DROP TABLE IF EXISTS bridge_table!



Sucessfully COPY new tables!

TRYING INSERT AT: 
 INSERT INTO top_coins_data_table( id, name, base, quote, price, price_usd, volume, volume_usd, time)
SELECT  id::integer
       ,name
       ,base
       ,quote
       ,price::float
       ,price_usd::float
       ,volume::float
       ,volume_usd::float
       ,time::date
FROM top_coins_table_stage

TRYING INSERT AT: 
INSERT INTO coins_data_tabl

# double click to view full log output from db_main.py


Sucessfully DROPPED 
DROP TABLE IF EXISTS coins_data_table_stage!


Sucessfully DROPPED 
DROP TABLE IF EXISTS exchange_data_table_stage!


Sucessfully DROPPED 
DROP TABLE IF EXISTS top_coins_table_stage!


Sucessfully DROPPED 
DROP TABLE IF EXISTS historical_data_table_stage!


Sucessfully DROPPED 
DROP TABLE IF EXISTS exchange_data_table!


Sucessfully DROPPED 
DROP TABLE IF EXISTS coins_data_table!


Sucessfully DROPPED 
 DROP TABLE IF EXISTS historical_data_table!


Sucessfully DROPPED 
 DROP TABLE IF EXISTS top_coins_data_table!


Sucessfully DROPPED 
 DROP TABLE IF EXISTS bridge_table!



Sucessfully COPY new tables!

TRYING INSERT AT: 
 INSERT INTO top_coins_data_table( id, name, base, quote, price, price_usd, volume, volume_usd, time)
SELECT  id::integer
       ,name
       ,base
       ,quote
       ,price::float
       ,price_usd::float
       ,volume::float
       ,volume_usd::float
       ,time::date
FROM top_coins_table_stage

TRYING INSERT AT: 
INSERT INTO coins_data_table(id, symbol, name, rank , market_cap_usd, price_usd, price_btc)
SELECT  id::integer
       ,symbol
       ,name
       ,rank::float
       ,replace(market_cap_usd,'0?', '0.0')::float
       ,price_usd::float
       ,price_btc::float
FROM coins_data_table_stage;
TRYING INSERT AT: 
 INSERT INTO historical_data_table(unix, date, symbol, open_price, high, low, close, volume_bnb, volume_usdt, tradecount)
SELECT  unix::varchar
       ,date::date 
       ,split_part(symbol,'/',1) AS symbol
       ,open_price::float
       ,high::float
       ,low::float
       ,close::float
       ,volume_bnb::float
       ,volume_usdt::float
       ,tradecount::integer
FROM historical_data_table_stage

TRYING INSERT AT: 
INSERT INTO exchange_data_table( id, name, url, country, date_live, volume_usd, volume_usd_adj)
SELECT  id::numeric
       ,name
       ,url
       ,country
       ,replace(date_live,'00-00','01-01')::date AS date_live
       ,volume_usd::numeric
       ,volume_usd_adj::numeric
FROM exchange_data_table_stage;
TRYING INSERT AT: 

INSERT INTO bridge_table(coin_names)
SELECT  distinct symbol
FROM historical_data_table
Sucessfully inserted!


# **database_validation_script.py**

In [2]:
import unittest
import psycopg2
from pyspark.sql.functions import row_number
from data_result_output import *
import pandas as pd
from db_main import DB_NAME, DB_USER, DB_PASSWORD, DB_PORT, S3_BUCKET_NAME, HOST, DWH_ROLE_ARB, localhost, local_dbname, local_username, local_password
from create_table_sql import validation_queries


col_names = []
my_list = []
# conn = psycopg2.connect(host=f'{localhost}',database=f'{local_dbname}',user=f'{local_username}',password=f'{local_password}')

conn = psycopg2.connect(f"host={HOST} dbname={DB_NAME} user={DB_USER} password={DB_PASSWORD} port={DB_PORT}")
cur = conn.cursor()
for query in validation_queries:
    cur.execute(query)
    colnames = [desc[0] for desc in cur.description]
    row_num = cur.fetchall()
    col_names.append(colnames)
    my_list.append(row_num)
    conn.commit()
conn.close()

coin_row, exchange_row, top_coin_row, historical_row, redshift_demo_query_row = my_list
coin_col, exchange_col, top_coin_col, historical_col, redshift_demo_query_col = col_names

coin_shape = (len(coin_row), len(coin_col))
exchange_shape = (len(exchange_row), len(exchange_col))
top_coin_shape = (len(top_coin_row), len(top_coin_col))
historical_shape = (len(historical_row), len(historical_col))

redshift_demo_query = pd.DataFrame(redshift_demo_query_row, columns=redshift_demo_query_col)
print("\n\n>>> Redshift Demo's query of top coins table and exchange table for analysis: \n".upper(), redshift_demo_query, "\n\n")
print("\n\n>>> Redshift DataBase staging table's shape result: ".upper(),coin_shape, exchange_shape, top_coin_shape, historical_shape, "\n\n")

class my_test(unittest.TestCase):    
    
    def test_1(self):
        self.assertEqual(coin_requests_s3_result, coin_shape, f"\n\n>>> Test coin_data_result_s3 result should be:{coin_requests_s3_result}")
    
    def test_2(self):
        self.assertEqual(exchange_data_s3_result, exchange_shape, f"\n\n>>> Test exchange_data_result_s3 result should be:{exchange_data_s3_result}")

    def test_3(self):
        self.assertEqual(historical_s3_result, historical_shape,f"\n\n>>> Test historical_data_result_s3 result should be:{historical_s3_result}")
    
    def test_4(self):
        self.assertEqual(top_coins_s3_result, top_coin_shape, f"\n\n>>> Test top_coins_data_result_s3 result should be: {top_coins_s3_result}")

suite = unittest.TestLoader().loadTestsFromTestCase(my_test)
runner = unittest.TextTestRunner(verbosity=2)
runner.run(suite)

# if __name__ == '__main__':
#     unittest.main()

Python-dotenv could not parse statement starting at line 1
Python-dotenv could not parse statement starting at line 6
Python-dotenv could not parse statement starting at line 17
Python-dotenv could not parse statement starting at line 24
Python-dotenv could not parse statement starting at line 30
test_1 (__main__.my_test) ... ok
test_2 (__main__.my_test) ... ok
test_3 (__main__.my_test) ... ok
test_4 (__main__.my_test) ... 



>>> REDSHIFT DEMO'S QUERY OF TOP COINS TABLE AND EXCHANGE TABLE FOR ANALYSIS: 
                name coin_symbol         price     price_usd  \
0           Binance         BTC  45932.800000  45932.800000   
1            Bitrue         BTC  45925.920000  45925.920000   
2          Bitfinex         BTC  45905.000000  45905.000000   
3            Indoex         BTC  46040.521403  46040.521403   
4             TOKOK         BTC  45913.080000  45913.080000   
..              ...         ...           ...           ...   
832    VCC Exchange         FTM      1.689500      1.689500   
833          OMGFIN         FTM      0.000038      1.732410   
834            Bkex         XLM      0.328600      0.328600   
835    Coinbase Pro         XLM      0.278989      0.329890   
836  Bithumb Global         XLM      0.328109      0.328109   

                           url                                 country  \
0      https://www.binance.com                                   Japan   
1      https:

ok

----------------------------------------------------------------------
Ran 4 tests in 0.003s

OK


<unittest.runner.TextTestResult run=4 errors=0 failures=0>

# Dobule click to view full Logs output from database_validation_script.py


>>> REDSHIFT DEMO'S QUERY OF TOP COINS TABLE AND EXCHANGE TABLE FOR ANALYSIS: 
                name coin_symbol         price     price_usd  \
0           Binance         BTC  45932.800000  45932.800000   
1            Bitrue         BTC  45925.920000  45925.920000   
2          Bitfinex         BTC  45905.000000  45905.000000   
3            Indoex         BTC  46040.521403  46040.521403   
4             TOKOK         BTC  45913.080000  45913.080000   
..              ...         ...           ...           ...   
832    VCC Exchange         FTM      1.689500      1.689500   
833          OMGFIN         FTM      0.000038      1.732410   
834            Bkex         XLM      0.328600      0.328600   
835    Coinbase Pro         XLM      0.278989      0.329890   
836  Bithumb Global         XLM      0.328109      0.328109   

                           url                                 country  \
0      https://www.binance.com                                   Japan   
1      https://www.bitrue.com/                               Singapore   
2     https://www.bitfinex.com                               Hong Kong   
3            https://indoex.io                                 Estonia   
4       https://www.tokok.com/                  British Virgin Islands   
..                         ...                                     ...   
832       https://vcc.exchange                                 Vietnam   
833         https://omgfin.com                 Estonia European Union    
834      https://www.bkex.com/                  British Virgin Islands   
835  https://www.coinbase.com/  San Francisco California United States   
836   https://www.bithumb.pro/                              Seychelles   

      date_live        volume  
0    2014-01-01  6.494519e+04  
1    2018-01-01  1.178438e+04  
2    2012-01-01  7.568765e+03  
3    2018-01-01  4.446091e+03  
4    2018-01-01  3.452379e+03  
..          ...           ...  
832  2019-01-01  4.259107e+06  
833  2018-10-10  3.478680e+05  
834  2018-01-01  6.358333e+07  
835  2012-06-20  2.956410e+07  
836  2019-01-01  1.661179e+07  

[837 rows x 8 columns] 




>>> REDSHIFT DATABASE STAGING TABLE'S SHAPE RESULT:  (7000, 15) (290, 11) (1011, 9) (2713925, 10) 




# **stop_redshift.py** script

In [1]:
from dotenv import load_dotenv
import os
from botocore.exceptions import ClientError
import boto3
import time
import pandas as pd
from pandas_functions import prettyRedshiftProps
import config as c

def main():
    
    status = 'deleting'
    c.redshift.delete_cluster( ClusterIdentifier=c.DWH_CLUSTER_IDENTIFIER, SkipFinalClusterSnapshot=True)
    c.iam.detach_role_policy(RoleName=c.DWH_IAM_ROLE_NAME, PolicyArn="arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess")
    c.iam.delete_role(RoleName=c.DWH_IAM_ROLE_NAME)
    while True:
        time.sleep(5)
        myClusterProps = c.redshift.describe_clusters(ClusterIdentifier=c.DWH_CLUSTER_IDENTIFIER)['Clusters'][0]
        cluster_df = prettyRedshiftProps(myClusterProps)
        check_status = cluster_df['Value'][2]
        # check_status = cluster_df['Value'][2]
        try:
            if check_status == status:
                print(f'\n\n>>> Cluster status: {check_status}!\n')
        except Exception as e:
            if e == e:
                print('\n\n>>> Cluster deleted!\n')
                break

if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        if e ==e:
            print('\n\n>>> No cluster found!\n>>> Cluster deleted!\n\n')

Python-dotenv could not parse statement starting at line 1
Python-dotenv could not parse statement starting at line 6
Python-dotenv could not parse statement starting at line 17
Python-dotenv could not parse statement starting at line 24
Python-dotenv could not parse statement starting at line 30
  pd.set_option('display.max_colwidth', -1)




>>> Cluster status: deleting!



>>> Cluster status: deleting!



>>> Cluster status: deleting!



>>> Cluster status: deleting!



>>> Cluster status: deleting!



>>> Cluster status: deleting!



>>> Cluster status: deleting!



>>> Cluster status: deleting!



>>> Cluster status: deleting!



>>> Cluster status: deleting!



>>> Cluster status: deleting!



>>> Cluster status: deleting!



>>> No cluster found!
>>> Cluster deleted!


