# API Binance


https://api.binance.com/api/v1/ticker/24hr

## Function connect_to_redshift

In [2]:
import psycopg2
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.info("Libraries calls ok")

def connect_to_redshift(host, dbname, user, password, port=5439):
    """
    Establishes a connection to a Redshift database.

    Parameters:
    - host (str): The hostname or IP address of the Redshift cluster.
    - dbname (str): The name of the Redshift database to connect to.
    - user (str): The username for authenticating with the Redshift database.
    - password (str): The password for authenticating with the Redshift database.
    - port (int, optional): The port number on which the Redshift cluster is listening. Default is 5439.

    Returns:
    - conn: A psycopg2 connection object representing the connection to the Redshift database.

    Raises:
    - Exception: Whatever psycopg2.connect raised. The error is logged at ERROR
      level and then re-raised, so this function never returns None.
    """
    try:
        conn = psycopg2.connect(
            dbname=dbname,
            user=user,
            password=password,
            host=host,
            port=port
        )
    except Exception as error:
        logging.error(f"An error occurred: {error}")
        # Re-raise instead of falling through: returning None here would only
        # postpone the failure to the caller's first `conn.cursor()` call.
        raise

    logging.info("Connection established")
    return conn


2024-04-01 18:07:27,713 - INFO - Libraries calls ok


## Function execute_query

In [3]:
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.info("Libraries calls ok")

def execute_query(connection, query):
    """
    Executes a SQL query on the provided database connection and commits it.

    Parameters:
    - connection: psycopg2 connection object representing the connection to the database.
    - query (str): The SQL query to be executed.

    Returns:
    - None

    Raises:
    - Exception: If an error occurs during the query execution. The current
      transaction is rolled back and the original error is re-raised.
    """
    try:
        # The cursor is used as a context manager so it is closed even when
        # execute() raises.
        with connection.cursor() as cursor:
            cursor.execute(query)
        connection.commit()
    except Exception as error:
        logging.error(f"An error occurred: {error}")
        # Without a rollback the connection stays in an aborted transaction
        # and every later statement on it would fail.
        connection.rollback()
        raise

    logging.info("Query executed successfully")


2024-04-01 18:07:27,731 - INFO - Libraries calls ok


### Truncate table mateobelossi_coderhouse.binance_coins if it exists.

In [4]:
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.info("Libraries calls ok")

# Connection settings for the target Redshift cluster.
dbname = "data-engineer-database"
port = 5439
host = "data-engineer-cluster.cyhh5bfevlmn.us-east-1.redshift.amazonaws.com"
table_name = "binance_coins"
schema_name = "mateobelossi_coderhouse"
user = "mateobelossi_coderhouse"
# NOTE(review): machine-specific absolute path; the password is kept out of
# the notebook by reading it from this local file.
with open("/home/mateo/Desktop/Curso DE/✍️Consignas de Pre.entregas y Proyecto Final-20240331T125607Z-001/1er_pre_entrega/password_redshift.txt ",'r') as f:
    # Drop a trailing line terminator so an editor-added newline does not
    # become part of the password.
    password = f.read().rstrip("\r\n")

logging.info("Connecting to redshift...")
conn = connect_to_redshift(host, dbname, user, password, port)
if conn is None:
    raise ConnectionError("Could not connect to Redshift")

try:
    logging.info("Checking if table exists...")
    with conn.cursor() as cur:
        # Values are bound by the driver instead of being interpolated into
        # the SQL text.
        cur.execute(
            "SELECT EXISTS (SELECT 1 FROM information_schema.tables "
            "WHERE table_schema = %s AND table_name = %s)",
            (schema_name, table_name),
        )
        table_exists = cur.fetchone()[0]
    logging.info(f"Table exists ? : {table_exists}")

    if table_exists:
        logging.info(f"Truncating table {schema_name}.{table_name}")
        query = f"truncate table {schema_name}.{table_name}"
        execute_query(conn, query)
    else:
        logging.info(f"table {schema_name}.{table_name} do not exist.")
finally:
    # Always release the connection, even if a statement above failed.
    conn.close()

2024-04-01 18:07:27,836 - INFO - Libraries calls ok
2024-04-01 18:07:27,838 - INFO - Connecting to redshift...


2024-04-01 18:07:29,267 - INFO - Connection established
2024-04-01 18:07:29,270 - INFO - Checking if table exists...
2024-04-01 18:07:29,677 - INFO - Table exists ? : True
2024-04-01 18:07:29,682 - INFO - Truncating table mateobelossi_coderhouse.binance_coins
2024-04-01 18:07:30,126 - INFO - Query executed successfully


### Create table mateobelossi_coderhouse.binance_coins if it does not exist.

In [5]:
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.info("Libraries calls ok")

# Connection settings for the target Redshift cluster.
dbname = "data-engineer-database"
user = "mateobelossi_coderhouse"
# NOTE(review): machine-specific absolute path; the password is kept out of
# the notebook by reading it from this local file.
with open("/home/mateo/Desktop/Curso DE/✍️Consignas de Pre.entregas y Proyecto Final-20240331T125607Z-001/1er_pre_entrega/password_redshift.txt ",'r') as f:
    # Drop a trailing line terminator so an editor-added newline does not
    # become part of the password.
    password = f.read().rstrip("\r\n")
port = 5439
host = "data-engineer-cluster.cyhh5bfevlmn.us-east-1.redshift.amazonaws.com"
table_name = "binance_coins"
schema_name = "mateobelossi_coderhouse"

# The DDL is qualified with schema_name (not user) so it targets the same
# schema that the existence check and the log messages refer to.
CREATE_TABLE =f"""
CREATE TABLE {schema_name}.{table_name} (
    symbol VARCHAR(256),
    priceChange FLOAT,
    priceChangePercent FLOAT,
    weightedAvgPrice FLOAT,
    prevClosePrice FLOAT,
    lastPrice FLOAT,
    lastQty FLOAT,
    bidPrice FLOAT,
    bidQty FLOAT,
    askPrice FLOAT,
    askQty FLOAT,
    openPrice FLOAT,
    highPrice FLOAT,
    lowPrice FLOAT,
    volume FLOAT,
    quoteVolume FLOAT,
    openTime BIGINT,
    closeTime BIGINT,
    firstId BIGINT,
    lastId BIGINT,
    count BIGINT,
    created_at TIMESTAMP
);
"""

logging.info("Connecting to redshift...")
conn = connect_to_redshift(host, dbname, user, password, port)
if conn is None:
    raise ConnectionError("Could not connect to Redshift")

try:
    with conn.cursor() as cur:
        logging.info("Checking if table exists...")
        # Values are bound by the driver instead of being interpolated into
        # the SQL text.
        cur.execute(
            "SELECT EXISTS (SELECT 1 FROM information_schema.tables "
            "WHERE table_schema = %s AND table_name = %s)",
            (schema_name, table_name),
        )
        table_exists = cur.fetchone()[0]
        logging.info(f"Table exists ? : {table_exists}")

        if not table_exists:
            logging.info(f"Creating table {schema_name}.{table_name}")
            cur.execute(CREATE_TABLE)
            conn.commit()
        else:
            logging.info(f"table {schema_name}.{table_name} already exists.")
finally:
    # Always release the connection, even if a statement above failed.
    conn.close()

2024-04-01 18:07:30,187 - INFO - Libraries calls ok
2024-04-01 18:07:30,194 - INFO - Connecting to redshift...


2024-04-01 18:07:31,442 - INFO - Connection established
2024-04-01 18:07:31,445 - INFO - Checking if table exists...
2024-04-01 18:07:31,850 - INFO - Table exists ? : True
2024-04-01 18:07:31,852 - INFO - table mateobelossi_coderhouse.binance_coins already exists.


#### 1) Request the 24-hour ticker data from the Binance API: https://api.binance.com/api/v1/ticker/24hr
#### 2) Include a column named "created_at" in the obtained results, indicating the current time at the moment of making the requests.
#### 3) Save the results in a CSV file.
#### 4) Insert the results into Redshift.

In [6]:
import pandas as pd
from pandas.io.json import json_normalize
import requests, json
from datetime import datetime
import logging
from psycopg2.extras import execute_values

import os  # only needed here, to make sure the CSV output folder exists

# Connection settings for the target Redshift cluster.
dbname = "data-engineer-database"
user = "mateobelossi_coderhouse"
# NOTE(review): machine-specific absolute path; the password is kept out of
# the notebook by reading it from this local file.
with open("/home/mateo/Desktop/Curso DE/✍️Consignas de Pre.entregas y Proyecto Final-20240331T125607Z-001/1er_pre_entrega/password_redshift.txt ",'r') as f:
    # Drop a trailing line terminator so an editor-added newline does not
    # become part of the password.
    password = f.read().rstrip("\r\n")
port = 5439
host = "data-engineer-cluster.cyhh5bfevlmn.us-east-1.redshift.amazonaws.com"
table_name = "binance_coins"
schema_name = "mateobelossi_coderhouse"

# Columns written to Redshift, in INSERT order. Rows are built from exactly
# these columns so a reordered or extended API payload cannot silently shift
# values into the wrong column.
insert_columns = [
    "symbol", "priceChange", "priceChangePercent", "weightedAvgPrice",
    "prevClosePrice", "lastPrice", "lastQty", "bidPrice", "bidQty",
    "askPrice", "askQty", "openPrice", "highPrice", "lowPrice", "volume",
    "quoteVolume", "openTime", "closeTime", "firstId", "lastId", "count",
    "created_at",
]

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.info("Libraries calls ok")

# Timestamp used as the CSV file-name prefix.
CUR_TIME = datetime.now().strftime("%Y%m%d_%H%M%S")
logging.info(f"Start Time : {CUR_TIME} ")


logging.info("Making the requests to API binance.")
# A timeout keeps the cell from hanging forever if the API does not answer.
r = requests.get('https://api.binance.com/api/v1/ticker/24hr', timeout=30)

if r.status_code == 200:
    logging.info(f"Connection ok ; Requests Status: {r.status_code}")
    # pd.json_normalize is the supported entry point; the
    # pandas.io.json.json_normalize alias is deprecated.
    result = pd.json_normalize(r.json())

    logging.info("Adding a column named created_at with current time.")
    result['created_at'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    logging.info("Loading data from binance to a csv")
    name_csv = 'mercado_binance.csv'
    output_dir = "./binance/"
    os.makedirs(output_dir, exist_ok=True)
    file_path = output_dir + str(CUR_TIME) + '_' + name_csv
    result.to_csv(file_path, index=False)

    logging.info("Loading data from binance to redshift")

    data_to_insert = [tuple(row) for row in result[insert_columns].values]
    insert_query = f"""
        INSERT INTO {schema_name}.{table_name} (
            {", ".join(insert_columns)}
        ) VALUES %s;
        """

    conn = connect_to_redshift(host, dbname, user, password, port)
    if conn is None:
        raise ConnectionError("Could not connect to Redshift")
    try:
        with conn.cursor() as cur:
            execute_values(
                cur,
                insert_query,
                data_to_insert,
                # Send everything in one statement; never pass a page size
                # of 0 when the payload is empty.
                page_size=max(len(data_to_insert), 1)
            )
        conn.commit()
        logging.info("Data inserted into Redshift successfully.")
    finally:
        # Always release the connection, even if the insert failed.
        conn.close()

else:
    logging.error(f"It was not able to connect to Binance ; Requests Status: {r.status_code}")

2024-04-01 18:07:36,312 - INFO - Libraries calls ok
2024-04-01 18:07:36,318 - INFO - Start Time : 20240401_180736 
2024-04-01 18:07:36,321 - INFO - Making the requests to API binance.
2024-04-01 18:07:37,530 - INFO - Connection ok ; Requests Status: 200
  result = json_normalize(result)
2024-04-01 18:07:37,638 - INFO - Adding a column named created_at with current time.
2024-04-01 18:07:37,670 - INFO - Loading data from binance to a csv
2024-04-01 18:07:37,778 - INFO - Loading data from binance to redshift
2024-04-01 18:07:39,011 - INFO - Connection established
2024-04-01 18:07:41,641 - INFO - Data inserted into Redshift successfully.


## Reading the data from Redshift.

In [7]:
import psycopg2
import pandas as pd

# Connection settings for the target Redshift cluster.
dbname = "data-engineer-database"
user = "mateobelossi_coderhouse"
# NOTE(review): machine-specific absolute path; the password is kept out of
# the notebook by reading it from this local file.
with open("/home/mateo/Desktop/Curso DE/✍️Consignas de Pre.entregas y Proyecto Final-20240331T125607Z-001/1er_pre_entrega/password_redshift.txt ",'r') as f:
    # Drop a trailing line terminator so an editor-added newline does not
    # become part of the password.
    password = f.read().rstrip("\r\n")
port = 5439
host = "data-engineer-cluster.cyhh5bfevlmn.us-east-1.redshift.amazonaws.com"
table_name = "binance_coins"
schema_name = "mateobelossi_coderhouse"

conn = psycopg2.connect(
    dbname=dbname,
    user=user,
    password=password,
    host=host,
    port=port
)

select_query = f"""
SELECT * FROM {schema_name}.{table_name}
"""

try:
    df = pd.read_sql(select_query, conn)
finally:
    # Close the connection even if the query fails.
    conn.close()


# Last expression of the cell: shows the first rows of the loaded table.
df.head()




Unnamed: 0,symbol,pricechange,pricechangepercent,weightedavgprice,prevcloseprice,lastprice,lastqty,bidprice,bidqty,askprice,...,highprice,lowprice,volume,quotevolume,opentime,closetime,firstid,lastid,count,created_at
0,ETHBTC,-0.0012,-2.339,0.05066,0.0513,0.0501,0.4904,0.0501,21.7642,0.05011,...,0.05134,0.04985,39090.1671,1980.290029,1711919256711,1712005656711,438971731,439081873,110143,2024-04-01 18:07:37
1,LTCBTC,-6.6e-05,-4.438,0.001512,0.001489,0.001421,4.167,0.00142,0.79,0.001421,...,0.00159,0.001417,204989.044,309.841946,1711919256665,1712005656665,96353740,96411965,58226,2024-04-01 18:07:37
2,BNBBTC,-0.000247,-2.883,0.008415,0.008564,0.008319,0.113,0.008319,3.811,0.00832,...,0.0086,0.008286,49785.058,418.922755,1711919256685,1712005656685,238145920,238243276,97357,2024-04-01 18:07:37
3,NEOBTC,-4e-06,-1.801,0.000231,0.000228,0.000224,13.49,0.000223,14.59,0.000224,...,0.000237,0.000221,73820.88,17.088082,1711919243445,1712005643445,46258075,46263252,5178,2024-04-01 18:07:37
4,QTUMETH,-2.9e-05,-2.2,0.001334,0.00132,0.001289,1.5,0.001286,114.2,0.001289,...,0.001355,0.00127,9772.0,13.035371,1711919256674,1712005656674,5438851,5439029,179,2024-04-01 18:07:37
