In [1]:
def execute_query(db_connection, query_statement):
    """Execute one SQL statement on ``db_connection`` and commit it.

    This is a best-effort helper: failures are reported on stdout instead of
    being raised, so the function always returns ``None``.

    Args:
        db_connection: DB-API 2.0 style connection exposing ``cursor()``,
            ``commit()`` and ``rollback()``.
        query_statement: SQL text handed unchanged to ``cursor.execute``.
    """
    try:
        cursor = db_connection.cursor()
        try:
            cursor.execute(query_statement)
        finally:
            # Release the cursor whether or not the statement succeeded.
            cursor.close()
        # commit the changes
        db_connection.commit()
    except Exception as error:
        print(f'Error executing query: "{query_statement}": {error}')
        # Undo the failed transaction so the connection stays usable for
        # the statements that follow.
        try:
            db_connection.rollback()
        except Exception as rollback_error:
            print(f'Error rolling back transaction: {rollback_error}')

In [2]:
import os
import psycopg2

# Connection settings are read from the environment so that no credentials
# are stored in the notebook. os.environ.get returns None for unset variables.
hostname = os.environ.get('DB_HOSTNAME')
database = os.environ.get('DB_DATABASE')
username = os.environ.get('DB_USERNAME')
pwd = os.environ.get('DB_PWD')
# int(None) would raise an opaque TypeError when DB_PORT is unset, so fall
# back to PostgreSQL's standard port.
port = int(os.environ.get('DB_PORT', '5432'))


# Shared connection used by the cells below.
conn = psycopg2.connect(host=hostname, dbname=database, user=username, password=pwd, port=port)



In [6]:
# DDL for the `klines` table. IF NOT EXISTS makes the statement a no-op when
# the table is already present; `id` is an auto-generated identity column and
# both audit timestamps default to NOW().
create_table_query = '''CREATE TABLE IF NOT EXISTS klines (
    id INTEGER PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW(),
    ticker VARCHAR(10),
    open_time TIMESTAMP,
    open_price FLOAT,
    high_price FLOAT,
    low_price FLOAT,
    close_price FLOAT,
    volume FLOAT,
    close_time TIMESTAMP,
    quote_asset_volume FLOAT,
    number_of_trades INT,
    symbol VARCHAR(10)
);'''

# Run the DDL on the shared connection.
execute_query(
    db_connection=conn,
    query_statement=create_table_query,
)

In [8]:
import pandas as pd

# Trading pair whose transformed CSV is loaded.
symbol = 'BTCUSDT'
input_path = f'../outputs/{symbol}_transformed.csv'

df = pd.read_csv(input_path)
# Print the column names, non-null counts and dtypes of the loaded frame.
# A bare expression in the middle of a cell is never rendered, so this is the
# only inspection call kept here.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   open_time           24 non-null     object 
 1   open_price          24 non-null     float64
 2   high_price          24 non-null     float64
 3   low_price           24 non-null     float64
 4   close_price         24 non-null     float64
 5   volume              24 non-null     float64
 6   close_time          24 non-null     object 
 7   quote_asset_volume  24 non-null     float64
 8   number_of_trades    24 non-null     int64  
 9   symbol              24 non-null     object 
dtypes: float64(6), int64(1), object(3)
memory usage: 2.0+ KB


In [14]:
# Candidate composite key per row: "<open_time>_<symbol>_<close_time>".
pks_ds = df['open_time'] + '_' + df['symbol'] + '_' + df['close_time']

# Shown as the cell result: only distinct keys remain, so a row count equal
# to len(df) means the candidate key has no duplicates.
pks_ds.drop_duplicates()

0     2024-01-17 04:00:00_BTCUSDT_2024-01-17 04:59:5...
1     2024-01-17 05:00:00_BTCUSDT_2024-01-17 05:59:5...
2     2024-01-17 06:00:00_BTCUSDT_2024-01-17 06:59:5...
3     2024-01-17 07:00:00_BTCUSDT_2024-01-17 07:59:5...
4     2024-01-17 08:00:00_BTCUSDT_2024-01-17 08:59:5...
5     2024-01-17 09:00:00_BTCUSDT_2024-01-17 09:59:5...
6     2024-01-17 10:00:00_BTCUSDT_2024-01-17 10:59:5...
7     2024-01-17 11:00:00_BTCUSDT_2024-01-17 11:59:5...
8     2024-01-17 12:00:00_BTCUSDT_2024-01-17 12:59:5...
9     2024-01-17 13:00:00_BTCUSDT_2024-01-17 13:59:5...
10    2024-01-17 14:00:00_BTCUSDT_2024-01-17 14:59:5...
11    2024-01-17 15:00:00_BTCUSDT_2024-01-17 15:59:5...
12    2024-01-17 16:00:00_BTCUSDT_2024-01-17 16:59:5...
13    2024-01-17 17:00:00_BTCUSDT_2024-01-17 17:59:5...
14    2024-01-17 18:00:00_BTCUSDT_2024-01-17 18:59:5...
15    2024-01-17 19:00:00_BTCUSDT_2024-01-17 19:59:5...
16    2024-01-17 20:00:00_BTCUSDT_2024-01-17 20:59:5...
17    2024-01-17 21:00:00_BTCUSDT_2024-01-17 21:

In [None]:
# pd.read_sql_table only accepts a SQLAlchemy connectable or URI, and `conn`
# is a raw psycopg2 connection, so read the table through a DB-API cursor.
with conn.cursor() as cursor:
    cursor.execute('SELECT * FROM klines')
    # The first item of each description entry is the column name.
    column_names = [column[0] for column in cursor.description]
    df = pd.DataFrame(cursor.fetchall(), columns=column_names)
df.head()