In [1]:
def execute_query(db_connection, query_statement):
    try:
        cursor = db_connection.cursor()
        cursor.execute(query_statement)
        cursor.close()
        # commit the changes
        db_connection.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(f'Error executing query: "{query_statement}": {error}')

In [2]:
import os
import psycopg2

hostname = os.environ.get('DB_HOSTNAME')
database = os.environ.get('DB_DATABASE')
username = os.environ.get('DB_USERNAME')
pwd = os.environ.get('DB_PWD')
port = int(os.environ.get('DB_PORT'))


conn = psycopg2.connect(host=hostname, dbname=database, user=username, password=pwd, port=port)



In [6]:
create_table_query = '''CREATE TABLE IF NOT EXISTS klines (
    id INTEGER PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW(),
    ticker VARCHAR(10),
    open_time TIMESTAMP,
    open_price FLOAT,
    high_price FLOAT,
    low_price FLOAT,
    close_price FLOAT,
    volume FLOAT,
    close_time TIMESTAMP,
    quote_asset_volume FLOAT,
    number_of_trades INT,
    symbol VARCHAR(10)
);'''

execute_query(conn, create_table_query)

In [24]:
import pandas as pd

symbol = 'BTCUSDT'
input_path = f'../outputs/{symbol}_transformed.csv'

df = pd.read_csv(input_path)
df.head()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   open_time           24 non-null     object 
 1   open_price          24 non-null     float64
 2   high_price          24 non-null     float64
 3   low_price           24 non-null     float64
 4   close_price         24 non-null     float64
 5   volume              24 non-null     float64
 6   close_time          24 non-null     object 
 7   quote_asset_volume  24 non-null     float64
 8   number_of_trades    24 non-null     int64  
 9   symbol              24 non-null     object 
dtypes: float64(6), int64(1), object(3)
memory usage: 2.0+ KB


In [32]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

df['pk'] = df['open_time'] + '_' + df['symbol'].str.lower() + '_' + df['close_time']
df.head()

Unnamed: 0,open_time,open_price,high_price,low_price,close_price,volume,close_time,quote_asset_volume,number_of_trades,symbol,pk
0,2024-01-17 04:00:00,42849.0,42921.1,42781.49,42854.01,791.4,2024-01-17 04:59:59.999,33911017.91,32838,BTCUSDT,2024-01-17 04:00:00_btcusdt_2024-01-17 04:59:59.999
1,2024-01-17 05:00:00,42854.01,42925.49,42765.41,42783.39,661.25,2024-01-17 05:59:59.999,28333635.53,30696,BTCUSDT,2024-01-17 05:00:00_btcusdt_2024-01-17 05:59:59.999
2,2024-01-17 06:00:00,42783.39,42876.34,42766.0,42806.05,802.28,2024-01-17 06:59:59.999,34360257.7,28881,BTCUSDT,2024-01-17 06:00:00_btcusdt_2024-01-17 06:59:59.999
3,2024-01-17 07:00:00,42806.05,42851.1,42619.6,42655.94,1357.31,2024-01-17 07:59:59.999,58019581.23,46737,BTCUSDT,2024-01-17 07:00:00_btcusdt_2024-01-17 07:59:59.999
4,2024-01-17 08:00:00,42655.95,42747.65,42564.0,42728.76,1188.19,2024-01-17 08:59:59.999,50705137.24,52602,BTCUSDT,2024-01-17 08:00:00_btcusdt_2024-01-17 08:59:59.999


In [44]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

db_pks_df= pd.read_sql_query("""select concat(open_time, '_', symbol, '_', close_time) as pk from klines""", conn)
db_pks_df.head()
db_pks_df.count()

  db_pks_df= pd.read_sql_query("""select concat(open_time, '_', symbol, '_', close_time) as pk from klines""", conn)


pk    21
dtype: int64

In [46]:
df['should_be_updated'] = df['pk'].isin(db_pks_df['pk'])
df.head()

Unnamed: 0,open_time,open_price,high_price,low_price,close_price,volume,close_time,quote_asset_volume,number_of_trades,symbol,pk,should_be_updated
0,2024-01-17 04:00:00,42849.0,42921.1,42781.49,42854.01,791.4,2024-01-17 04:59:59.999,33911017.91,32838,BTCUSDT,2024-01-17 04:00:00_btcusdt_2024-01-17 04:59:59.999,False
1,2024-01-17 05:00:00,42854.01,42925.49,42765.41,42783.39,661.25,2024-01-17 05:59:59.999,28333635.53,30696,BTCUSDT,2024-01-17 05:00:00_btcusdt_2024-01-17 05:59:59.999,False
2,2024-01-17 06:00:00,42783.39,42876.34,42766.0,42806.05,802.28,2024-01-17 06:59:59.999,34360257.7,28881,BTCUSDT,2024-01-17 06:00:00_btcusdt_2024-01-17 06:59:59.999,False
3,2024-01-17 07:00:00,42806.05,42851.1,42619.6,42655.94,1357.31,2024-01-17 07:59:59.999,58019581.23,46737,BTCUSDT,2024-01-17 07:00:00_btcusdt_2024-01-17 07:59:59.999,True
4,2024-01-17 08:00:00,42655.95,42747.65,42564.0,42728.76,1188.19,2024-01-17 08:59:59.999,50705137.24,52602,BTCUSDT,2024-01-17 08:00:00_btcusdt_2024-01-17 08:59:59.999,True


In [47]:
df.to_csv(f'../outputs/{symbol}_upload.csv', index=False)