In [None]:
# Run parameters. Every value defaults to None and has to be replaced with a
# real value before the cells below can run.
# NOTE(review): presumably these are injected by a notebook runner (e.g. a
# papermill-style "parameters" cell) — confirm against the scheduler.

# AWS credentials handed to boto3.client() when the S3 client is created.
AWS_ACCESS_KEY_ID = None
AWS_SECRET_ACCESS_KEY = None
# Host used to build the "http://<host>:8080/query" endpoint in
# load_recent_minute_data().
EC2_PUBLIC_IP = None
# Snowflake connection settings read by snowflake_connection().
SNOWFLAKE_ACCOUNT = None
SNOWFLAKE_DATABASE = None
SNOWFLAKE_PASSWORD = None
SNOWFLAKE_SCHEMA = None
SNOWFLAKE_USER = None
SNOWFLAKE_WAREHOUSE = None
# Date string that is prepended to every row's trade_time_min and passed to
# upload_to_snowflake() (NOTE(review): expected format looks like YYYY-MM-DD — confirm).
target_date = None
# Reference time parsed with the "%H:%M" format; the query window is the hour
# that ends at this time.
ref_time_str = None

In [None]:
import pandas as pd
import numpy as np
import boto3
import psycopg2
import snowflake.connector
from datetime import datetime, timedelta
import botocore.exceptions
import lightgbm as lgb
import json
from snowflake.connector.pandas_tools import write_pandas
import requests

In [None]:
# Bucket that load_booster() downloads the model artifacts from.
bucket_name = "de6-team7-bucket"

# Lower-case aliases of the credentials supplied in the parameter cell.
aws_access_key_id, aws_secret_access_key = AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY

# Module-wide S3 client shared by check_booster_exists() and load_booster().
s3_client = boto3.client(
    's3',
    region_name='us-east-1',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

In [None]:
def try_convert(s, fmt="%Y-%m-%d %H:%M:%S"):
    """Format a date/time value as a string, or return None if that fails.

    Args:
        s: Value accepted by ``pd.to_datetime`` (typically a date/time string).
        fmt: ``strftime`` pattern applied to the parsed timestamp.

    Returns:
        The formatted timestamp string, or ``None`` when ``s`` cannot be
        parsed or formatted.
    """
    try:
        # errors='raise' makes unparseable input raise instead of yielding NaT.
        dt = pd.to_datetime(s, errors='raise')
        return dt.strftime(fmt)
    except Exception:
        # A scalar conversion must fail with a scalar: the previous empty
        # DataFrame sentinel ended up stored inside the column being converted.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None

def load_recent_minute_data(timeout: float = 60.0) -> pd.DataFrame:
    """Fetch the derived minute rows for the hour that ends at ``ref_time_str``.

    Posts a SQL query to ``http://<EC2_PUBLIC_IP>:8080/query``, keeps the
    columns used downstream and rewrites ``trade_time_min`` as
    ``"<target_date> <trade_time_min>"`` formatted by ``try_convert``.

    Args:
        timeout: Seconds to wait for the query service before giving up.

    Returns:
        DataFrame with trade_time_min, market, avg_price, cum_volume, spread,
        ma_mid, bollinger_upper, bollinger_lower, rsi, obv, macd_histogram,
        trading_strength_avg and depth_imbalance.

    Raises:
        requests.HTTPError: The query service answered with an error status.
        requests.Timeout: No answer arrived within ``timeout`` seconds.
    """
    url = "http://" + EC2_PUBLIC_IP + ":8080/query"

    # Query window: [ref_time - 60 minutes, ref_time), compared as HH:MM text.
    ref_time = datetime.strptime(ref_time_str, "%H:%M")
    start_time = (ref_time - timedelta(hours=1)).strftime("%H:%M")
    end_time = ref_time.strftime("%H:%M")
    # NOTE(review): for a ref_time before 01:00 the window wraps past midnight,
    # start_time > end_time and the predicate below matches no rows.
    # NOTE(review): the query has no trade_dt predicate, so every returned row
    # is stamped with target_date — confirm the table only holds that date.
    query = {
        "sql": f"""
            SELECT *
            FROM raw_data.btc_derived_minute
            WHERE trade_time_min >= '{start_time}'
              AND trade_time_min < '{end_time}'
            ORDER BY trade_time_min
        """
    }

    # Without a timeout a stalled service would hang the notebook forever, and
    # without the status check an error payload would be parsed as row data.
    response = requests.post(url, json=query, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    # Column order of the rows returned by SELECT * (positional mapping).
    all_columns = ['trade_dt', 'trade_time_min', 'market', 'open_price', 'close_price', 'high_price', 'low_price', 'avg_price', 'cum_volume', 'cum_amount', 'ma_short', 'ma_mid', 'ma_long', 'bollinger_upper', 'bollinger_lower', 'bollinger_middle', 'rsi', 'macd', 'macd_signal', 'macd_histogram', 'obv', 'trading_strength_avg', 'buy_volume', 'sell_volume', 'spread', 'depth_imbalance']
    kept_columns = ['trade_time_min', 'market', 'avg_price', 'cum_volume', 'spread', 'ma_mid', 'bollinger_upper', 'bollinger_lower', 'rsi', 'obv', 'macd_histogram', 'trading_strength_avg', 'depth_imbalance']

    # .copy() detaches the selection so the assignment below targets a real frame.
    df = pd.DataFrame(data, columns=all_columns)[kept_columns].copy()

    # Element-wise map on the single column: unlike DataFrame.apply(axis=1) it
    # still yields a Series when the query returns no rows.
    stamped = target_date + " " + df['trade_time_min'].astype(str)
    df['trade_time_min'] = stamped.map(try_convert)

    return df

In [None]:
def make_feature(df):
    """Build the model input frame from the raw minute data.

    Sorts by trade_time_min and market, casts ``market`` to a categorical
    dtype, adds ``bollinger_width`` (upper band minus lower band) and drops
    the two raw band columns. The input frame is left untouched, so the call
    can be repeated on the same data.

    Args:
        df: Frame containing at least trade_time_min, market, bollinger_upper
            and bollinger_lower.

    Returns:
        A new DataFrame with the engineered columns. An empty DataFrame is
        returned when ``df`` is empty or when any step fails (the error is
        printed, not raised).
    """
    try:
        if df.empty:
            print("Empty dataframe")
            # Always hand back a DataFrame: callers test `.empty` on the
            # result, which fails on None.
            return pd.DataFrame()
        # sort_values returns a new frame, so the edits below never reach `df`.
        features = df.sort_values(by=['trade_time_min', 'market'])
        features['market'] = features['market'].astype('category')
        features['bollinger_width'] = features['bollinger_upper'] - features['bollinger_lower']
        return features.drop(columns=['bollinger_upper', 'bollinger_lower'])
    except Exception as e:
        # Best-effort by design: report the problem and return an empty frame.
        print(e)
        return pd.DataFrame()

In [None]:
def check_booster_exists(bucket_name: str, s3_key: str) -> bool:
    """Report whether an object exists in S3, using a HEAD request.

    Args:
        bucket_name: Bucket to look in.
        s3_key: Object key to probe.

    Returns:
        True when head_object succeeds, False when S3 answers with code '404'.

    Raises:
        botocore.exceptions.ClientError: For any error code other than '404'.
    """
    try:
        s3_client.head_object(Bucket=bucket_name, Key=s3_key)
    except botocore.exceptions.ClientError as err:
        error_code = err.response['Error']['Code']
        if error_code != '404':
            # Anything but "not found" is passed on to the caller unchanged.
            raise
        # The object does not exist.
        return False
    else:
        # The object exists.
        return True

In [None]:
def load_booster(model_type='clf'):
    """Download a LightGBM booster and its parameter file from S3 and load both.

    Args:
        model_type: 'reg' selects the btc_reg_* artifacts; any other value
            selects the btc_clf_* artifacts.

    Returns:
        Tuple ``(booster, params)``: the ``lgb.Booster`` loaded from the
        downloaded model file and the dict parsed from the params JSON.

    Raises:
        RuntimeError: The booster object is missing from the bucket.
        Errors from the S3 download, LightGBM or JSON parsing propagate as-is.
    """
    model_path = "/tmp/"
    prefix = "btc_reg" if model_type == 'reg' else "btc_clf"
    booster_name = f"{prefix}_booster.txt"
    params_name = f"{prefix}_params.json"

    s3_booster_key = f'models/{booster_name}'
    s3_params_key = f'models/{params_name}'

    if not check_booster_exists(bucket_name, s3_booster_key):
        # A bare `raise` with no active exception only produced
        # "No active exception to reraise"; keep the RuntimeError type but say
        # which object is missing.
        raise RuntimeError(
            f"Booster not found in S3: s3://{bucket_name}/{s3_booster_key}"
        )

    s3_client.download_file(bucket_name, s3_booster_key, model_path + booster_name)  # booster download
    s3_client.download_file(bucket_name, s3_params_key, model_path + params_name)  # params download

    booster = lgb.Booster(model_file=model_path + booster_name)
    with open(model_path + params_name, 'r') as f:
        params = json.load(f)
    return booster, params

In [None]:
def predict_and_save(df):
    """Score the feature frame with both boosters and append the result to Snowflake.

    Every column except ``trade_time_min`` is used as a model feature. Three
    columns are added to ``df`` in place before the upload:
    ``future_avg_price`` (regressor output), ``target_direction`` (predicted
    class index) and ``future_return`` (relative change versus ``avg_price``).

    Args:
        df: Feature frame; must contain trade_time_min and avg_price.
            ``None`` or an empty frame is reported and skipped.

    Returns:
        None.
    """
    # make_feature() may hand back None for empty input, so guard both cases.
    if df is None or df.empty:
        print("Empty dataframe")
        return

    # NOTE(review): Index.difference returns the names sorted, so the features
    # reach the boosters in alphabetical order — confirm training used the same order.
    feature_cols = df.columns.difference(['trade_time_min'])

    # The parameter dicts returned alongside the boosters are not needed here.
    clf_booster, _ = load_booster()
    reg_booster, _ = load_booster('reg')

    # Select the features once, before any prediction column is added.
    features = df[feature_cols]
    pred_price = reg_booster.predict(features)
    class_scores = np.asarray(clf_booster.predict(features))
    if class_scores.ndim == 1:
        # Single score per row (binary model): treated as the positive-class
        # probability and thresholded at 0.5.
        pred_up = (class_scores > 0.5).astype(int)
    else:
        # One score per class: take the most likely class index.
        pred_up = np.argmax(class_scores, axis=1)

    df['future_avg_price'] = pred_price
    df['target_direction'] = pred_up
    df['future_return'] = (df['future_avg_price'] - df['avg_price']) / df['avg_price']

    upload_to_snowflake(df, "BTC_PREDICTION_TABLE", target_date)

In [None]:
# Snowflake connection
def snowflake_connection():
    """Open a Snowflake connection from the SNOWFLAKE_* notebook parameters.

    Returns:
        The object returned by ``snowflake.connector.connect``.
    """
    connect_kwargs = {
        "user": SNOWFLAKE_USER,
        "password": SNOWFLAKE_PASSWORD,
        "account": SNOWFLAKE_ACCOUNT,
        "warehouse": SNOWFLAKE_WAREHOUSE,
        "database": SNOWFLAKE_DATABASE,
        "schema": SNOWFLAKE_SCHEMA,
    }
    return snowflake.connector.connect(**connect_kwargs)


# Snowflake upload
def upload_to_snowflake(df, table_name, target_dt):
    """Append a DataFrame to an existing Snowflake table with write_pandas.

    Column labels are stripped, upper-cased and have spaces replaced by
    underscores on a copy, so the caller's frame keeps its own labels. The
    upload is best-effort: failures are printed, never raised.

    Args:
        df: Rows to upload; ``None`` or an empty frame is reported and skipped.
        table_name: Target table; upper-cased before the write.
        target_dt: Currently unused; kept so existing callers keep working.

    Returns:
        None.
    """
    if df is None or df.empty:
        print("업로드할 데이터 없음")
        return

    # Normalise the labels on a copy instead of relabelling the caller's frame.
    upload_df = df.copy()
    upload_df.columns = [col.strip().upper().replace(' ', '_') for col in df.columns]

    conn = None
    try:
        print(f"\n--- Snowflake에 결과 적재 시작 ---")
        print(f"대상 테이블: {table_name}")
        conn = snowflake_connection()
        if conn:
            # Load the DataFrame straight into the target table (append only:
            # the table is neither created nor overwritten).
            print(f"\n--- DataFrame을 본 테이블로 직접 적재 ---")
            success, _, n_rows, _ = write_pandas(
                conn=conn,
                df=upload_df,
                table_name=table_name.upper(),
                auto_create_table=False,
                overwrite=False,
            )
            # Report success only when write_pandas confirms it, with the row
            # count it actually loaded.
            if success:
                print(f"✅ Snowflake 테이블 적재 성공: {n_rows} 행")
            else:
                print("🚨 Snowflake 적재 실패: write_pandas가 실패를 보고했습니다.")
        else:
            print("🚨 Snowflake 연결 실패, 업로드를 건너뜁니다.")
    except Exception as e:
        # Deliberately best-effort: log the failure and carry on.
        print(f"🚨 Snowflake 적재 오류: {e}")
    finally:
        if conn is not None:
            conn.close()

In [None]:
# Pipeline driver: runs only when this notebook/script is the entry point.
if __name__ == "__main__":
    # 1) Query the derived minute rows for the hour that ends at ref_time_str.
    df = load_recent_minute_data()
    # 2) Add bollinger_width, drop the raw band columns, make market categorical.
    df = make_feature(df)
    # 3) Predict with both boosters and append the result to Snowflake.
    predict_and_save(df)


--- Snowflake에 결과 적재 시작 ---
대상 테이블: BTC_PREDICTION_TABLE

--- DataFrame을 본 테이블로 직접 적재 ---


  write_pandas(conn=conn, df=df, table_name=table_name.upper(), auto_create_table=False, overwrite=False)


✅ Snowflake 테이블 적재 성공: 7579 행
