In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
import pyupbit
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Flatten, Dropout, Reshape
import matplotlib

In [18]:
pip install numpy pandas tensorflow

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import schedule
import time

def upbit_api():
    # 함수 내용을 작성합니다.
    url = "https://api.upbit.com/v1/candles/minutes/1"
    params = {
        "market": "KRW-BTC",
        "count": 30
    }
    response = requests.get(url, params=params)
    data = response.json()
    return data

In [5]:
data = upbit_api()
print(data)

[{'market': 'KRW-BTC', 'candle_date_time_utc': '2023-05-17T08:10:00', 'candle_date_time_kst': '2023-05-17T17:10:00', 'opening_price': 36216000.0, 'high_price': 36220000.0, 'low_price': 36180000.0, 'trade_price': 36180000.0, 'timestamp': 1684311052409, 'candle_acc_trade_price': 384628272.31734, 'candle_acc_trade_volume': 10.62556036, 'unit': 1}, {'market': 'KRW-BTC', 'candle_date_time_utc': '2023-05-17T08:09:00', 'candle_date_time_kst': '2023-05-17T17:09:00', 'opening_price': 36222000.0, 'high_price': 36224000.0, 'low_price': 36215000.0, 'trade_price': 36215000.0, 'timestamp': 1684310999049, 'candle_acc_trade_price': 54568468.04949, 'candle_acc_trade_volume': 1.50659314, 'unit': 1}, {'market': 'KRW-BTC', 'candle_date_time_utc': '2023-05-17T08:08:00', 'candle_date_time_kst': '2023-05-17T17:08:00', 'opening_price': 36207000.0, 'high_price': 36223000.0, 'low_price': 36205000.0, 'trade_price': 36222000.0, 'timestamp': 1684310939976, 'candle_acc_trade_price': 88111662.10678, 'candle_acc_trad

In [10]:
def model_prediction(data):
    # 데이터 가져오기
    data = upbit_api()

    # 데이터프레임으로 변환
    df = pd.DataFrame(data)

    # 데이터 전처리
    df['candle_date_time_kst'] = pd.to_datetime(df['candle_date_time_kst']) #datetime 변환
    df.set_index('candle_date_time_kst', inplace=True)

    # 결측치 처리
    df = df.asfreq('60T') #60 간격마다 보관 시계열이용하기위한
    df = df.interpolate(method='linear')
    df = df.fillna(method='ffill')

    # 이상치 제거
    df = df[(np.abs(df['trade_price']) < 3*df['trade_price'].std()) & 
                (np.abs(df['candle_acc_trade_price']) < 3*df['candle_acc_trade_price'].std())]

    # 데이터 정규화
    scaler = MinMaxScaler()
    scale_cols = ['opening_price', 'high_price', 'low_price', 'trade_price', 'candle_acc_trade_price', 'candle_acc_trade_volume']
    scaled = scaler.fit_transform(df[['opening_price', 'high_price', 'low_price', 'trade_price', 'candle_acc_trade_price', 'candle_acc_trade_volume']])

    # 데이터 셋 만들기
    df = pd.DataFrame(scaled, columns=scale_cols)

    # 데이터셋 분리 비율
    train_ratio = 0.7
    val_ratio = 0.2
    test_ratio = 0.1

    # 학습 데이터와 테스트 데이터로 분리
    test_data_size = int(len(df) * test_ratio)
    train_val_data = df[:-test_data_size]
    test_data = df[-test_data_size:]

    # 학습 데이터와 검증 데이터로 분리
    val_data_size = int(len(train_val_data) * val_ratio / (train_ratio + val_ratio))
    train_data = train_val_data[:-val_data_size]
    val_data = train_val_data[-val_data_size:]

    # 데이터셋 생성 함수
    def create_dataset(X, y, time_steps=1):
        Xs, ys = [], []
        for i in range(len(X) - time_steps):
            Xs.append(X.iloc[i:(i + time_steps)].values)
            ys.append(y.iloc[i + time_steps])
        return np.array(Xs), np.array(ys)

    n_timesteps = 24
    # 학습 데이터셋 생성
    X_train, y_train = create_dataset(train_data, train_data['trade_price'], n_timesteps)

    # 검증 데이터셋 생성
    X_val, y_val = create_dataset(val_data, val_data['trade_price'], n_timesteps)

    # 테스트 데이터셋 생성
    X_test, y_test = create_dataset(test_data, test_data['trade_price'], n_timesteps)

    # CNN 모델과 LSTM 모델
    n_features = len(scale_cols)

    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)))
    model.add(Dropout(0.025))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.025))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Reshape((-1, 64)))
    model.add(LSTM(100, activation='relu'))
    model.add(Dense(1))

    model.compile(loss='mse', optimizer='adam')

    # 모델 학습하기
    history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, batch_size=32)

    # 모델 평가하기
    score = model.evaluate(X_test, y_test)

    # 모델 예측하기
    y_pred = model.predict(X_test)

    # 모델 저장하기
    model.save("./save.h5")
    #print("학습 모델 저장")

    last_sequence = scaled[-n_timesteps:]
    last_sequence = np.expand_dims(last_sequence, axis=0)
    y_pred = model.predict(last_sequence)

In [16]:
y_pred = model_prediction(data)
print(y_pred)

ValueError: Found array with 0 sample(s) (shape=(0, 6)) while a minimum of 1 is required by MinMaxScaler.