In [10]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging
import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Flatten, Dropout, Reshape

logging.info("모델 시작")
data = pd.read_csv("../data/bitcoin2023_t.csv")

In [11]:
print(df)

                      opening_price  high_price   low_price  trade_price   
candle_date_time_kst                                                       
2023-05-09 14:05:00      36994000.0  36995000.0  36973000.0   36975000.0  \
2023-05-09 14:00:00      37002000.0  37035000.0  36995000.0   36995000.0   
2023-05-09 13:55:00      37001000.0  37037000.0  37000000.0   37001000.0   
2023-05-09 13:50:00      37001000.0  37041000.0  37000000.0   37000000.0   
2023-05-09 13:45:00      37001000.0  37010000.0  37000000.0   37001000.0   
...                             ...         ...         ...          ...   
2017-09-25 12:20:00       4215000.0   4242000.0   4203000.0    4215000.0   
2017-09-25 12:15:00       4227000.0   4244000.0   4210000.0    4227000.0   
2017-09-25 12:10:00       4215000.0   4236000.0   4212000.0    4227000.0   
2017-09-25 12:05:00       4225000.0   4241000.0   4208000.0    4227000.0   
2017-09-25 12:00:00       4201000.0   4224000.0   4195000.0    4224000.0   

           

In [12]:
# 데이터 프레임으로 변환
df = pd.DataFrame(data)

# 데이터 전처리
df['candle_date_time_kst'] = pd.to_datetime(df['candle_date_time_kst'], format='%Y-%m-%dT%H:%M:%S')
df.set_index('candle_date_time_kst', inplace=True)

In [13]:
df.head()

Unnamed: 0_level_0,opening_price,high_price,low_price,trade_price,candle_acc_trade_price,candle_acc_trade_volume
candle_date_time_kst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-05-09 14:05:00,36994000.0,36995000.0,36973000.0,36975000.0,577924362.6,15.624915
2023-05-09 14:00:00,37002000.0,37035000.0,36995000.0,36995000.0,778625951.1,21.043579
2023-05-09 13:55:00,37001000.0,37037000.0,37000000.0,37001000.0,347885140.1,9.398911
2023-05-09 13:50:00,37001000.0,37041000.0,37000000.0,37000000.0,588278143.7,15.89521
2023-05-09 13:45:00,37001000.0,37010000.0,37000000.0,37001000.0,654973275.7,17.701591


In [15]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dropout, MaxPooling1D, Flatten, Reshape, LSTM, Dense
import datetime

# 데이터 정규화
scaler = MinMaxScaler()
scale_cols = ['opening_price', 'high_price', 'low_price', 'trade_price', 'candle_acc_trade_price', 'candle_acc_trade_volume']
scaled = scaler.fit_transform(df[scale_cols])

# 데이터셋 분리 비율
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1

# 학습 데이터와 테스트 데이터로 분리
test_data_size = int(len(df) * test_ratio)
train_val_data = df[:-test_data_size]
test_data = df[-test_data_size:]

# 학습 데이터와 검증 데이터로 분리
val_data_size = int(len(train_val_data) * val_ratio / (train_ratio + val_ratio))
train_data = train_val_data[:-val_data_size]
val_data = train_val_data[-val_data_size:]

# 데이터셋 생성 함수
def create_dataset(X, y, time_steps=1):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X.iloc[i:(i + time_steps)].values)
        ys.append(y.iloc[i + time_steps])
    return np.array(Xs), np.array(ys)

n_timesteps = 24
# 학습 데이터셋 생성
X_train, y_train = create_dataset(train_data, train_data['trade_price'], n_timesteps)

# 검증 데이터셋 생성
X_val, y_val = create_dataset(val_data, val_data['trade_price'], n_timesteps)

# 테스트 데이터셋 생성
X_test, y_test = create_dataset(test_data, test_data['trade_price'], n_timesteps)

# CNN 모델과 LSTM 모델
n_features = len(scale_cols)

model = Sequential()
model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(Dropout(0.025))
model.add(Conv1D(filters=32, kernel_size=3, activation='relu'))
model.add(Dropout(0.025))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Reshape((-1, 64)))
model.add(LSTM(50, activation='relu'))
model.add(Dense(1))

model.compile(loss='mse', optimizer='adam')

# 모델 학습하기
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=1, batch_size=32)

# 모델 평가하기
score = model.evaluate(X_test, y_test)

# 모델 예측하기
y_pred = model.predict(X_test)

last_sequence = scaled[-n_timesteps:]
last_sequence = np.expand_dims(last_sequence, axis=0)
future_pred = model.predict(last_sequence)

# 미래 날짜 계산
current_date = df.index[-1].to_pydatetime()
future_dates = [current_date + datetime.timedelta(days=i+1) for i in range(30)]

# 예측 결과를 저장할 딕셔너리
y_pred_dict = {
    'dateday': future_dates,
    'prediction': future_pred.flatten().tolist()
}

print(y_pred_dict)


{'dateday': [datetime.datetime(2017, 9, 26, 12, 0), datetime.datetime(2017, 9, 27, 12, 0), datetime.datetime(2017, 9, 28, 12, 0), datetime.datetime(2017, 9, 29, 12, 0), datetime.datetime(2017, 9, 30, 12, 0), datetime.datetime(2017, 10, 1, 12, 0), datetime.datetime(2017, 10, 2, 12, 0), datetime.datetime(2017, 10, 3, 12, 0), datetime.datetime(2017, 10, 4, 12, 0), datetime.datetime(2017, 10, 5, 12, 0), datetime.datetime(2017, 10, 6, 12, 0), datetime.datetime(2017, 10, 7, 12, 0), datetime.datetime(2017, 10, 8, 12, 0), datetime.datetime(2017, 10, 9, 12, 0), datetime.datetime(2017, 10, 10, 12, 0), datetime.datetime(2017, 10, 11, 12, 0), datetime.datetime(2017, 10, 12, 12, 0), datetime.datetime(2017, 10, 13, 12, 0), datetime.datetime(2017, 10, 14, 12, 0), datetime.datetime(2017, 10, 15, 12, 0), datetime.datetime(2017, 10, 16, 12, 0), datetime.datetime(2017, 10, 17, 12, 0), datetime.datetime(2017, 10, 18, 12, 0), datetime.datetime(2017, 10, 19, 12, 0), datetime.datetime(2017, 10, 20, 12, 0), d

In [27]:
import pymysql

def mysql_connection(y_pred_dict):
    # MySql 연결 설정
    connection = pymysql.connect(
        host='localhost',
        user='root',
        password='1234',
        database='bitcoin'
    )

    # 커서 생성
    cursor = connection.cursor()

    # 데이터 삽입 쿼리
    sql = "INSERT INTO predictions (dateday, prediction) VALUES (%s, %s)"

    # 데이터 삽입 실행
    for date, pred in zip(y_pred_dict['dateday'], y_pred_dict['prediction']):
        cursor.execute(sql, (date, pred))

    # 변경 사항 저장
    connection.commit()

    # 연결 종료
    cursor.close()
    connection.close()


# 데이터베이스에 삽입
mysql_connection(y_pred_dict)

In [26]:
y_pred_dict = model_prediction()
mysql_connection(y_pred_dict)

FileNotFoundError: [Errno 2] No such file or directory: './data/bitcoin2023_t.csv'

In [23]:
mysql_connection(y_pred_dict)

FileNotFoundError: [Errno 2] No such file or directory: './data/bitcoin2023_t.csv'

In [17]:
def collect_and_predict():
    result = model_prediction()  # model_prediction 함수의 반환값 저장
    if result.empty:
        logging.info("데이터가 비어있습니다.")
        return
    last_dateday = result.index[-1]  # 최신 데이터의 dateday 값
    prediction_value = result['prediction'].iloc[0]  # 최신 예측 값

    y_pred = {'dateday': last_dateday, 'prediction': prediction_value}  # 딕셔너리 생성

    mysql_connection(y_pred)  # 데이터 삽입