In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging
import datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Flatten, Dropout, Reshape

# Log the start of the modelling run.
# NOTE(review): no logging configuration is visible in this notebook; with the
# standard library's default root level (WARNING) this INFO record is not shown.
logging.info("모델 시작")
# Load the candle data; the path is relative to the current working directory.
data = pd.read_csv("../flask/data.csv")

In [2]:
# Build the working frame from the loaded CSV data: parse the KST candle
# timestamp strings (ISO layout with a 'T' separator) and use them as the index.
df = (
    pd.DataFrame(data)
    .assign(
        candle_date_time_kst=lambda frame: pd.to_datetime(
            frame['candle_date_time_kst'], format='%Y-%m-%dT%H:%M:%S'
        )
    )
    .set_index('candle_date_time_kst')
)

In [3]:
# Preview the first rows of the timestamp-indexed frame.
df.head()

Unnamed: 0_level_0,opening_price,high_price,low_price,trade_price,candle_acc_trade_price,candle_acc_trade_volume
candle_date_time_kst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-05-23 15:06:00,36411000.0,36412000.0,36410000.0,36411000.0,982987.2,0.026997
2023-05-23 15:06:00,36411000.0,36412000.0,36408000.0,36410000.0,7743630.0,0.212684
2023-05-23 15:06:00,36411000.0,36412000.0,36408000.0,36410000.0,7878793.0,0.216396
2023-05-23 15:06:00,36411000.0,36421000.0,36408000.0,36409000.0,20422770.0,0.560879
2023-05-23 15:06:00,36411000.0,36428000.0,36408000.0,36425000.0,46700440.0,1.282337


In [4]:
# Inspect the frame: first rows, then column dtypes and non-null counts.
print(df.head())  # explicit print because this is not the cell's last expression
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() appends a stray "None" line.
df.info()

                      opening_price  high_price   low_price  trade_price   
candle_date_time_kst                                                       
2023-05-23 15:06:00      36411000.0  36412000.0  36410000.0   36411000.0  \
2023-05-23 15:06:00      36411000.0  36412000.0  36408000.0   36410000.0   
2023-05-23 15:06:00      36411000.0  36412000.0  36408000.0   36410000.0   
2023-05-23 15:06:00      36411000.0  36421000.0  36408000.0   36409000.0   
2023-05-23 15:06:00      36411000.0  36428000.0  36408000.0   36425000.0   

                      candle_acc_trade_price  candle_acc_trade_volume  
candle_date_time_kst                                                   
2023-05-23 15:06:00             9.829872e+05                 0.026997  
2023-05-23 15:06:00             7.743630e+06                 0.212684  
2023-05-23 15:06:00             7.878793e+06                 0.216396  
2023-05-23 15:06:00             2.042277e+07                 0.560879  
2023-05-23 15:06:00             4.6

In [5]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dropout, MaxPooling1D, Flatten, Reshape, LSTM, Dense
import datetime

# Normalise the price/volume columns with min-max scaling.
# NOTE(review): the scaler is fitted on every row of df, including rows that the
# split further down assigns to validation and test, so their min/max influence
# the training features — consider fitting on the training slice only.
scaler = MinMaxScaler()
# Column order matters: position 3 ('trade_price') is used as the target column.
scale_cols = ['opening_price', 'high_price', 'low_price', 'trade_price', 'candle_acc_trade_price', 'candle_acc_trade_volume']
scaled = scaler.fit_transform(df[scale_cols])

# Sliding-window dataset construction
def create_dataset(data, target, time_steps=1):
    """Slice a series into overlapping windows and their next-step targets.

    Args:
        data: Sequence (e.g. a 2-D array) whose items are consecutive observations.
        target: Sequence indexed like ``data`` holding the value to predict.
        time_steps: Number of consecutive items per input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: ``X[k]`` is ``data[k:k + time_steps]``
        and ``y[k]`` is ``target[k + time_steps]`` for every ``k`` in
        ``range(len(data) - time_steps)``.
    """
    # Iterate over the exclusive end of each window; the item at that position
    # is the value the window is meant to predict.
    window_ends = range(time_steps, len(data))
    windows = [data[end - time_steps:end] for end in window_ends]
    labels = [target[end] for end in window_ends]
    return np.array(windows), np.array(labels)



# Split ratios; rows are split in their stored order (no shuffling), with the
# last rows held out for testing.
train_ratio = 0.6  # share of rows used for training
val_ratio = 0.2    # share of rows used for validation
test_ratio = 0.2   # share of rows used for testing

# Split by explicit positive indices. Negative-offset slicing breaks when a
# hold-out size rounds down to 0: scaled[:-0] is empty and scaled[-0:] is the
# whole array, which would silently swap the roles of the splits.
test_data_size = int(len(scaled) * test_ratio)
test_start = len(scaled) - test_data_size
train_val_data = scaled[:test_start]
test_data = scaled[test_start:]

# Carve the validation rows off the end of the remaining train+validation block.
val_data_size = int(len(train_val_data) * val_ratio / (train_ratio + val_ratio))
val_start = len(train_val_data) - val_data_size
train_data = train_val_data[:val_start]
val_data = train_val_data[val_start:]

n_timesteps = 4  # rows per input window

# Windowed (X, y) pairs per split; column 3 of each array is the target.
# Windows are built inside each split, so none spans a split boundary.
X_train, y_train = create_dataset(train_data, train_data[:, 3], n_timesteps)
X_val, y_val = create_dataset(val_data, val_data[:, 3], n_timesteps)
X_test, y_test = create_dataset(test_data, test_data[:, 3], n_timesteps)

In [6]:
# Rebuild the windowed datasets for the train / validation / test splits.
X_train, y_train = create_dataset(train_data, train_data[:, 3], n_timesteps)
X_val, y_val = create_dataset(val_data, val_data[:, 3], n_timesteps)
X_test, y_test = create_dataset(test_data, test_data[:, 3], n_timesteps)

# Report the shape of every array, inputs before targets for each split.
for _label, _arr in (
    ("X_train shape:", X_train),
    ("y_train shape:", y_train),
    ("X_val shape:", X_val),
    ("y_val shape:", y_val),
    ("X_test shape:", X_test),
    ("y_test shape:", y_test),
):
    print(_label, _arr.shape)


X_train shape: (14, 4, 6)
y_train shape: (14,)
X_val shape: (2, 4, 6)
y_val shape: (2,)
X_test shape: (2, 4, 6)
y_test shape: (2,)


In [7]:
n_features = len(scale_cols)

# Convolutional front-end over (n_timesteps, n_features) windows, followed by
# an LSTM and a single-unit regression output.
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)),
    Dropout(0.025),
    Conv1D(filters=64, kernel_size=2, activation='relu'),
    Dropout(0.025),
    Flatten(),
    # Project the flattened convolution output to 64 units ...
    Dense(64, activation='relu'),
    # ... and present them to the LSTM as a sequence of 64-wide steps.
    Reshape((-1, 64)),
    LSTM(50, activation='relu'),
    Dense(1),
])

model.compile(optimizer='adam', loss='mse')

# Fit on the training windows, evaluating on the validation windows each epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
)

# Loss on the held-out test windows.
score = model.evaluate(X_test, y_test)

# Predictions for the test windows (still in scaled units).
y_pred = model.predict(X_test)

# The most recent n_timesteps scaled rows, with a leading batch axis of size 1.
# The forecast call that would consume it is currently disabled.
last_sequence = scaled[-n_timesteps:][np.newaxis, ...]
#future_pred = model.predict(last_sequence)




Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [17]:
def reverse_min_max_scaling(org_x, x):
    """Map min-max-scaled values back onto the range of the original data.

    Args:
        org_x: Array-like of original (unscaled) values; supplies min and max.
        x: Array-like of scaled values to convert back.

    Returns:
        NumPy array equal to ``x * (max(org_x) - min(org_x) + 1e-7) + min(org_x)``.
    """
    original = np.asarray(org_x)
    lowest = original.min()
    # The 1e-7 term is part of the formula as written; presumably it mirrors an
    # epsilon in a matching forward scaler — TODO confirm. Note that sklearn's
    # MinMaxScaler divides by exactly (max - min), without such a term.
    span = original.max() - lowest + 1e-7
    return (np.asarray(x) * span) + lowest
# Undo the scaling on the predictions using the observed trade_price range.
y_pred_reshaped = np.asarray(y_pred).reshape(-1, 1)  # one prediction per row
y_pred_original = reverse_min_max_scaling(df['trade_price'], y_pred_reshaped)


TypeError: 'float' object is not iterable

In [23]:
# Timestamp to attach to the prediction: one hour after the last index entry of df.
current_date = df.index[-1].to_pydatetime()
future_dates = [current_date + datetime.timedelta(hours=1)]

# Payload for the database insert: parallel lists of dates and predictions.
# NOTE(review): the stored value is the first element of y_pred_original; if that
# array still holds the test-window predictions, it is not a forecast for
# future_dates[0] — confirm which prediction is intended.
y_pred_dict = dict(
    dateday=future_dates,
    prediction=[y_pred_original.flatten()[0]],
)


print(y_pred_dict)

{'dateday': [datetime.datetime(2023, 5, 23, 16, 8)], 'prediction': [36399610.0]}


In [24]:
import pymysql

def mysql_connection(y_pred_dict):
    """Insert predictions into the ``predictions`` table of the MySQL database.

    Args:
        y_pred_dict: Mapping with two parallel iterables, ``'dateday'`` and
            ``'prediction'``. Pairs are formed with ``zip``, so surplus items
            in the longer iterable are ignored.

    Raises:
        KeyError: If either key is missing (raised before any connection is opened).
        Exception: Errors from the database driver propagate to the caller; the
            connection is closed in every case and nothing is committed on failure.
    """
    import os  # local import: only this function reads the environment

    # Build the rows first so malformed input fails before a connection exists.
    rows = list(zip(y_pred_dict['dateday'], y_pred_dict['prediction']))

    # Connection settings keep their previous values as defaults but can be
    # overridden through environment variables instead of editing the notebook.
    connection = pymysql.connect(
        host=os.environ.get('MYSQL_HOST', 'localhost'),
        user=os.environ.get('MYSQL_USER', 'root'),
        password=os.environ.get('MYSQL_PASSWORD', '1234'),
        database=os.environ.get('MYSQL_DATABASE', 'bitcoin'),
    )

    # Parameterised insert; values are escaped by the driver.
    sql = "INSERT INTO predictions (dateday, prediction) VALUES (%s, %s)"

    try:
        # The cursor context manager closes the cursor even if the insert fails.
        with connection.cursor() as cursor:
            cursor.executemany(sql, rows)
        # Persist the inserted rows.
        connection.commit()
    finally:
        # Always release the connection, including on error.
        connection.close()


# Insert the prediction payload into the database.
mysql_connection(y_pred_dict)

In [25]:
# NOTE(review): model_prediction is not defined in any visible cell of this
# notebook (the recorded output is a NameError), so this cell cannot run as-is.
y_pred_dict = model_prediction()
mysql_connection(y_pred_dict)

NameError: name 'model_prediction' is not defined

In [23]:
# NOTE(review): repeats the insert for the same y_pred_dict; each run issues the
# INSERT again, so duplicates result unless the table enforces uniqueness. The
# recorded FileNotFoundError mentions a CSV path this call does not read in the
# visible code — the stored output looks stale; confirm.
mysql_connection(y_pred_dict)

FileNotFoundError: [Errno 2] No such file or directory: './data/bitcoin2023_t.csv'

In [17]:
def collect_and_predict():
    """Run the prediction step and store its result in the database.

    Calls ``model_prediction()`` and, when it yields data, passes one
    date/prediction pair to ``mysql_connection``. Nothing is inserted (and an
    info message is logged) when the result is missing or empty.

    NOTE(review): ``model_prediction`` is not defined in the visible notebook;
    this code presumes it returns a pandas object with ``.empty``, an index of
    dates and a ``'prediction'`` column — confirm against its definition.
    """
    result = model_prediction()
    # Treat a missing result the same as an empty one instead of failing on .empty.
    if result is None or result.empty:
        logging.info("데이터가 비어있습니다.")
        return

    last_dateday = result.index[-1]  # index label of the last row
    # NOTE(review): this reads the FIRST prediction while the date comes from the
    # LAST row; the two agree only for a single-row result — confirm intent.
    prediction_value = result['prediction'].iloc[0]

    # mysql_connection zips the two entries, so each must be an iterable;
    # passing bare scalars raises TypeError. Wrap the single pair in lists.
    y_pred = {'dateday': [last_dateday], 'prediction': [prediction_value]}

    mysql_connection(y_pred)  # insert the pair