In [7]:
import pandas as pd
import numpy as np
import glob
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt


In [8]:
def load_data(pattern, num_files):
    data_list = []
    for i in range(1, num_files + 1):
        file_path = f'../store_dataset/{pattern}/{pattern}_{i}.csv'
        df = pd.read_csv(file_path)
        data_list.append(df[['Open', 'High', 'Low', 'Close', 'Volume']])
    
    # 모든 데이터를 하나로 결합
    combined_data = pd.concat(data_list, axis=0)
    return combined_data

def preprocess_data(df, sequence_length=50):
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df)
    
    # 시계열 데이터 생성
    sequences = []
    for i in range(len(scaled_data) - sequence_length):
        sequence = scaled_data[i:i + sequence_length]
        sequences.append(sequence)
    
    return np.array(sequences), scaler


In [9]:
def build_generator(input_shape):
    model = tf.keras.Sequential()
    model.add(layers.LSTM(100, return_sequences=True, input_shape=input_shape))
    model.add(layers.LSTM(50, return_sequences=True))
    model.add(layers.TimeDistributed(layers.Dense(5)))  # Output shape matches the data dimensions
    return model

def build_discriminator(input_shape):
    model = tf.keras.Sequential()
    model.add(layers.LSTM(100, return_sequences=True, input_shape=input_shape))
    model.add(layers.LSTM(50))
    model.add(layers.Dense(1, activation='sigmoid'))  # Binary classification (Real or Fake)
    return model

def build_gan(generator, discriminator):
    discriminator.compile(loss='binary_crossentropy', optimizer='adam')
    discriminator.trainable = False

    gan_input = tf.keras.Input(shape=(None, generator.input_shape[-1]))
    generated_data = generator(gan_input)
    gan_output = discriminator(generated_data)

    gan = tf.keras.Model(gan_input, gan_output)
    gan.compile(loss='binary_crossentropy', optimizer='adam')

    return gan


In [10]:
def train_gan(generator, discriminator, gan, data, epochs=100, batch_size=32):
    for epoch in range(epochs):
        for _ in range(batch_size):
            # Real data
            idx = np.random.randint(0, data.shape[0], batch_size)
            real_data = data[idx]

            # Fake data
            noise = np.random.normal(0, 1, (batch_size, generator.input_shape[1], generator.input_shape[2]))
            fake_data = generator.predict(noise)

            # Labels for real and fake data
            real_labels = np.ones((batch_size, 1))
            fake_labels = np.zeros((batch_size, 1))

            # Train discriminator
            discriminator.trainable = True
            d_loss_real = discriminator.train_on_batch(real_data, real_labels)
            d_loss_fake = discriminator.train_on_batch(fake_data, fake_labels)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Train generator (via GAN model)
            discriminator.trainable = False
            g_loss = gan.train_on_batch(noise, real_labels)

        print(f'Epoch {epoch + 1}/{epochs} | D Loss: {d_loss} | G Loss: {g_loss}')


In [11]:
def detect_pattern(generator, real_data, threshold=0.5):
    noise = np.random.normal(0, 1, (real_data.shape[0], generator.input_shape[1], generator.input_shape[2]))
    generated_data = generator.predict(noise)

    # Compare real and generated data
    differences = np.abs(real_data - generated_data)
    detected = differences.mean(axis=1) < threshold  # Simple thresholding for detection

    return detected


In [12]:
# 여러 패턴에 대한 데이터 로드 및 전처리
patterns = ['double_top', 'double_bottom', 'ascending_triangle']  # 학습할 패턴 리스트
num_files_per_pattern = 3  # 각 패턴당 사용할 파일 수

all_sequences = []

for pattern in patterns:
    df = load_data(pattern, num_files_per_pattern)
    sequences, _ = preprocess_data(df)
    all_sequences.append(sequences)

# 모든 패턴의 시퀀스를 하나로 결합
all_sequences = np.vstack(all_sequences)

# 이후 동일한 GAN 모델 훈련 과정
sequence_length = all_sequences.shape[1]
input_shape = (sequence_length, all_sequences.shape[2])
generator = build_generator(input_shape)
discriminator = build_discriminator(input_shape)
gan = build_gan(generator, discriminator)

train_gan(generator, discriminator, gan, all_sequences, epochs=100, batch_size=32)


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 300ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

KeyboardInterrupt: 