In [None]:
# Install Korean fonts (the fonts-nanum package)
!sudo apt-get install -y fonts-nanum
# Rebuild the system font cache so the newly installed fonts are picked up
!sudo fc-cache -fv
# Remove matplotlib's cache directory
# (presumably so it rescans the installed fonts on next import - verify)
!rm ~/.cache/matplotlib -rf
# Restart the runtime after installation

In [None]:
import os

import ipywidgets
import keras.layers as layers
import keras.optimizers as optimizers
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras import layers
from keras import models
from keras.models import Model, load_model
from keras.models import load_model
from keras.utils import set_random_seed
# Suppress warnings
# NOTE(review): this filter ignores every warning category for the rest of the
# session, including deprecation and pandas warnings raised by later cells.
import warnings
warnings.filterwarnings("ignore")

# Use the NanumBarunGothic font family for matplotlib text.
plt.rc('font', family='NanumBarunGothic')

In [None]:
#파일 다운로드
!wget -N 'https://www.dropbox.com/scl/fi/p2ei1cmzjbkfsxbq96qy6/kospi.csv?rlkey=u9ti7r24gv1lr3omvasbljafa&dl=0' -O 'kospi.csv'

In [None]:
# Read every column as text, order the rows by the date column ('날짜'), use it
# as the index and keep only the first remaining column, which must be the
# closing price ('종가').
# NOTE(review): the dates are sorted as strings; that is chronological only for
# zero-padded, year-first formats - confirm against the CSV.
data = (
    pd.read_csv('kospi.csv', sep=',', dtype='str')
    .sort_values('날짜')
    .set_index('날짜')
    .iloc[:, :1]
    # Strip thousands separators and convert to float on a new frame, instead
    # of assigning into the iloc slice (which raises SettingWithCopyWarning).
    .assign(**{'종가': lambda frame: frame['종가'].str.replace(',', '').astype('float')})
)
data.shape

In [None]:
# Number of consecutive rows of the series that form one input window.
TRAINING_DAYS = 20

# Series values as an (n_rows, 1) column array.
ntraining = data.values.reshape((len(data), 1))

# Quick look at the raw series.
plt.plot(ntraining)
plt.show()

In [None]:

def convert_to_matrix(data, step):
    """Cut a 2-D series into sliding windows and their next-row targets.

    Args:
        data: array-like of shape (n_rows, n_features).
        step: window length in rows; must be >= 0.

    Returns:
        Tuple ``(x, y)`` where ``x`` has shape (n_windows, step, n_features)
        and ``y`` has shape (n_windows, 1, n_features), with
        ``n_windows = max(n_rows - step, 0)``. ``x[i]`` is ``data[i:i + step]``
        and ``y[i, 0]`` is ``data[i + step]``. When there are too few rows the
        arrays are empty but keep all three dimensions, so callers can still
        read ``x.shape[1]``.

    Raises:
        ValueError: if ``step`` is negative.
    """
    if step < 0:
        raise ValueError('step must be non-negative')

    data = np.asarray(data)
    n_windows = max(len(data) - step, 0)

    # Row i of `offsets` lists the source rows of window i: i, i+1, ..., i+step-1.
    offsets = np.arange(n_windows)[:, None] + np.arange(step)
    x = data[offsets]
    # The target of window i is the row right after it; copy so the result
    # does not alias the input array.
    y = data[step:step + n_windows][:, None, :].copy()
    return x, y

# Build the sliding windows: each d_x sample holds TRAINING_DAYS consecutive
# rows and the matching d_y sample holds the single row that follows them.
d_x, d_y = convert_to_matrix(ntraining, TRAINING_DAYS)
if len(d_x) == 0:
    raise ValueError('need more than TRAINING_DAYS rows to build a training window')

# Express every window and its target as a relative change from the window's
# first value (feature 0). Broadcasting replaces the per-sample Python loop and
# always produces floats, whereas writing the result back in place would
# truncate it if the source array had an integer dtype.
window_start = d_x[:, :1, :1]
ntraining_x = (d_x - window_start) / window_start
ntraining_y = (d_y - window_start) / window_start

print(ntraining_x.shape)
print(ntraining_y.shape)

In [None]:
# Helper for visualising one training sample
def display_result(idx=0):
    """Plot the input window of sample ``idx`` together with its target.

    Reads the module-level ``ntraining_x``, ``ntraining_y`` and ``data``.
    The title shows ``data.index[idx]``.
    """
    n_steps = ntraining_x.shape[1]
    window = ntraining_x[idx, :, 0].copy()

    # NaN padding hides everything before the last input step, so the
    # 'output' line is a single segment from the last input value to the target.
    padding = np.full(n_steps - 1, np.nan)
    segment = [ntraining_x[idx, -1, 0], ntraining_y[idx, 0, 0]]
    target_line = np.concatenate([padding, segment])

    plt.plot(window, label='input')
    plt.plot(target_line, label='output')
    plt.legend()
    plt.title('date : ' + str(data.index[idx]))
    plt.show()


In [None]:
# Browse the training samples with a slider widget.
# The slider's upper bound is inclusive, so it must stop at the last valid
# sample index; the trailing ';' hides the function repr that interact returns.
ipywidgets.interact(display_result, idx=(0, ntraining_x.shape[0] - 1, 1));

In [None]:
# Seed Python, NumPy and the Keras backend together. np.random.seed alone does
# not control Keras weight initialisation or dropout, so runs were not repeatable.
set_random_seed(1)

# One sample = a window of ntraining_x.shape[1] time steps with a single feature.
inputs = layers.Input(shape=(ntraining_x.shape[1], 1))
net = layers.BatchNormalization()(inputs)
# 2-unit recurrent layer; a later cell reads its output through model.layers[2].
net = layers.SimpleRNN(2, activation='relu', use_bias=False)(net)
net = layers.Dense(4, activation='tanh')(net)
net = layers.Dropout(0.5)(net)
# Single linear output trained with mean squared error.
net = layers.Dense(1, activation='linear')(net)
model = Model(inputs, net)

model.compile(optimizer='adam', loss='mse')

model.summary()


In [None]:
# Train the SimpleRNN model on all windows for 10 epochs; `results` keeps the
# object returned by fit().
results = model.fit(x=ntraining_x, y=ntraining_y, epochs=10)


In [None]:
# Sub-model that ends at layer index 2 of `model` (the SimpleRNN layer), used
# to read that layer's output for every training window.
new_model = Model(inputs=model.input, outputs=model.layers[2].output)
pred_rnn = new_model.predict(ntraining_x)

# Plot the first output column against the second.
plt.scatter(pred_rnn[:, 0], pred_rnn[:, 1])
plt.show()

In [None]:
# Cluster the recurrent-layer outputs (pred_rnn) into 20 groups
from sklearn.cluster import KMeans

# A fixed random_state makes the cluster labels identical on every run.
kmeans = KMeans(n_clusters=20, random_state=1)
kmeans.fit(pred_rnn)
# Cluster id of every training window, aligned with ntraining_x.
result_kmean = np.array(kmeans.labels_)
result_kmean

In [None]:
# Plot sample windows cluster by cluster
def display_cluster(idx=0):
    """Plot up to 15 randomly chosen training samples from cluster ``idx``.

    Reads the module-level ``ntraining_x``, ``ntraining_y``, ``data`` and
    ``result_kmean``. Each panel shows one input window plus its target and is
    titled with ``data.index`` at that sample's own position.

    Args:
        idx: cluster label to display.
    """
    max_panels = 15
    n_cols = 5

    members = np.where(result_kmean == idx)[0]
    if members.size == 0:
        # np.random.choice raises on an empty array; report and stop instead.
        print('no samples in cluster ' + str(idx))
        return

    # Sample without replacement so no window is drawn twice, and never ask
    # for more panels than the cluster has members.
    chosen = np.random.choice(members, size=min(max_panels, members.size), replace=False)
    n_rows = -(-len(chosen) // n_cols)  # ceiling division: no surplus empty row

    n_steps = ntraining_x.shape[1]
    fig = plt.figure(figsize=(20, 12))
    for position, sample in enumerate(chosen, start=1):
        ax = fig.add_subplot(n_rows, n_cols, position)
        ax.axis('off')

        window = ntraining_x[sample, :, 0].copy()
        # NaN padding leaves only the segment from the last input value to the target.
        target_line = np.append(
            np.full(n_steps - 1, np.nan),
            [ntraining_x[sample, -1, 0], ntraining_y[sample, 0, 0]],
        )
        ax.plot(window, label='input')
        ax.plot(target_line, label='output')
        ax.legend()
        # Title with the sample's own index entry, not the cluster id.
        ax.set_title('date : ' + str(data.index[sample]))

    plt.show()



In [None]:
# Browse the clusters with a slider widget.
# Cluster labels run from 0 to n_clusters - 1 and the slider's upper bound is
# inclusive; the trailing ';' hides the function repr that interact returns.
ipywidgets.interact(display_cluster, idx=(0, kmeans.n_clusters - 1, 1));

In [None]:
"""==============================
The requirements to use the cuDNN implementation are:

activation == tanh
recurrent_activation == sigmoid
recurrent_dropout == 0
unroll is False
use_bias is True
Inputs, if use masking, are strictly right-padded.
Eager execution is enabled in the outermost context.
================================"""

# Seed Python, NumPy and the Keras backend together. np.random.seed alone does
# not control Keras weight initialisation or dropout, so runs were not repeatable.
set_random_seed(1)

# One sample = a window of ntraining_x.shape[1] time steps with a single feature.
inputs = layers.Input(shape=(ntraining_x.shape[1], 1))
net = layers.BatchNormalization()(inputs)
# 2-unit LSTM; activation='tanh' and use_bias=True follow the cuDNN
# requirements listed in the note at the top of this cell. A later cell reads
# this layer's output through lstm_model.layers[2].
net = layers.LSTM(2, activation='tanh', use_bias=True)(net)
net = layers.Dense(4, activation='tanh')(net)
net = layers.Dropout(0.5)(net)
# Single linear output trained with mean squared error.
net = layers.Dense(1, activation='linear')(net)
lstm_model = Model(inputs, net)

lstm_model.compile(optimizer='adam', loss='mse')

lstm_model.summary()


In [None]:
# Train the LSTM model on all windows for 10 epochs; `results` keeps the
# object returned by fit().
results = lstm_model.fit(x=ntraining_x, y=ntraining_y, epochs=10)


In [None]:
# Sub-model that ends at layer index 2 of `lstm_model` (the LSTM layer), used
# to read that layer's output for every training window.
new_model = Model(inputs=lstm_model.input, outputs=lstm_model.layers[2].output)
pred_lstm = new_model.predict(ntraining_x)

# Plot the first output column against the second.
plt.scatter(pred_lstm[:, 0], pred_lstm[:, 1])
plt.show()

In [None]:
# Cluster the LSTM-layer outputs (pred_lstm) into 20 groups
from sklearn.cluster import KMeans

# A fixed random_state makes the cluster labels identical on every run.
kmeans = KMeans(n_clusters=20, random_state=1)
kmeans.fit(pred_lstm)
# Cluster id of every training window, aligned with ntraining_x.
# This overwrites the labels from the SimpleRNN clustering, so display_cluster
# now shows the LSTM clusters.
result_kmean = np.array(kmeans.labels_)
result_kmean

In [None]:
# Browse the clusters with a slider widget.
# Cluster labels run from 0 to n_clusters - 1 and the slider's upper bound is
# inclusive; the trailing ';' hides the function repr that interact returns.
ipywidgets.interact(display_cluster, idx=(0, kmeans.n_clusters - 1, 1));