# CNN - fashion_mnist

In [None]:
from tensorflow import keras
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the Fashion-MNIST train/test splits (images and integer labels) via Keras.
(train_input, train_target), (test_input, test_target) = keras.datasets.fashion_mnist.load_data()

In [None]:
train_input.shape, test_input.shape    # 60,000 images of 28x28

In [None]:
# Show the shapes of the label arrays for the train and test splits.
train_target.shape, test_target.shape

In [None]:
# Display the first training image using the reversed grayscale colormap.
plt.imshow(train_input[0], cmap='gray_r')

In [None]:
# CNN input preprocessing for image data.


def _to_scaled_nhwc(images):
    """Reshape to (N, 28, 28, 1) and divide by 255.

    -1 lets NumPy infer the sample count; the trailing 1 is the channel axis.
    """
    return images.reshape(-1, 28, 28, 1) / 255


train_scaled = _to_scaled_nhwc(train_input)
test_scaled = _to_scaled_nhwc(test_input)

In [None]:
train_scaled.shape    # last axis is the channel count: 1 here; an RGB image would have 3

In [None]:
# Hold out 20% of the training samples as validation data.
# NOTE: the results are bound back to train_scaled/train_target, so re-running
# this cell splits the already-reduced training set again.
_split = train_test_split(train_scaled, train_target, test_size=0.2)
train_scaled, val_scaled, train_target, val_target = _split

In [None]:
# Baseline CNN: one convolution + pooling stage followed by a small dense classifier.
model = keras.Sequential([
    keras.Input(shape=(28, 28, 1)),    # 28x28 single-channel images
    # 32 filters of 3x3 -> 32 feature maps per image; 'same' padding keeps 28x28.
    keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same'),
    # 2x2 max pooling halves each spatial dimension (28x28 -> 14x14), which speeds up training.
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Flatten(),    # flatten the feature maps into a 1-D vector
    keras.layers.Dense(100, activation='relu'),    # hidden layer
    keras.layers.Dropout(0.4),    # dropout with rate 0.4
    keras.layers.Dense(10, activation='softmax'),    # 10-way class probabilities
])

# `metrics` is passed as a list, as Keras documents it; newer Keras releases
# reject a bare string here.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# save_best_only=True makes 'best.h5' hold the best epoch (by the default
# val_loss monitor). Without it the file is overwritten every epoch and ends up
# containing the last epoch instead.
checkpoint = keras.callbacks.ModelCheckpoint('best.h5', save_best_only=True)

# Stop once val_loss has not improved for 4 epochs and restore the best weights.
early = keras.callbacks.EarlyStopping(patience=4, restore_best_weights=True)

history = model.fit(
    train_scaled, train_target,
    epochs=100,    # upper bound; early stopping normally ends training sooner
    validation_data=(val_scaled, val_target),
    callbacks=[checkpoint, early],
)

In [None]:
# Training vs. validation loss per epoch; a widening gap indicates overfitting.
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')    # label previously had a stray leading space
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Draw the model architecture as a diagram.
keras.utils.plot_model(model)

In [None]:
# Second run of the same single-convolution architecture, kept under its own name (model1).
# NOTE: this reuses the 'best.h5' path and rebinds checkpoint/early/history from the previous run.
model1 = keras.Sequential([
    keras.Input(shape=(28, 28, 1)),    # 28x28 single-channel images
    # 32 filters of 3x3 -> 32 feature maps per image; 'same' padding keeps 28x28.
    keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same'),
    # 2x2 max pooling halves each spatial dimension (28x28 -> 14x14), which speeds up training.
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Flatten(),    # flatten the feature maps into a 1-D vector
    keras.layers.Dense(100, activation='relu'),    # hidden layer
    keras.layers.Dropout(0.4),    # dropout with rate 0.4
    keras.layers.Dense(10, activation='softmax'),    # 10-way class probabilities
])

# `metrics` is passed as a list, as Keras documents it; newer Keras releases
# reject a bare string here.
model1.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# save_best_only=True makes 'best.h5' hold the best epoch (by the default
# val_loss monitor) instead of whatever epoch happened to run last.
checkpoint = keras.callbacks.ModelCheckpoint('best.h5', save_best_only=True)

# Stop once val_loss has not improved for 4 epochs and restore the best weights.
early = keras.callbacks.EarlyStopping(patience=4, restore_best_weights=True)

history = model1.fit(
    train_scaled, train_target,
    epochs=100,    # upper bound; early stopping normally ends training sooner
    validation_data=(val_scaled, val_target),
    callbacks=[checkpoint, early],
)

In [None]:
# Training vs. validation loss per epoch for the most recent fit.
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')    # label previously had a stray leading space
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()

## 컨벌루전 층 추가

In [None]:
# Reload the raw data and split again so this experiment starts from the full training set.
(train_input, train_target), (test_input, test_target) = keras.datasets.fashion_mnist.load_data()

# Add a channel axis and divide by 255.
train_scaled = train_input.reshape(-1, 28, 28, 1) / 255
test_scaled = test_input.reshape(-1, 28, 28, 1) / 255

# Hold out 20% of the training samples for validation.
train_scaled, val_scaled, train_target, val_target = train_test_split(train_scaled, train_target, test_size=0.2)

# Two convolution + pooling stages (32 then 64 filters) ahead of the dense classifier.
model = keras.Sequential([
    keras.Input(shape=(28, 28, 1)),
    keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same'),
    # Pooling shrinks the maps while keeping the detected features, so training is much faster.
    keras.layers.MaxPooling2D(2),
    keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
    keras.layers.MaxPooling2D(2),
    keras.layers.Flatten(),
    keras.layers.Dense(100, activation='relu'),
    keras.layers.Dropout(0.4),
    keras.layers.Dense(10, activation='softmax'),
])

# The original call omitted `optimizer`, which means Keras' default 'rmsprop';
# it is spelled out here so the difference from the earlier adam runs is visible.
# `metrics` is a list because newer Keras releases reject a bare string.
model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# save_best_only=True makes 'best.h5' hold the best epoch (by the default
# val_loss monitor) instead of whatever epoch happened to run last.
checkpoint = keras.callbacks.ModelCheckpoint('best.h5', save_best_only=True)
# Stop once val_loss has not improved for 5 epochs and restore the best weights.
early = keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

history = model.fit(
    train_scaled, train_target,
    epochs=100,    # upper bound; early stopping normally ends training sooner
    validation_data=(val_scaled, val_target),
    callbacks=[checkpoint, early],
)

In [None]:
# Training vs. validation loss per epoch for the two-convolution model.
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')    # label previously had a stray leading space
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()

# RNN - 주가 크롤링 + 예측


In [2]:
import pandas as pd    # 데이터프레임, 파일 읽기/저장
import numpy as np
import time            # 시간 조절
from tqdm import tqdm    # for문의 진행상황 확인


In [None]:

import requests        # HTTP client
from bs4 import BeautifulSoup    # HTML parsing
from io import StringIO    # wraps an HTML string as a file-like object for pandas

header = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5)'}

# Naver Finance: pick a stock -> quotes tab, then change sise.naver to
# sise_day.naver in the address bar to get the paginated daily table.
stock_code = "035720"    # Kakao
base_url = f"https://finance.naver.com/item/sise_day.naver?code={stock_code}"


def fetch_quote_page(page):
    """Download one page of the daily quote listing and return it parsed.

    Raises requests.HTTPError on a non-2xx response, so an error page is not
    silently parsed as if it were quote data.
    """
    res = requests.get(f"{base_url}&page={page}", headers=header, timeout=10)
    res.raise_for_status()
    # An explicit parser avoids bs4's "no parser specified" warning and
    # environment-dependent parser selection.
    return BeautifulSoup(res.text, "html.parser")


html = fetch_quote_page(1)

# The "td.pgRR" cell links to the final page; its page number is the last
# '='-separated field of the href. Fall back to a single page if the link is absent.
last_link = html.select_one("td.pgRR > a")
last_page = int(last_link["href"].split("=")[-1]) if last_link is not None else 1

total = []

for n in tqdm(range(1, last_page + 1)):

    html = fetch_quote_page(n)

    table = html.select_one("table.type2")
    # pandas deprecates passing literal HTML to read_html; wrap it in StringIO.
    table = pd.read_html(StringIO(str(table)))[0]
    table = table[ table['날짜'].notnull() ]    # keep only rows that have a date

    total.append(table)
    time.sleep(0.2)    # pause between requests

df = pd.concat(total, ignore_index=True)    # merge all pages into one table

df = df[::-1]    # reverse the row order (dates ascending)

df.to_excel('카카오 주가.xlsx')

In [3]:
# Load the saved quote history; the first spreadsheet column becomes the index.
df = pd.read_excel("카카오 주가 2020-05-10.xlsx", index_col=0)

# Model input columns, in order: close, open, high, low, volume.
feature_columns = ['종가', '시가', '고가', '저가', '거래량']
dfx = df[feature_columns]
dfx

Unnamed: 0,종가,시가,고가,저가,거래량
5519,11200,11200,11200,11200,12
5518,14000,14000,14000,14000,405
5517,15650,15650,15650,15650,214
5516,17500,17500,17500,17500,191
5515,19600,19600,19600,19600,772
...,...,...,...,...,...
5,88500,87300,89200,87000,1435047
4,89000,88600,91900,88300,1865993
3,84300,86500,86600,84100,3592237
2,84100,84000,85000,82800,1533542


In [4]:
dfx_arr = dfx.to_numpy()    # convert the input features to a NumPy matrix
dfx_arr

array([[  11200,   11200,   11200,   11200,      12],
       [  14000,   14000,   14000,   14000,     405],
       [  15650,   15650,   15650,   15650,     214],
       ...,
       [  84300,   86500,   86600,   84100, 3592237],
       [  84100,   84000,   85000,   82800, 1533542],
       [  83600,   82000,   84300,   81900, 2390050]], dtype=int64)

In [5]:
# (number of rows, number of feature columns)
dfx_arr.shape

(5519, 5)

In [6]:
# Normalize the input features to the 0-1 range.
from sklearn.preprocessing import MinMaxScaler

# NOTE(review): the scaler is fitted on the whole series, including rows that
# later end up in the test split, so test-period min/max values influence the
# training inputs - confirm this is acceptable.
a = MinMaxScaler()
dfx_arr = a.fit_transform(dfx_arr)    # fit and transform in one call
dfx_arr    # scaled inputs

array([[0.00000000e+00, 2.02166065e-02, 1.99643494e-02, 2.03266788e-02,
        6.35083673e-07],
       [5.12070227e-03, 2.52707581e-02, 2.49554367e-02, 2.54083485e-02,
        2.14340740e-05],
       [8.13825896e-03, 2.82490975e-02, 2.78966132e-02, 2.84029038e-02,
        1.13256588e-05],
       ...,
       [1.33686906e-01, 1.56137184e-01, 1.54367201e-01, 1.52631579e-01,
        1.90114256e-01],
       [1.33321141e-01, 1.51624549e-01, 1.51515152e-01, 1.50272232e-01,
        8.11606239e-02],
       [1.32406730e-01, 1.48014440e-01, 1.50267380e-01, 1.48638838e-01,
        1.26490144e-01]])

In [9]:
dfy_arr = dfx_arr[:, 0]    # every row, column 0 (close price) - after to_numpy() the data is a matrix indexed as arr[row_index, column_index]
dfy_arr    # targets

array([0.        , 0.0051207 , 0.00813826, ..., 0.13368691, 0.13332114,
       0.13240673])

In [11]:
# Build supervised samples: `size` consecutive days of features -> the next day's close.
# Example with size = 10:
#   inputs : days 1..10
#   target : close on day 11

size = 10    # sequence length (days per input window)
window_starts = range(len(dfx_arr) - size)

data_x = [dfx_arr[start : start + size] for start in window_starts]
data_y = [dfy_arr[start + size] for start in window_starts]

In [None]:
# Build the train and test sets (see the day-10 lecture at 01:04:00).
# A library splitter would shuffle the order, so slice the sequence by hand.

train_size = int(len(data_y) * 0.8)    # first 80% of the samples are used for training

train_x, test_x = np.array(data_x[:train_size]), np.array(data_x[train_size:])
train_y, test_y = np.array(data_y[:train_size]), np.array(data_y[train_size:])

In [None]:
from tensorflow import keras

# Single recurrent layer regressor: windows of 10 rows x 5 columns in, one value out.
model = keras.Sequential([
    keras.layers.SimpleRNN(10, activation='relu', input_shape=(10, 5)),
    keras.layers.Dropout(0.1),
    keras.layers.Dense(1),    # plain regression output, so no sigmoid/softmax
])

# Optimizer and error measure.
model.compile(loss='mean_squared_error', optimizer='adam')

model.fit(train_x, train_y, batch_size=32, epochs=100)

In [None]:
import matplotlib.pyplot as plt

# Overlay the actual test targets and the model's predictions (both in scaled units).
plt.figure(figsize=(20, 12))
plt.grid(True)

plt.plot(test_y, color='red', marker='.', label='real')
plt.plot(model.predict(test_x), color='blue', marker='.', label='predicted')
plt.legend()    # without this call the labels above are never drawn

## 이전 n일 데이터로 다음날 종가 예측하기

In [None]:
# Given the most recent 10 days of data, predict the next close.
df['종가'].iloc[-1]    # today's close (last row)

In [None]:
dfy_arr[-1]    # today's close after preprocessing (the array is named dfy_arr; `dfy` is never defined)

In [None]:
# Predict tomorrow's close (scaled units) from the most recent `size` rows, passed as a batch of one.
# test_x[-1] ends one row before the last day and its target is the latest known
# close, so predicting on it does not produce a value for the next, unseen day.
model.predict(np.array([dfx_arr[-size:]]))[0][0]

In [None]:
# Tomorrow's close in original price units.
# 1) Predict from the most recent `size` rows; test_x[-1] would only re-predict the latest known close.
# 2) Undo the min-max scaling for column 0 (close). A ratio of two scaled values is not
#    a price ratio, because scaling subtracts the column minimum before dividing.
next_scaled = model.predict(np.array([dfx_arr[-size:]]))[0][0]
next_scaled * a.data_range_[0] + a.data_min_[0]