In [3]:
import os
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf

%matplotlib inline

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
os.getcwd()

'/Users/Namin/Documents/CAP/CapstoneUOS/notebooks'

<b>Read the files.</b>

In [5]:
def read_files(past, future):
    """
    result[:][0], result[:][1] 로 전처리된 파일 읽는 함수
    """
    df_past = pd.read_excel(past, header=0, index_col=0)
    df_future = pd.read_excel(future, header=0, index_col=0)
    
    return df_past, df_future

n_past = 180
n_future = 30

<b>Generate training, validation and test datasets.</b>

In [14]:
def make_tvt(df_past, df_future, predict_features_list, BATCH_SIZE=64):
    """
    train, val, test 데이터 생성하는 함수
    """
    predict_features = predict_features_list # 예측하고 싶은 변수들만
    df_future = df_future[predict_features]

    x_train, x_test, y_train, y_test = train_test_split(df_past, df_future, test_size=0.3, random_state=7)
    x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=0.5, random_state=7)

    x_train_copy = x_train.values.reshape(x_train.shape[0], X_train.shape[1], 1)
    y_train_copy = y_train.values
    x_val_copy = x_val.values.reshape(x_val.shape[0], x_val.shape[1], 1)
    y_val_copy = y_val.values
    x_test_copy = x_test.values.reshape(y_test.shape[0], y_test.shape[1], 1)
    y_test_copy = y_test.values

    # 주석 처리
    print("x_train's shape: ", x_train_copy.shape) 
    print("y_train's shape: ", y_train_copy.shape) 
    print("x_val's shape: ", x_val_copy.shape)
    print("y_val's shape: ", y_val_copy.shape)
    print("x_test's shape: ", x_test_copy.shape)
    print("y_test's shape: ", y_test_copy.shape)
    
    train_data = tf.data.Dataset.from_tensor_slices((x_train_copy, y_train_copy))
    train_data = train_data.cache().batch(BATCH_SIZE).repeat()
    val_data = tf.data.Dataset.from_tensor_slices((x_val_copy, y_val_copy))
    val_data = val_data.cache().batch(BATCH_SIZE).repeat()
    
    return x_train, y_train, x_test_copy, y_test_copy, train_data, val_data, BATCH_SIZE, df_future

<b>Plot some graphs for training samples.</b>

In [12]:
def plot_graphs(df_past, m, x_train, y_train):
    """
    어떤 변수들로 어떤 정보를 예측하고 싶은지 그래프로 나타내주는 함수
    """
    past_features = df_past.columns.unique()
    
    for i in range(m):
        fig = plt.figure(figsize=(5*len(past_features), 5)) 
    
        for j in range(len(past_features)):
            plt.subplot(m, len(past_features), j+1)
            plt.plot(x_train.iloc[i][n_past*j:n_past*(j+1)], 'g', label="past")
            
            for k in range(len(predict_features)):
                if predict_features[k] == past_features[j]:
                    plt.plot(y_train.iloc[i][n_past*j:n_past*(j+1)], 'r', label="future (GOAL)") # 예측해야 할 추이
            
            plt.title(past_features[j], fontdict={'size': 20}) # 'color': white
            plt.legend()
            plt.show()

<b>Make a (rough) model.</b>

In [13]:
def rnn(n_future):
    """
    RNN 모델과 callback_list을 리턴하는 함수
    """
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.LSTM(units=16,
                                   return_sequences=True,
                                   input_shape=(x_train_copy.shape[-2:])))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(1000))
    model.add(tf.keras.layers.Dense(n_future))
    model.compile(loss='mse', optimizer='adam', metrics=['loss'])
    
    # 주석 처리
    model.summary()
    
    callback_list = [ModelCheckpoint(filepath='rough_rnn_checkpoint.h5',
                                     monitor='val_loss',
                                     save_best_only=True),
                     TensorBoard(log_dir='rough_rnn_logs/{}'.format(time.asctime()))]
    
    return model, callback_list

In [16]:
EPOCHS = 10
model = rnn()

history = model.fit(train_data,
                    epochs=EPOCHS,
                    steps_per_epoch=x_train_copy.shape[0] // BATCH_SIZE, # training samples / batch size
                    validation_data=val_data,
                    validation_steps=x_test_copy.shape[0] // BATCH_SIZE,
                    callbacks=callback_list) # validation samples / batch size

NameError: name 'x_train_copy' is not defined

<b>Show the results.</b>

In [19]:
def plot_loss(EPOCHS, history):
    """
    Train loss와 val loss를 그래프로 나타내주는 함수
    """
    epochs = np.arange(1, EPOCHS+1)
    fig, axes = plt.subplots(figsize=(10, 5))

    plt.plot(epochs, history.history['loss'], label='train_loss')
    plt.plot(epochs, history.history['val_loss'], label='val_loss')
    plt.xlabel('epochs', fontdict={'size': 10})
    plt.ylabel('loss', fontdict={'size': 10})
    axes.tick_params(axis='x')
    axes.tick_params(axis='y')
    plt.legend()
    
    plt.show()

<b>Use the model to predict any test datas.</b>

In [20]:
def predict_future(x_test_copy):
    """
    Self-explanatory
    """
    test_predict = model.predict(x_test_copy)
    
    return test_predict

In [17]:
def visualize_future(df_future, n_past, n_future, x_test_copy, y_test_copy, test_predict):
    """
    잘 예측이 되었는지 그래프로 확인하는 함수
    """
    future_features = df_future.columns.unique()
    
    for k in range(len(x_test_copy.shape[0])):
        dates = [i for i in range(n_past+n_future)]
        fig, axes = plt.subplots(figsize=(5*len(future_features), 5))
        
        for j in range(len(future_features)):
            plt.subplot(k, len(past_features), j+1)
        
            plt.plot(dates[:n_past], x_test_copy[k][j*n_past:(j+1)*n_past], 'g', label='past')
            plt.plot(dates[n_past:], y_test_copy[k][j*n_past:(j+1)*n_past], 'r', label='actual future')
            plt.plot(dates[n_past:], test_predict[k][j*n_past:(j+1)*n_past], 'b', label='predicted future')
            
            plt.title(future_features[j], fontdict={'size': 20}) # , 'color': 'white'
            plt.xlabel('Time', fontdict={'size': 10})
            plt.ylabel('Subscribers', fontdict={'size': 10})
            axes.tick_params(axis='x') # colors='white'
            axes.tick_params(axis='y')
            plt.legend()
            
            plt.show()

In [18]:
def cal_loss(y_test_copy, test_predict):
    """
    Test dataset에서의 손실함수(MSE, MAE)
    """
    diff = np.subtract(y_test_copy, test_predict)
    sq = np.square(diff)
    ab = np.abs(diff)
    mse = np.mean(sq)
    mae = np.mean(ab)
    
    return mse, mae