In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler

In [2]:
train = pd.read_csv("./data/train_preproc1.csv")

In [19]:
def make_seq_data(train_x, train_y, test_x, test_y, pred_days, seq_len):
    train_x_seq = []
    train_y_seq = []
    test_x_seq = []
    test_y_seq = []
    
    for i in range(seq_len, len(train_x)-pred_days):
        train_x_seq.append(train_x[i - seq_len:i])
        train_y_seq.append(train_y[i])

    for i in range(seq_len, len(test_x)-pred_days):
        test_x_seq.append(test_x[i - seq_len:i])
        test_y_seq.append(test_y[i])

    train_x_seq, train_y_seq = np.array(train_x_seq), np.array(train_y_seq)
    test_x_seq, test_y_seq = np.array(test_x_seq), np.array(test_y_seq)

    return train_x_seq, train_y_seq, test_x_seq, test_y_seq

In [20]:
import matplotlib.pyplot as plt

class lstm_model():
    def __init__(self, input_shape, learning_rate=1e-2, load_path=None):
        
        model = Sequential()
        model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
        model.add(LSTM(64, return_sequences=True))
        model.add(LSTM(32, return_sequences=True))
        model.add(Dense(1))
        self.model = model
        self.optimizer = Adam(learning_rate=learning_rate)
        
        if load_path is not None:
            self.load_model(load_path)
    
    def save_model(self, path):
        self.model.save_weights(path)
    
    def load_model(self, path):
        self.model.load_weights(path)

    # 학습 후 loss 그래프 그림
    def train(self, train_x, train_y, epochs=100, batch_size=32, validation_split=0.1, loss='mse'):
        self.model.compile(optimizer=self.optimizer, loss=loss)
        history = self.model.fit(train_x, train_y,
                                 epochs=epochs,
                                 batch_size=batch_size,
                                 validation_split=validation_split)
        plt.plot(history.history['loss'], label='Training loss')
        plt.plot(history.history['val_loss'], label='Validation loss')
        plt.legend()
        plt.show()

        self.history = history

    def predict(self, x):
        pred = self.model.predict(x)
        return pred
    
    def plot_pred(self, y, pred_y):
        plt.figure(figsize=(20, 10))
        plt.plot(y, label='GT')
        plt.plot(pred_y, label='prediction')
        plt.legend()
        plt.show()

    def pred_plot(self, x, y):
        pred = self.predict(x)
        self.plot_pred(y, pred)

In [21]:
model_dic = {}
pred_days = 1
seq_len = 14

epochs = 15
batch_size = 16
learning_rate = 5e-2
loss = 'mse'

for i in tqdm(train.item.unique()):
    train_now = train.loc[train.item == i]
    y = train_now.price
    x = train_now.drop(['item', 'price'], axis=1)

    n_train = int(len(x) * 0.9)

    scaler = StandardScaler()
    x = scaler.fit_transform(x)
    
    train_x, test_x = x[:n_train], x[n_train:]
    train_y, test_y = y[:n_train], y[n_train:]

    train_x, train_y, test_x, test_y = make_seq_data(train_x, train_y, test_x, test_y, pred_days, seq_len)

    lstm = lstm_model((batch_size, train_x.shape[1:3]),
                      learning_rate=learning_rate)
    
    lstm.train(train_x, train_y,
               epochs=epochs,
               batch_size=batch_size,
               loss=loss)
    lstm.pred_plot(test_x, test_y)

    model_dic[i] = (lstm, scaler)

  0%|          | 0/5 [00:00<?, ?it/s]


KeyError: 14

In [7]:
train_x.shape

(13707, 13)

In [6]:
lstm.predict(test_x)

NameError: name 'lstm' is not defined

In [7]:
train_now.drop(['item', 'price'], axis=1)

Unnamed: 0,year,month,day,dow,is_holiday,corporation_A,corporation_B,corporation_C,corporation_D,corporation_E,corporation_F,location_J,location_S
0,2019,1,1,1,1,1,0,0,0,0,0,1,0
1,2019,1,2,2,0,1,0,0,0,0,0,1,0
2,2019,1,3,3,0,1,0,0,0,0,0,1,0
3,2019,1,4,4,0,1,0,0,0,0,0,1,0
4,2019,1,5,5,0,1,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15225,2023,2,27,0,0,0,0,0,0,1,0,0,1
15226,2023,2,28,1,0,0,0,0,0,1,0,0,1
15227,2023,3,1,2,1,0,0,0,0,1,0,0,1
15228,2023,3,2,3,0,0,0,0,0,1,0,0,1
