In [16]:
import pandas as pd
import numpy as np
import tensorflow as tf
# from keras.models import Sequential
# from keras.layers import Dense, Dropout, Activation, Flatten, LSTM, TimeDistributed, RepeatVector
# from keras.layers.normalization import BatchNormalization
# from keras.optimizers import Adam
# from keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt
configpartno='85-EKA0190'

In [49]:
def readTrain():
    train =  pd.read_csv("./data/Parts_EQP_Output_ByMonth_20210407_van.csv")
    train= train[train['PART_NO']==configpartno]  
    train.drop(columns=['PART_NO','EQP_NO','MFG_MONTH','PM','TS','ENG','NST'],inplace=True)
    train.groupby(['STOCK_EVENT_TIME']).sum().reset_index()
    return train

In [50]:
df_train = readTrain()

Unnamed: 0,STOCK_EVENT_TIME,QTY
0,2015-01-31,0
1,2015-02-28,0
2,2015-03-31,0
3,2015-04-30,0
4,2015-05-31,0
...,...,...
70,2020-11-30,136
71,2020-12-31,156
72,2021-01-31,150
73,2021-02-28,117


In [51]:
def augFeatures(train):
  train["Date"] = pd.to_datetime(train["STOCK_EVENT_TIME"])
  train["year"] = train["Date"].dt.year
  train["month"] = train["Date"].dt.month
#   train["date"] = train["Date"].dt.day
#   train["day"] = train["Date"].dt.dayofweek
  return train

In [52]:
def normalize(train):
    train.drop(columns=["Date","STOCK_EVENT_TIME"], axis=1,inplace=True)
    # train = train.drop(["Date"], axis=1)
    train_norm = train.apply(lambda x: (x - np.mean(x)) / (np.max(x) - np.min(x)))
    return train_norm

In [57]:
def buildTrain(train, pastDay=30, futureDay=5):
  X_train, Y_train = [], []
  for i in range(train.shape[0]-futureDay-pastDay):
    X_train.append(np.array(train.iloc[i:i+pastDay]))
    Y_train.append(np.array(train.iloc[i+pastDay:i+pastDay+futureDay]["QTY"]))
  return np.array(X_train), np.array(Y_train)

In [37]:
def shuffle(X,Y):
  np.random.seed(10)
  randomList = np.arange(X.shape[0])
  np.random.shuffle(randomList)
  return X[randomList], Y[randomList]

In [38]:

def splitData(X,Y,rate):
  X_train = X[int(X.shape[0]*rate):]
  Y_train = Y[int(Y.shape[0]*rate):]
  X_val = X[:int(X.shape[0]*rate)]
  Y_val = Y[:int(Y.shape[0]*rate)]
  return X_train, Y_train, X_val, Y_val

In [58]:
# read SPY.csv
train = readTrain()

# Augment the features (year, month, date, day)
train_Aug = augFeatures(train)

In [59]:
train_Aug

Unnamed: 0,STOCK_EVENT_TIME,QTY,Date,year,month
3150,2015-01-31,0,2015-01-31,2015,1
3151,2015-02-28,0,2015-02-28,2015,2
3152,2015-03-31,0,2015-03-31,2015,3
3153,2015-04-30,0,2015-04-30,2015,4
3154,2015-05-31,0,2015-05-31,2015,5
...,...,...,...,...,...
3745,2020-11-30,15,2020-11-30,2020,11
3746,2020-12-31,20,2020-12-31,2020,12
3747,2021-01-31,15,2021-01-31,2021,1
3748,2021-02-28,5,2021-02-28,2021,2


In [60]:
# Normalization
train_norm = normalize(train_Aug)

In [61]:

# build Data, use last 30 days to predict next 5 days
X_train, Y_train = buildTrain(train_norm, 30, 5)

# shuffle the data, and random seed is 10
X_train, Y_train = shuffle(X_train, Y_train)

# split training data and validation data
X_train, Y_train, X_val, Y_val = splitData(X_train, Y_train, 0.1)
# X_trian: (5710, 30, 10)
# Y_train: (5710, 5, 1)
# X_val: (634, 30, 10)
# Y_val: (634, 5, 1)

In [67]:
def buildOneToOneModel(shape):
    model = tf.keras.Sequential()
    model.add( tf.keras.layers.LSTM(10, input_length=shape[1], input_dim=shape[2],return_sequences=True))
    # output shape: (1, 1)
    model.add(  tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)))    # or use model.add(Dense(1))
    model.compile(loss="mse", optimizer=tf.keras.optimizers.Adam())
    model.summary()
    return model





    # model = tf.keras.Sequential([
    #         tf.keras.layers.Dense(units=32,activation = 'relu',input_shape=[X.shape[1]]),
    #         tf.keras.layers.Dense(units=16,activation = 'relu'),
    #         tf.keras.layers.Dense(units=4,activation = 'relu'), 
    #         tf.keras.layers.Dense(units=1)
    #         ]) 
    #     model.compile(loss='mean_squared_error', 
    #         optimizer=tf.keras.optimizers.Adam(0.001),
    #         metrics=[tf.keras.metrics.MeanAbsoluteError()]) 

    #     history = model.fit(X, y, epochs=500, batch_size=16, verbose=True,
    #         validation_split=0.01)   
    #     return model

In [69]:
train = readTrain()
train_Aug = augFeatures(train)
train_norm = normalize(train_Aug)
# change the last day and next day 
X_train, Y_train = buildTrain(train_norm, 1, 1)
X_train, Y_train = shuffle(X_train, Y_train)
X_train, Y_train, X_val, Y_val = splitData(X_train, Y_train, 0.1)

# from 2 dimmension to 3 dimension
Y_train = Y_train[:,np.newaxis]
Y_val = Y_val[:,np.newaxis]

model = buildOneToOneModel(X_train.shape)
callback = tf.keras.callbacks.EarlyStopping(monitor="loss", patience=10, verbose=1, mode="auto")
model.fit(X_train, Y_train, epochs=1000, batch_size=128, validation_data=(X_val, Y_val), callbacks=[callback])

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 1, 10)             560       
_________________________________________________________________
time_distributed_1 (TimeDist (None, 1, 1)              11        
Total params: 571
Trainable params: 571
Non-trainable params: 0
_________________________________________________________________
Train on 539 samples, validate on 59 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 

<tensorflow.python.keras.callbacks.History at 0x20e20086d48>

In [71]:
#多對一模型
def buildManyToOneModel(shape):
#   model = Sequential()
#   model.add(LSTM(10, input_length=shape[1], input_dim=shape[2]))
#   # output shape: (1, 1)
#   model.add(Dense(1))
#   model.compile(loss="mse", optimizer="adam")
#   model.summary()
#   return model
    model = tf.keras.Sequential([
            tf.keras.layers.LSTM(units=10, input_length=shape[1], input_dim=shape[2]),
            tf.keras.layers.Dense(units=1),
            ]) 
    model.compile(loss='mean_squared_error', 
            optimizer=tf.keras.optimizers.Adam(),
            # metrics=[tf.keras.metrics.MeanAbsoluteError()]
            ) 
    model.summary()
    return model

In [73]:
train_norm

Unnamed: 0,QTY,year,month
3150,-0.268022,-0.440000,-0.483636
3151,-0.268022,-0.440000,-0.392727
3152,-0.268022,-0.440000,-0.301818
3153,-0.268022,-0.440000,-0.210909
3154,-0.268022,-0.440000,-0.120000
...,...,...,...
3745,-0.068022,0.393333,0.425455
3746,-0.001356,0.393333,0.516364
3747,-0.068022,0.560000,-0.483636
3748,-0.201356,0.560000,-0.392727


In [75]:
train = readTrain()
train_Aug = augFeatures(train)
train_norm = normalize(train_Aug)
# change the last day and next day 
X_train, Y_train = buildTrain(train_norm, 30, 1)
X_train, Y_train = shuffle(X_train, Y_train)
# because no return sequence, Y_train and Y_val shape must be 2 dimension
X_train, Y_train, X_val, Y_val = splitData(X_train, Y_train, 0.1)

model = buildManyToOneModel(X_train.shape)
callback = tf.keras.callbacks.EarlyStopping(monitor="loss", patience=10, verbose=1, mode="auto")
model.fit(X_train, Y_train, epochs=1000, batch_size=128, validation_data=(X_val, Y_val), callbacks=[callback])

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 10)                560       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 11        
Total params: 571
Trainable params: 571
Non-trainable params: 0
_________________________________________________________________
Train on 513 samples, validate on 56 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 

<tensorflow.python.keras.callbacks.History at 0x20e24628688>

In [78]:
def buildOneToManyModel(shape):
#   model = Sequential()
#   model.add(LSTM(10, input_length=shape[1], input_dim=shape[2]))
#   # output shape: (5, 1)
#   model.add(Dense(1))
#   model.add(RepeatVector(5))
#   model.compile(loss="mse", optimizer="adam")
#   model.summary()
#   return model
    model = tf.keras.Sequential([
            tf.keras.layers.LSTM(units=10, input_length=shape[1], input_dim=shape[2]),
            tf.keras.layers.Dense(units=1),
            tf.keras.layers.RepeatVector(5),
            ]) 
    model.compile(loss='mean_squared_error', 
            optimizer=tf.keras.optimizers.Adam(),
            # metrics=[tf.keras.metrics.MeanAbsoluteError()]
            ) 
    model.summary()
    return model

In [79]:
train = readTrain()
train_Aug = augFeatures(train)
train_norm = normalize(train_Aug)
# change the last day and next day 
X_train, Y_train = buildTrain(train_norm, 1, 5)
X_train, Y_train = shuffle(X_train, Y_train)
X_train, Y_train, X_val, Y_val = splitData(X_train, Y_train, 0.1)

# from 2 dimmension to 3 dimension
Y_train = Y_train[:,:,np.newaxis]
Y_val = Y_val[:,:,np.newaxis]

model = buildOneToManyModel(X_train.shape)
callback = tf.keras.callbacks.EarlyStopping(monitor="loss", patience=10, verbose=1, mode="auto")
model.fit(X_train, Y_train, epochs=1000, batch_size=128, validation_data=(X_val, Y_val), callbacks=[callback])

/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000


<tensorflow.python.keras.callbacks.History at 0x20e2c61a9c8>

# 多對多模型 (輸入與輸出相同長度)
將return_sequences 設為True ，再用TimeDistributed(Dense(1)) 將輸出調整為(5,1)

In [82]:
def buildManyToManyModel(shape):
#   model = Sequential()
#   model.add(LSTM(10, input_length=shape[1], input_dim=shape[2], return_sequences=True))
#   # output shape: (5, 1)
#   model.add(TimeDistributed(Dense(1)))
#   model.compile(loss="mse", optimizer="adam")
#   model.summary()
#   return model
    model = tf.keras.Sequential([
            tf.keras.layers.LSTM(units=10, input_length=shape[1], input_dim=shape[2], return_sequences=True),
            tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)),
            ]) 
    model.compile(loss='mean_squared_error', 
            optimizer=tf.keras.optimizers.Adam(),
            # metrics=[tf.keras.metrics.MeanAbsoluteError()]
            ) 
    model.summary()
    return model

In [83]:
train = readTrain()
train_Aug = augFeatures(train)
train_norm = normalize(train_Aug)
# change the last day and next day 
X_train, Y_train = buildTrain(train_norm, 5, 5)
X_train, Y_train = shuffle(X_train, Y_train)
X_train, Y_train, X_val, Y_val = splitData(X_train, Y_train, 0.1)

# from 2 dimmension to 3 dimension
Y_train = Y_train[:,:,np.newaxis]
Y_val = Y_val[:,:,np.newaxis]

model = buildManyToManyModel(X_train.shape)
callback = tf.keras.callbacks.EarlyStopping(monitor="loss", patience=10, verbose=0, mode="auto")
model.fit(X_train, Y_train, epochs=1000, batch_size=128, validation_data=(X_val, Y_val), callbacks=[callback])


ple - loss: 0.0189 - val_loss: 0.0175
Epoch 540/1000
Epoch 541/1000
Epoch 542/1000
Epoch 543/1000
Epoch 544/1000
Epoch 545/1000
Epoch 546/1000
Epoch 547/1000
Epoch 548/1000
Epoch 549/1000
Epoch 550/1000
Epoch 551/1000
Epoch 552/1000
Epoch 553/1000
Epoch 554/1000
Epoch 555/1000
Epoch 556/1000
Epoch 557/1000
Epoch 558/1000
Epoch 559/1000
Epoch 560/1000
Epoch 561/1000
Epoch 562/1000
Epoch 563/1000
Epoch 564/1000
Epoch 565/1000
Epoch 566/1000
Epoch 567/1000
Epoch 568/1000
Epoch 569/1000
Epoch 570/1000
Epoch 571/1000
Epoch 572/1000
Epoch 573/1000
Epoch 574/1000
Epoch 575/1000
Epoch 576/1000
Epoch 577/1000
Epoch 578/1000
Epoch 579/1000
Epoch 580/1000
Epoch 581/1000
Epoch 582/1000
Epoch 583/1000
Epoch 584/1000
Epoch 585/1000
Epoch 586/1000
Epoch 587/1000
Epoch 588/1000
Epoch 589/1000
Epoch 590/1000
Epoch 591/1000
Epoch 592/1000
Epoch 593/1000
Epoch 594/1000
Epoch 595/1000
Epoch 596/1000
Epoch 597/1000
Epoch 598/1000
Epoch 599/1000
Epoch 600/1000
Epoch 601/1000
Epoch 602/1000
Epoch 603/1000
Ep

<tensorflow.python.keras.callbacks.History at 0x20e31652188>