In [None]:
# Create a new dataframe with only the 'Close' column
data = df.filter(['Close'])
# Convert the dataframe to a numpy array of shape (n_rows, 1)
dataset = data.values
# Number of rows to train the model on: the first 95% of rows, rounded up
training_data_len = int(np.ceil(len(dataset) * .95))

# Number of consecutive past values that make up one input sample
window_size = 60
if training_data_len <= window_size:
    # Without this guard the reshape below fails with an opaque IndexError
    raise ValueError(
        'Need more than %d training rows to build one window, got %d'
        % (window_size, training_data_len)
    )

# Fit the scaler on the training rows only, then apply it to every row.
# Fitting on the full dataset would let the min/max of the held-out rows
# influence how the training inputs are scaled (data leakage).
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:training_data_len])
scaled_data = scaler.transform(dataset)

train_data = scaled_data[:training_data_len, :]

# Split the data into x_train and y_train data sets:
# each x_train row holds `window_size` consecutive scaled values and the
# matching y_train entry is the value that immediately follows them.
x_train = np.array([
    train_data[i - window_size:i, 0]
    for i in range(window_size, len(train_data))
])
y_train = train_data[window_size:, 0].copy()

# Reshape the data to (samples, timesteps, 1), the 3D layout the LSTM layer is given
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# Build the LSTM model: two stacked LSTM layers feeding two Dense layers.
# The first LSTM returns the full sequence so the second LSTM can consume it;
# the second returns only its last output, which the Dense layers reduce to
# a single predicted value.
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    LSTM(64, return_sequences=False),
    Dense(25),
    Dense(1),
])

# Compile with the Adam optimizer and mean-squared-error loss
model.compile(loss='mean_squared_error', optimizer='adam')

# Train for a single epoch, updating the weights after every sample
model.fit(x_train, y_train, epochs=1, batch_size=1)

# Create the testing data set.
# The window length must match what the model was trained on, so read it
# from the training tensor instead of repeating the literal.
window = x_train.shape[1]
# Start `window` rows before the train/test split so that the first held-out
# row already has a full window of history in front of it.
test_data = scaled_data[training_data_len - window:, :]

# y_test holds the unscaled target values for the held-out rows
y_test = dataset[training_data_len:, :]
# x_test holds one window of scaled values per held-out row
x_test = np.array([
    test_data[i - window:i, 0]
    for i in range(window, len(test_data))
])

# Reshape the data to (samples, timesteps, 1)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

# Get the model's predictions and map them back to the original scale
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)

# Get the root mean squared error (RMSE).
# Mismatched shapes would broadcast silently and yield a wrong score, so fail loudly.
if predictions.shape != y_test.shape:
    raise ValueError(
        'predictions shape %s does not match y_test shape %s'
        % (predictions.shape, y_test.shape)
    )
rmse = np.sqrt(np.mean((predictions - y_test) ** 2))
print('RMSE:', rmse)

# Plot the data
train = data[:training_data_len]
# Copy the slice before adding a column: assigning into a slice of `data`
# triggers pandas' SettingWithCopyWarning and may or may not write through.
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions

# Visualize the training values, the held-out values and the predictions
plt.figure(figsize=(16,6))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
# Plots two lines, one per column, matching the last two legend entries
plt.plot(valid[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()

- Dense layer expects a vector as input
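- Each sample fed to an LSTM layer is a 2D array of shape (timesteps, n_features), so a batch of samples is a 3D array: (samples, timesteps, n_features).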

- return_sequences defaults to False: only the last timestep cell emits a signal, so the layer's output is a vector.
- The layer before RepeatVector(n_steps) needs return_sequences=False, because RepeatVector expects a single vector per sample. One LSTM layer has as many cells as there are timesteps, and only the last one should emit.
- The RepeatVector layer acts as a bridge between the encoder and decoder modules.
- The TimeDistributed layer creates a vector of length equal to the number of features outputted from the previous layer. In this network, Layer 5 outputs 128 features. Therefore, the TimeDistributed layer creates a 128 long vector and duplicates it 2 (= n_features) times.
- return_sequences=True for the layer before the timedistributed layer
- The Decoder is designed to unfold the encoding. Therefore, the Decoder layers are stacked in the reverse order of the Encoder layers.
> model
- model = Sequential()
- model.add(LSTM(128, activation='relu', input_shape=(timesteps,n_features), return_sequences=True))
- model.add(LSTM(64, activation='relu', return_sequences=False))
- model.add(RepeatVector(timesteps))
- model.add(LSTM(64, activation='relu', return_sequences=True))
- model.add(LSTM(128, activation='relu', return_sequences=True))
- model.add(TimeDistributed(Dense(n_features)))
- model.compile(optimizer='adam', loss='mse')

### ways to read in files 

In [None]:
import os
# NOTE(review): this path is relative to the current working directory, so
# running this cell a second time in the same kernel fails because the kernel
# is already inside Data/Stocks/. Kept as-is in case later cells rely on the
# working directory.
os.chdir('Data/Stocks/')

# Count the entries in the stocks directory.
# (Named `stock_files`, not `list`, so the builtin `list` stays usable.)
stock_files = os.listdir()
number_files = len(stock_files)
print(number_files)

company_name = ['luv.us.txt','dal.us.txt', 'aal.us.txt', 'ual.us.txt']

# Read each selected file into its own dataframe and collect them in `data`
data = []
for filename in company_name:
    df = pd.read_csv(filename, sep=',')
    # Label each frame with the text before the first dot,
    # e.g. 'luv' for 'luv.us.txt'
    label = filename.split('.', 1)[0]
    df['Label'] = label
    df['Date'] = pd.to_datetime(df['Date'])

    data.append(df)