In [46]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import tensorflow as tf

### Data

In [47]:
# Dataset selectors. They stay strings because they are spliced into file names
# here and again when the predictions are written out.
CLASS_COUNT = "3"
FILE_COUNTER = "10"

# Size of the second axis of the 3-D tensors that array2tensor builds.
THIRD_DIM = 60

# Earlier note from the author: C3_P3, C2_P4 works like a charm.
# (Those names belong to an older file scheme, 'stock_c<C>_p<P>.csv' /
# 'stock_real_p<P>.csv', that is no longer loaded here.)

# Read both CSV files and discard every row that contains a missing value.
stock_dataset = pd.read_csv('stock_c{}_{}.csv'.format(CLASS_COUNT, FILE_COUNTER)).dropna(axis='rows')
real_dataframe = pd.read_csv('stock_real_{}.csv'.format(FILE_COUNTER)).dropna(axis='rows')

# Show the first five rows of the labelled data.
stock_dataset.head()

Unnamed: 0,target,class,symbol,total_roc_1,total_roc_2,total_roc_3,total_roc_4,total_roc_5,total_roc_6,total_roc_7,...,atr_14_51,atr_14_52,atr_14_53,atr_14_54,atr_14_55,atr_14_56,atr_14_57,atr_14_58,atr_14_59,atr_14_60
0,42,2,وپاسار,-0.80261,0.385228,0.478777,0.31202,0.503946,-0.017273,-0.001152,...,-0.014471,0.016954,0.013601,0.000476,0.00766,0.021906,0.032474,0.070985,0.05235,0.061951
1,2,1,کاوه,0.071468,-0.551263,-2.791332,-3.226682,-1.991228,-1.850257,-2.175748,...,0.016055,-0.018455,0.001291,0.000238,-0.008879,0.039922,-0.000224,0.003809,-0.000664,-0.019613
2,78,0,فاسمین,0.108243,-0.378638,0.528723,-0.145009,-0.182053,-0.248709,-0.083003,...,0.018193,-0.024398,-0.032199,-0.007938,0.021596,-7.3e-05,-0.005401,-0.004972,-0.03251,0.039546
3,1,1,شبهرن,-0.551263,-2.791332,-3.226682,-1.991228,-1.850257,-2.175748,-0.483083,...,0.070516,0.004247,0.001199,0.037318,0.060555,0.002711,0.082274,0.002418,-0.001218,0.021445
4,67,1,فخوز,0.098593,-0.018655,-0.002345,0.141994,0.588364,0.035335,0.176678,...,0.108209,0.043942,0.053328,0.054528,0.014862,-0.00395,-0.006335,0.024602,-0.001854,0.019675


In [48]:
# Convert both frames to plain NumPy arrays for positional slicing below.
# stock_dataset is rebound to the array, so the conversion is guarded: running
# this cell a second time is a no-op instead of an AttributeError (an ndarray
# has no .to_numpy()).
if isinstance(stock_dataset, pd.DataFrame):
    stock_dataset = stock_dataset.to_numpy()

# real_dataframe itself is kept as a DataFrame; predictions are attached to it later.
real_dataset = real_dataframe.to_numpy()

In [49]:
# Both arrays are sliced by position: everything from column index 3 onwards is
# used as model input, and column index 1 of the labelled array is the target.
# NOTE(review): this relies on the CSV column order — confirm if the files change.
feature_columns = slice(3, None)

targets = stock_dataset[:, 1]
inputs = stock_dataset[:, feature_columns]
real_inputs = real_dataset[:, feature_columns]

# Largest label plus one; used later as the width of the softmax output layer.
class_size = targets.max() + 1

### Standardize the inputs

In [50]:
# Learn per-column scaling statistics from the labelled inputs only.
# NOTE(review): the fit uses every labelled row, including rows that the split
# cell later assigns to validation/test.
scaler = StandardScaler().fit(inputs)

# Apply the same fitted scaling to the labelled and the real inputs.
# NOTE: real_inputs is overwritten here, so re-running this cell alone would
# scale it a second time.
scaled_inputs, real_inputs = (scaler.transform(block) for block in (inputs, real_inputs))

### Shuffle the data

In [51]:
# Fixed seed so the row order -- and therefore the train/validation/test split
# built from it -- is the same on every run. A local RandomState is used so
# NumPy's global random state is left untouched.
SHUFFLE_SEED = 42
shuffled_indices = np.random.RandomState(SHUFFLE_SEED).permutation(scaled_inputs.shape[0])

# Apply the same permutation to inputs and targets so rows stay paired.
shuffled_inputs = scaled_inputs[shuffled_indices]
shuffled_targets = targets[shuffled_indices]

### Split the dataset into train, validation, and test

In [52]:
samples_count = len(shuffled_inputs)

# Below 1000 samples the training cell cross-validates on the training part,
# so no separate validation slice is carved out.
useKFold = samples_count < 1000

train_samples_count = int(0.8 * samples_count)
if useKFold:
    validation_samples_count = 0
else:
    validation_samples_count = int(0.1 * samples_count)
# Whatever is left after train and validation becomes the test set.
test_samples_count = samples_count - train_samples_count - validation_samples_count

# Consecutive row ranges of the shuffled data: [train | validation | test].
validation_end = train_samples_count + validation_samples_count
train_rows = slice(None, train_samples_count)
validation_rows = slice(train_samples_count, validation_end)
test_rows = slice(validation_end, None)

train_inputs, train_targets = shuffled_inputs[train_rows], shuffled_targets[train_rows]
validation_inputs, validation_targets = shuffled_inputs[validation_rows], shuffled_targets[validation_rows]
test_inputs, test_targets = shuffled_inputs[test_rows], shuffled_targets[test_rows]

### Reshape inputs from 2D array to 3D tensor

In [53]:
def array2tensor(arr, third_dim=None):
    """Unfold flat feature rows into a 3-D ``(rows, third_dim, groups)`` array.

    Every input row is read as ``groups`` consecutive runs of ``third_dim``
    values, laid out so that ``result[x, y, z] == arr[x, z * third_dim + y]``.

    Parameters
    ----------
    arr : 2-D array-like of shape (rows, columns)
        Flat feature matrix.
    third_dim : int, optional
        Length of the second output axis. Defaults to the module-level
        ``THIRD_DIM`` when omitted.

    Returns
    -------
    numpy.ndarray
        Newly allocated float64 array of shape
        ``(rows, third_dim, columns // third_dim)``. Columns after the last
        complete run of ``third_dim`` values are ignored.

    Raises
    ------
    ValueError
        If ``arr`` is not two-dimensional (for example when the function is
        applied a second time to its own output).
    """
    steps = THIRD_DIM if third_dim is None else third_dim

    arr = np.asarray(arr)
    if arr.ndim != 2:
        raise ValueError(
            'array2tensor expects a 2-D array, got {} dimension(s)'.format(arr.ndim))

    rows = arr.shape[0]
    zLength = arr.shape[1] // steps

    # Drop the incomplete trailing run, view each row as (groups, steps) and
    # swap the last two axes. This replaces the element-by-element Python loop.
    grouped = arr[:, :zLength * steps].reshape(rows, zLength, steps)

    # np.array copies, so the caller always gets a fresh C-ordered float64 array.
    return np.array(grouped.transpose(0, 2, 1), dtype=np.float64, order='C')

# Reshape every flat input matrix into its 3-D form, in the same order as before:
# train, validation, test, then the real data.
# NOTE: each name is rebound to its 3-D version, so this cell is meant to run once
# per pass through the notebook.
(train_inputs,
 validation_inputs,
 test_inputs,
 real_inputs) = [array2tensor(flat_inputs)
                 for flat_inputs in (train_inputs, validation_inputs, test_inputs, real_inputs)]

### Convert to tensor

In [54]:
def _as_float32_tensor(values):
    """Return `values` as a TensorFlow tensor with float32 elements."""
    return tf.convert_to_tensor(values, dtype = np.float32)

# Inputs and targets of each split are converted together; real_inputs is left
# as a NumPy array.
train_inputs, train_targets = _as_float32_tensor(train_inputs), _as_float32_tensor(train_targets)
validation_inputs, validation_targets = _as_float32_tensor(validation_inputs), _as_float32_tensor(validation_targets)
test_inputs, test_targets = _as_float32_tensor(test_inputs), _as_float32_tensor(test_targets)

# Model

### Outline the model

In [55]:
input_size = train_inputs.shape[1]   # not read by create_model
output_size = class_size             # one softmax unit per class
hidden_layer_size = 50               # units in every LSTM layer

def create_model():
    """Build and compile a fresh, untrained classifier.

    The network is four stacked LSTM layers of `hidden_layer_size` units, each
    followed by Dropout(0.2), and a softmax Dense layer with `output_size`
    units. The first three LSTM layers return full sequences so the next LSTM
    can consume them; the fourth returns only its final output.

    Reads the module-level `train_inputs` (for the input shape),
    `hidden_layer_size` and `output_size`.

    Returns:
        A tf.keras.Sequential model compiled with the Adam optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.
    """
    dropout_rate = 0.2
    sequence_shape = (train_inputs.shape[1], train_inputs.shape[2])

    model = tf.keras.Sequential()

    # First recurrent layer carries the input shape.
    model.add(tf.keras.layers.LSTM(units = hidden_layer_size,
                                   return_sequences = True,
                                   input_shape = sequence_shape))
    model.add(tf.keras.layers.Dropout(dropout_rate))

    # Two intermediate sequence-to-sequence recurrent layers.
    for _ in range(2):
        model.add(tf.keras.layers.LSTM(units = hidden_layer_size, return_sequences = True))
        model.add(tf.keras.layers.Dropout(dropout_rate))

    # Last recurrent layer collapses the sequence to a single vector.
    model.add(tf.keras.layers.LSTM(units = hidden_layer_size))
    model.add(tf.keras.layers.Dropout(dropout_rate))

    model.add(tf.keras.layers.Dense(output_size, activation = 'softmax'))

    model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

    return model


### Training

In [None]:
batch_size = 100
max_epochs = 40

# Roughly one fold per 50 samples, but never fewer than 2: KFold rejects
# n_splits < 2, which the unclamped samples_count / 50 produced for datasets
# with fewer than 100 samples.
folds_count = max(2, samples_count // 50)

# Per-fold results are only collected in k-fold mode; otherwise both names hold
# the placeholder value 1.
fold_counter = 0
fold_estimate_loss = [None] * folds_count if useKFold else 1
fold_estimate_accuracy = [None] * folds_count if useKFold else 1

if useKFold:
    # Cross-validate on the training portion: every fold trains a brand-new model
    # on the remaining folds and is scored on the held-out fold.
    # After the loop, `model` is the model trained for the last fold.
    for train_index, test_index in KFold(folds_count).split(train_inputs):
        x_train, x_test = tf.gather(train_inputs, train_index), tf.gather(train_inputs, test_index)
        y_train, y_test = tf.gather(train_targets, train_index), tf.gather(train_targets, test_index)

        model = create_model()

        model.fit(x_train,
                  y_train,
                  batch_size = batch_size,
                  epochs = max_epochs,
                  verbose = 2)

        # test_loss / test_accuracy here refer to the held-out fold, not to test_inputs.
        test_loss, test_accuracy = model.evaluate(x_test, y_test)
        fold_estimate_loss[fold_counter] = test_loss
        fold_estimate_accuracy[fold_counter] = test_accuracy * 100
        fold_counter += 1

        print('')
        print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy * 100))
        print('')
else:
    # Single run with a validation split; training stops early once the monitored
    # validation metric has not improved for 20 consecutive epochs.
    early_stopping = tf.keras.callbacks.EarlyStopping(patience = 20)
    model = create_model()

    model.fit(train_inputs,
              train_targets,
              batch_size = batch_size,
              epochs = max_epochs,
              callbacks = [early_stopping],
              validation_data = (validation_inputs, validation_targets),
              verbose = 2)

    # Final score on the held-out test split.
    test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)

Train on 2928 samples, validate on 366 samples
Epoch 1/40
2928/2928 - 11s - loss: 0.9686 - accuracy: 0.5096 - val_loss: 0.8497 - val_accuracy: 0.6339
Epoch 2/40
2928/2928 - 5s - loss: 0.7883 - accuracy: 0.6311 - val_loss: 0.8096 - val_accuracy: 0.6421
Epoch 3/40
2928/2928 - 5s - loss: 0.7387 - accuracy: 0.6499 - val_loss: 0.7556 - val_accuracy: 0.6667
Epoch 4/40
2928/2928 - 5s - loss: 0.6666 - accuracy: 0.6954 - val_loss: 0.7701 - val_accuracy: 0.6667
Epoch 5/40
2928/2928 - 5s - loss: 0.6273 - accuracy: 0.7145 - val_loss: 0.7169 - val_accuracy: 0.6995
Epoch 6/40
2928/2928 - 5s - loss: 0.5956 - accuracy: 0.7346 - val_loss: 0.6594 - val_accuracy: 0.6995
Epoch 7/40
2928/2928 - 5s - loss: 0.5524 - accuracy: 0.7558 - val_loss: 0.6453 - val_accuracy: 0.7295
Epoch 8/40
2928/2928 - 5s - loss: 0.5427 - accuracy: 0.7620 - val_loss: 0.6654 - val_accuracy: 0.7268
Epoch 9/40
2928/2928 - 5s - loss: 0.5012 - accuracy: 0.7900 - val_loss: 0.6199 - val_accuracy: 0.7377
Epoch 10/40
2928/2928 - 5s - loss:

### Test the model

In [75]:
if not useKFold :
    # Single-split run: report the scores measured on the test split.
    accuracy_percent = test_accuracy * 100.0
    print('')
    print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, accuracy_percent))
    print('')
else :
    # k-fold run: show how the per-fold accuracies (in percent) are distributed.
    # density=True normalises the histogram; density=False would plot raw counts.
    plt.hist(fold_estimate_accuracy, density=True)
    plt.ylabel('Probability')
    plt.xlabel('Accuracy')


Test loss: 0.55. Test accuracy: 77.32%



### Make predictions

In [69]:
# Run the trained network on the scaled, reshaped rows from the stock_real_* file.
# `model` is whichever model the training cell assigned last (in k-fold mode,
# the model of the final fold). Later cells treat each column of the result
# as one class.
predictions = model.predict(real_inputs)

In [70]:
# Put one probability column per class at the front of the frame:
# pred_class_0 goes to position 0, pred_class_1 to position 1, and so on.
# The existing 'target' and 'class' columns are left in place.
# NOTE: insert() modifies real_dataframe in place and duplicates are allowed,
# so running this cell twice adds a second set of identically named columns.
for class_index, class_probabilities in enumerate(predictions.T):
    real_dataframe.insert(loc = class_index,
                          column = "pred_class_" + str(class_index),
                          value = class_probabilities,
                          allow_duplicates = True)

In [71]:
# Index of the highest-probability class for every row; on ties the lowest
# index wins. np.argmax replaces the nested Python loops that scanned each row
# by hand. The result keeps the (rows, 1) float64 layout those loops produced.
# (The loops started their running maximum at 0, so they only differed from
# argmax for rows without a positive entry -- not expected from softmax output.)
pred = np.argmax(predictions, axis = 1).astype(np.float64).reshape(-1, 1)

# Place the label right after the per-class probability columns.
# Duplicates are allowed, so re-running this cell adds another 'predicted' column.
real_dataframe.insert(predictions.shape[1], "predicted", pred, True)

In [72]:
# Write the real-data frame, including the prediction columns added above, to
# 'stock_pred_c<CLASS_COUNT>_<FILE_COUNTER>.csv' in the working directory.
# 'utf-8-sig' prefixes the file with a byte-order mark; the row index is not written.
output_name = 'stock_pred_c{}_{}.csv'.format(CLASS_COUNT, FILE_COUNTER)
real_dataframe.to_csv(output_name, encoding = 'utf-8-sig', index = False)

In [73]:
# Preview the first rows of the frame that was just exported, prediction columns included.
real_dataframe.head()

Unnamed: 0,pred_class_0,pred_class_1,pred_class_2,predicted,pred_class_0.1,pred_class_1.1,pred_class_2.1,predicted.1,target,class,...,atr_14_51,atr_14_52,atr_14_53,atr_14_54,atr_14_55,atr_14_56,atr_14_57,atr_14_58,atr_14_59,atr_14_60
0,0.916393,0.082762,0.000845,0.0,0.741097,0.253753,0.00515,0.0,0,0,...,0.015677,0.039046,0.120204,-0.008058,0.093087,0.340806,0.127212,0.307264,0.268048,0.007208
1,6.5e-05,0.000891,0.999044,2.0,0.008033,0.216886,0.775081,2.0,0,0,...,0.089434,0.016833,0.043883,0.005645,0.002742,0.011367,0.004134,0.076095,0.089117,0.011363
2,4.9e-05,0.000654,0.999297,2.0,0.002695,0.059194,0.938111,2.0,0,0,...,0.008133,0.00471,0.00132,0.057893,0.082064,0.005488,0.034325,-0.027928,0.053,0.048434
3,0.003536,0.124818,0.871645,2.0,0.220317,0.725375,0.054308,1.0,0,0,...,0.018592,0.016049,0.108733,0.014644,0.01137,0.037741,0.102068,0.012639,0.009456,0.080848
4,0.012812,0.869851,0.117337,1.0,0.050516,0.851635,0.097848,1.0,0,0,...,0.069737,0.102519,0.022151,0.00203,0.059968,0.048641,-0.024667,-0.004647,0.132384,0.027904
