In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt

### Data

In [2]:
# Both identifiers are kept as strings because they are spliced into file names.
CLASS_COUNT = "3"
FILE_COUNTER = "9"
# Author's note from earlier runs: the C3_P3 and C2_P4 files worked very well.
# Those runs used a 'stock_c<classes>_p<params>.csv' / 'stock_real_p<params>.csv'
# naming scheme; the loader below uses the '<classes>_<counter>' scheme instead.

# Labelled data used for training/evaluation; rows containing any NaN are dropped.
stock_dataset = pd.read_csv(
    'stock_c{}_{}.csv'.format(CLASS_COUNT, FILE_COUNTER)
).dropna(axis='rows')

# Data to run predictions on; rows containing any NaN are dropped as well.
real_dataframe = pd.read_csv(
    'stock_real_{}.csv'.format(FILE_COUNTER)
).dropna(axis='rows')

# Preview the first five rows of the labelled data.
stock_dataset.head()

Unnamed: 0,target,class,symbol,total_roc_trend_60_negative,total_roc_trend_60_positive,total_roc_trend_60_level,total_roc_trend_60_returns,total_roc_trend_20_negative,total_roc_trend_20_positive,total_roc_trend_20_level,...,atr_14_7,atr_14_8,atr_14_9,atr_14_10,atr_14_11,atr_14_12,atr_14_13,atr_14_14,last_idle_days,idle_days
0,19,0,البرز,0.126429,0.490495,0.742242,13,0.051574,0.232261,0.777949,...,0.006767,0.01099,0.008281,0.040443,0.00558,0.00296,-0.000645,-0.044931,0,0
1,8,2,دسبحان,0.206821,1.003983,0.794,18,0.007425,0.532577,0.986059,...,0.004689,0.075927,0.005482,0.004319,0.003617,0.001554,-0.000754,0.022577,0,0
2,38,1,دپارس,0.109247,0.349337,0.687274,24,0.02202,0.185553,0.881331,...,0.042524,0.044743,0.005929,3.9e-05,0.087933,0.004962,0.002055,-0.001321,0,0
3,45,0,دکوثر,0.206226,0.428227,0.518419,26,0.05338,0.117207,0.544562,...,0.022558,-0.010902,-0.005782,0.007787,-0.00657,0.045846,0.006888,0.023999,0,3
4,9,2,سفارس,0.218341,0.816629,0.732631,19,0.01575,0.389881,0.959604,...,-0.005884,0.031591,0.063304,-0.004356,-0.007517,-0.020903,-0.013766,0.034616,0,0


In [3]:
# Switch from DataFrames to plain arrays for positional column slicing below.
# np.asarray yields the same array as DataFrame.to_numpy() on the first run and
# is a no-op on an ndarray, so re-running this cell no longer fails with
# "'numpy.ndarray' object has no attribute 'to_numpy'" after `stock_dataset`
# has already been rebound to an array.
stock_dataset = np.asarray(stock_dataset)
# real_dataframe itself is left untouched; it is extended with predictions later.
real_dataset = np.asarray(real_dataframe)

In [4]:
# Column 1 holds the label the network is trained on; columns 3 onwards are the
# features. The same feature slice is taken from the prediction data.
targets = stock_dataset[:, 1]
inputs = stock_dataset[:, 3:]
real_inputs = real_dataset[:, 3:]

# Number of output units: largest label + 1.
# Assumes labels are 0-based integers without gaps — TODO confirm.
class_size = 1 + np.max(targets)

### Standardize the inputs

In [5]:
# Fit the scaler on the labelled features and standardize them in one step.
# NOTE(review): the scaler is fitted before the train/test split, so the test
# rows contribute to the scaling statistics — confirm this is acceptable.
scaler = StandardScaler()
scaled_inputs = scaler.fit_transform(inputs)

# Apply the same scaling to the prediction data. The features are re-sliced from
# `real_dataset` (must match the feature slice used for `inputs`, columns 3+)
# rather than transforming `real_inputs` in place: the in-place form scaled the
# data a second time whenever this cell was re-run.
real_inputs = scaler.transform(real_dataset[:, 3:])

### Shuffle the data

In [6]:
# Fixed seed so that the row order — and therefore the train/validation/test
# split derived from it — is identical on every Restart & Run All.
# (Model weight initialisation is seeded separately, if at all.)
SHUFFLE_SEED = 42

# Draw one permutation of the row indices and apply it to features and labels
# alike so that each row keeps its own label.
shuffled_indices = np.random.RandomState(SHUFFLE_SEED).permutation(scaled_inputs.shape[0])

shuffled_inputs = scaled_inputs[shuffled_indices]
shuffled_targets = targets[shuffled_indices]

### Split the dataset into train, validation, and test

In [7]:
samples_count = shuffled_inputs.shape[0]

# Small datasets (< 4000 rows) are evaluated with K-fold cross-validation, in
# which case no separate validation slice is carved out.
useKFold = samples_count < 4000

# 80% train; 10% validation only when K-fold is off; the remainder is the test set.
train_samples_count = int(0.8 * samples_count)
if useKFold:
    validation_samples_count = 0
else:
    validation_samples_count = int(0.1 * samples_count)
test_samples_count = samples_count - (train_samples_count + validation_samples_count)

# Cut the shuffled arrays at the two boundaries: [train | validation | test].
_split_at = [train_samples_count, train_samples_count + validation_samples_count]
train_inputs, validation_inputs, test_inputs = np.split(shuffled_inputs, _split_at)
train_targets, validation_targets, test_targets = np.split(shuffled_targets, _split_at)

### Convert to tensor

In [8]:
# Convert every split (features and labels alike) to a float32 tensor.
# The right-hand side is fully evaluated before any name is rebound.
(train_inputs, train_targets,
 validation_inputs, validation_targets,
 test_inputs, test_targets) = [
    tf.convert_to_tensor(split_array, np.float32)
    for split_array in (train_inputs, train_targets,
                        validation_inputs, validation_targets,
                        test_inputs, test_targets)
]

# Model

### Outline the model

In [9]:
# Network dimensions. `input_size` is recorded for reference only; the layers
# below do not receive an explicit input shape.
input_size = train_inputs.shape[1]
output_size = class_size
hidden_layer_size = 1800

def create_model():
    """Build and compile a fresh, untrained classifier.

    The network is a Sequential stack of two ReLU Dense layers with
    `hidden_layer_size` units each, followed by a softmax Dense layer with
    `output_size` units. It is compiled with the Adam optimizer, sparse
    categorical cross-entropy loss and accuracy as the reported metric.

    Reads the module-level `hidden_layer_size` and `output_size` at call time.

    Returns:
        The compiled tf.keras.Sequential model.
    """
    model = tf.keras.Sequential()

    # Two identical hidden layers.
    for _ in range(2):
        model.add(tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'))

    # One output unit per class.
    model.add(tf.keras.layers.Dense(output_size, activation = 'softmax'))

    model.compile(loss = 'sparse_categorical_crossentropy',
                  optimizer = 'adam',
                  metrics = ['accuracy'])

    return model


### Training

In [None]:
batch_size = 100
max_epochs = 10

# Roughly one fold per 50 samples. KFold rejects fewer than 2 splits, so very
# small datasets (< 100 rows) are clamped to 2 folds instead of raising.
folds_count = max(2, samples_count // 50)

# Per-fold metrics, filled only on the K-fold path (they stay empty otherwise).
fold_estimate_loss = []
fold_estimate_accuracy = []

if useKFold:
    # Cross-validate on the training split: each fold trains a fresh model on
    # the remaining folds and is scored on the fold that was left out.
    for train_index, test_index in KFold(folds_count).split(train_inputs):
        x_train, x_test = tf.gather(train_inputs, train_index), tf.gather(train_inputs, test_index)
        y_train, y_test = tf.gather(train_targets, train_index), tf.gather(train_targets, test_index)

        # Many models are created in this loop; reset Keras' global state so it
        # does not keep accumulating memory from fold to fold.
        tf.keras.backend.clear_session()
        model = create_model()

        model.fit(x_train,
                  y_train,
                  batch_size = batch_size,
                  epochs = max_epochs,
                  verbose = 2)

        fold_loss, fold_accuracy = model.evaluate(x_test, y_test)
        fold_estimate_loss.append(fold_loss)
        fold_estimate_accuracy.append(fold_accuracy * 100)

        print('')
        print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(fold_loss, fold_accuracy * 100))
        print('')

    # Final model: previously `model` was simply whichever fold happened to run
    # last and the held-out test split was never looked at. Train once on the
    # whole training split and score it on the untouched test split, so that
    # the model used for predictions has seen all training rows.
    tf.keras.backend.clear_session()
    model = create_model()

    model.fit(train_inputs,
              train_targets,
              batch_size = batch_size,
              epochs = max_epochs,
              verbose = 2)

    test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)

    print('')
    print('Hold-out test loss: {0:.2f}. Hold-out test accuracy: {1:.2f}%'.format(test_loss, test_accuracy * 100))
    print('')
else:
    # NOTE(review): patience (20) exceeds max_epochs (10), so early stopping can
    # never trigger with the current settings — confirm the intended values.
    early_stopping = tf.keras.callbacks.EarlyStopping(patience = 20)
    model = create_model()

    model.fit(train_inputs,
              train_targets,
              batch_size = batch_size,
              epochs = max_epochs,
              callbacks = [early_stopping],
              validation_data = (validation_inputs, validation_targets),
              verbose = 2)

    test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)

# Number of folds actually evaluated (0 on the single-split path).
fold_counter = len(fold_estimate_accuracy)

Train on 1076 samples
Epoch 1/10
1076/1076 - 1s - loss: 2.1024 - accuracy: 0.3838
Epoch 2/10
1076/1076 - 0s - loss: 1.0099 - accuracy: 0.5279
Epoch 3/10
1076/1076 - 0s - loss: 0.8544 - accuracy: 0.6338
Epoch 4/10
1076/1076 - 0s - loss: 0.7041 - accuracy: 0.7082
Epoch 5/10
1076/1076 - 0s - loss: 0.5631 - accuracy: 0.7835
Epoch 6/10
1076/1076 - 0s - loss: 0.4370 - accuracy: 0.8476
Epoch 7/10
1076/1076 - 0s - loss: 0.3196 - accuracy: 0.9071
Epoch 8/10
1076/1076 - 0s - loss: 0.2204 - accuracy: 0.9405
Epoch 9/10
1076/1076 - 0s - loss: 0.1629 - accuracy: 0.9684
Epoch 10/10
1076/1076 - 0s - loss: 0.1323 - accuracy: 0.9647

Test loss: 1.90. Test accuracy: 38.10%

Train on 1076 samples
Epoch 1/10
1076/1076 - 1s - loss: 2.3060 - accuracy: 0.3690
Epoch 2/10
1076/1076 - 0s - loss: 0.9765 - accuracy: 0.5093
Epoch 3/10
1076/1076 - 0s - loss: 0.8442 - accuracy: 0.6292
Epoch 4/10
1076/1076 - 0s - loss: 0.7234 - accuracy: 0.7045
Epoch 5/10
1076/1076 - 0s - loss: 0.5941 - accuracy: 0.7723
Epoch 6/10
107

### Test the model

In [None]:
# Single-split run: print the test metrics.
# K-fold run: show how the per-fold accuracies (in percent) are distributed.
if not useKFold:
    print('',
          'Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy * 100.0),
          '',
          sep='\n')
else:
    # density=True normalises the histogram; density=False would plot raw counts.
    plt.hist(fold_estimate_accuracy, density=True)
    plt.xlabel('Accuracy')
    plt.ylabel('Probability')

### Make predictions

In [None]:
# Run the trained model on the standardized prediction features; the result has
# one row per input row and one column per output unit of the model.
predictions = model.predict(x=real_inputs)

In [None]:
# NOTE(review): an earlier revision dropped the 'target' and 'class' columns
# from real_dataframe at this point; that step is currently disabled.

# Place one model-output column per class at the front of the frame
# (pred_class_0, pred_class_1, ...). If a column already exists — i.e. this
# cell is being re-run — overwrite it in place; the previous
# allow_duplicates=True insert silently stacked up duplicate columns instead.
for class_index in range(predictions.shape[1]):
    column_name = "pred_class_" + str(class_index)
    class_scores = predictions[:, class_index]
    if column_name in real_dataframe.columns:
        real_dataframe[column_name] = class_scores
    else:
        real_dataframe.insert(class_index, column_name, class_scores)

In [None]:
# Winning class per row = index of the largest model output. np.argmax returns
# the first maximum on ties, matching the strict '>' of the previous hand-written
# double loop. The result is kept as a float (n, 1) column vector, as before,
# so the exported 'predicted' values keep their float formatting.
pred = np.argmax(predictions, axis=1).astype(np.float64).reshape(-1, 1)

# Insert right after the per-class columns; overwrite instead of duplicating
# when the column is already present (i.e. when this cell is re-run).
if "predicted" in real_dataframe.columns:
    real_dataframe["predicted"] = pred[:, 0]
else:
    real_dataframe.insert(predictions.shape[1], "predicted", pred[:, 0])

In [None]:
# Write the prediction frame (without the index) as UTF-8 with a BOM.
# The file name follows the same '<classes>_<counter>' scheme as the inputs;
# earlier runs used a 'stock_pred_c<classes>_p<params>.csv' name instead.
real_dataframe.to_csv(
    'stock_pred_c{}_{}.csv'.format(CLASS_COUNT, FILE_COUNTER),
    index = False,
    encoding = 'utf-8-sig',
)

In [None]:
# Preview the first five rows of the frame, including the added prediction columns.
real_dataframe.head(n=5)