# This notebook test self encoding

In [38]:
import pandas as pd
import os
import datetime
from QuotesDownloader import SoftfxDownloader, QuotesType, QuotesPeriodicity
import numpy as np
#import tensorflow as tf 

from tensorflow.keras.models import Model
from tensorflow.keras import layers, losses
from tensorflow.keras.utils import plot_model
from tqdm import tqdm


In [39]:
import tensorflow as tf
print(tf.__version__)
import sys
print(sys.executable)




2.15.0-dev20230828
C:\Users\Petrosyan\.conda\envs\ae4\python.exe


Plan:
1. Download data for EURUSD
2. Convert data to pips
3. Convert bar prices to one pip movement. If bid was increased to X pips, X ones will be generated.
4. Create network for encoding/decoding to dictionary.


###  First step. Download M1 data.

In [40]:
symbols = ["EURUSD"]
pips_value = [10**-5]
N_x = 1000 #number of pips movements for NN

qd = SoftfxDownloader.Downloader()
train_raw = qd.get_quotes(symbols[0], datetime.date(2020, 1, 1), datetime.date(2021, 1, 1), QuotesPeriodicity.M1, QuotesType.Bids)
train_raw = train_raw.to_numpy()
print(train_raw.shape)
print(train_raw.dtype)

(368053, 6)
object


###  Second step. Convert data to pips

In [41]:
#Encode input data to NN friendly format.
bids = train_raw[:,1] / pips_value
bids = bids.astype('f')
bids = np.rint(bids).astype('i')
bids.shape

(368053,)

###  Fourth step. Convert to pip steps

In [42]:
# initializing list
def pips2pricemovments(arr : np.ndarray) -> np.ndarray:
    d_iter = map(lambda x, y: np.repeat(np.float16(1), y-x) if x<y else np.repeat(np.float16(0), x-y), arr[:-1], arr[1:])
    return np.concatenate(list(d_iter))

In [43]:
filename_converted = "bids_EURUSD.npy"
if os.path.exists(filename_converted):
    print("Loading file")
    d = np.load(filename_converted)
else:
    d = pips2pricemovments(bids)
    np.save(filename_converted, d)
d.shape

Loading file


(3672304,)

##  Fifth step. NN

In [44]:
from dataclasses import dataclass
#train/test split
# Test your function and save all "global" variables within the G class (G stands for global)
@dataclass
class GSettings:
    filename_converted = "bids_EURUSD.npy"
    d = np.load(filename_converted)
    N = d.shape[0]
    train_ratio = 0.1
    n_train = int(N * train_ratio)
    window_size = 1024
    batch_size = 64
    SHUFFLE_BUFFER_SIZE = 1000
    N_letters = 10

train_data = d[:GSettings.n_train]
test_data = d[GSettings.n_train:]

print(f"Train data shape is {train_data.shape}.\nTest data shape is {test_data.shape}.")

Train data shape is (367230,).
Test data shape is (3305074,).


In [56]:
import tensorflow as tf
def windowed_dataset(series, window_size=GSettings.window_size, batch_size=GSettings.batch_size, shuffle_buffer=GSettings.SHUFFLE_BUFFER_SIZE):
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(window_size, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda window: window.batch(window_size))
    ds = ds.shuffle(shuffle_buffer)
    ds = ds.map(lambda w: tf.cast(w, tf.float32))
    #outAverage = tf.keras.layers.Conv1D(1, 20, padding='same', strides=1)(w)
    ds = ds.map(lambda w: (w, outAverage))
    ds = ds.batch(batch_size)#.prefetch(1)
    #ds = ds.map(lambda x, y: (x, tf.squeeze(y, axis=-1)))
    #for window in ds:
     #  print(list(window))
    return ds

train_set = windowed_dataset(train_data)
test_set = windowed_dataset(test_data)

t = train_set.take(1)
list(t.as_numpy_iterator())

NameError: name 'w' is not defined

In [46]:
%load_ext autoreload
%autoreload 2
import AutoEncoderModels

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [47]:
autoencoder = AutoEncoderModels.AlphabetMulti(GSettings.window_size) #AlphabetSimple()
#autoencoder.build((GSettings.window_size))
#plot_model(autoencoder.nne1, show_shapes=True, show_layer_names=True)
#autoencoder.encoder1.summary()
autoencoder.decoder1.summary()

(None, 32, 32)
Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_5 (Dense)             (None, 1024)              103424    
                                                                 
 reshape_2 (Reshape)         (None, 32, 32)            0         
                                                                 
Total params: 103424 (404.00 KB)
Trainable params: 103424 (404.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [48]:
# td = np.random.randint(0,2,GSettings.window_size)
# td = np.reshape(td, (1, 540, 1))
# print(td.shape)
# autoencoder.encoder(td)

In [49]:
# td = np.zeros(54)
# td[0] = 1
# td = td[np.newaxis, :]
# autoencoder.decoder(td)

In [50]:
#autoencoder.nne1.summary()

In [51]:
class CustomMSE(tf.keras.losses.Loss):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, y_true, y_pred):
#        print(y_true.shape)
#        cum_y_true = tf.cumsum(y_true)
#        cum_y_pred = tf.cumsum(y_pred)
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32)
        return tf.reduce_mean(tf.square(y_true - y_pred))


In [52]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.BinaryCrossentropy()#CustomMSE()
train_acc_metric = tf.keras.metrics.BinaryAccuracy()
val_acc_metric = tf.keras.metrics.BinaryAccuracy()

In [53]:
def apply_gradient(optimizer, model, x, y):
    with tf.GradientTape() as tape:
        logits = model(x)
        loss_value = loss_object(y_true=y, y_pred=logits)# + loss_object2(y_true=y, y_pred=logits2)

    gradients = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))

    return logits, loss_value


In [54]:
def train_data_for_one_epoch(model, train, train_acc_metric, totalRecords):
    losses = []
    pbar = tqdm(total=totalRecords, position=0, leave=True, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} ')
    for step, (x_batch_train, y_batch_train) in enumerate(train_set):
        logits, loss_value = apply_gradient(optimizer, model, x_batch_train, y_batch_train)

        losses.append(loss_value)

        train_acc_metric(y_batch_train, logits)
        pbar.set_description("Training loss for step %s: %.4f acc: %.4f" % (int(step), float(loss_value), float(train_acc_metric.result())))
        pbar.update()
    return losses

In [55]:
# Iterate over epochs.
epochs = 2
epochs_val_losses, epochs_train_losses = [], []
for epoch in range(epochs):
    print('Start of epoch %d' % (epoch,))

    losses_train = train_data_for_one_epoch(autoencoder, train_set, train_acc_metric, train_data.shape[0]/GSettings.batch_size)
    train_acc = train_acc_metric.result()

    #losses_val = perform_validation()
    #val_acc = val_acc_metric.result()

    losses_train_mean = np.mean(losses_train)
    #losses_val_mean = np.mean(losses_val)
    #epochs_val_losses.append(losses_val_mean)
    epochs_train_losses.append(losses_train_mean)

    print('\n Epoch %s: Train loss: %.4f  Validation Loss: %.4f, Train Accuracy: %.4f, Validation Accuracy %.4f' % (epoch, float(losses_train_mean), 0#float(losses_val_mean)
        , float(train_acc), 0#float(val_acc)
        ))

    train_acc_metric.reset_states()
    #val_acc_metric.reset_states()

Start of epoch 0


Training loss for step 20: 0.6449 acc: 0.6141:   0%|          | 21/5737.96875 875 


KeyboardInterrupt: 

In [None]:
autoencoder.save("firstsuccess2")

In [15]:
import sys
class CustomMSE(tf.keras.losses.Loss):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, y_true, y_pred):
        print(y_true.shape)
        #mse1 = tf.reduce_mean(tf.square(y_true[:, 0] - y_pred[:, 0]))
        #mse2 = tf.reduce_mean(tf.square(y_true[:, 1] - y_pred[:, 1]))
        #total_mse = mse1 + mse2
        #return total_mse
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32)
        return tf.reduce_mean(tf.square(y_true - y_pred))

def custom_loss_function(y_true, y_pred):
    tf.print("\n y_true", y_true, output_stream=sys.stdout)
    tf.print("\n y_pred", y_pred, output_stream=sys.stdout)

   # y_true = tf.cast(y_true, tf.float32)
    #y_pred = tf.cast(y_pred, tf.float32)


    #squared_difference = tf.square(y_true - y_pred)
    #return tf.reduce_mean(squared_difference)
    return tf.square(y_pred)


autoencoder.compile(optimizer='adam', loss='mse', metrics=['accuracy']) #'binary_crossentropy'


In [16]:
#from tensorflow.keras.losses import mean_squared_error
autoencoder.compile(loss = 'binary_crossentropy')

In [17]:
history = autoencoder.fit(train_set, epochs=1)



In [18]:
y_true = [0, 1, 0, 0]
y_pred = [-18.6, 0.51, 2.94, -12.8]
bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
bce(y_true, y_pred).numpy()


0.865458

In [62]:
y_true = [[.0, 1.0, 0]]
y_pred = [[.0, 0.8, 0.6]]
   
la = tf.keras.metrics.BinaryAccuracy()
la.reset_state()
la.update_state(y_true, y_pred)
la.result()

    

<tf.Tensor: shape=(), dtype=float32, numpy=0.6666667>