In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.models import Model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models, backend, constraints, initializers

In [None]:
# Load the order-book CSV into `df`; later cells read its 'timestamp' column and the
# level-0 bid/ask price and quantity columns.
df = pd.read_csv('truncated_LOB_data_BTC_USD_COINBASE.csv')

In [None]:
def finder_of_fulfilment(df, column, end_time='2023-10-03 00:00:00'):
    """Measure, for every row, how long it takes until `column` is next strictly exceeded.

    Rows are handled by position, so the result does not depend on the frame's
    index labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `column` and a 'timestamp' column that `pd.to_datetime`
        can parse as a whole.
    column : str
        Name of the numeric column to scan.
    end_time : str or datetime-like, optional
        Horizon used for rows whose value is never exceeded later on.
        Defaults to the value that used to be hard-coded.

    Returns
    -------
    time : list of pandas.Timedelta
        Timestamp of the first later row with a strictly larger value minus the
        row's own timestamp, or `end_time` minus the row's timestamp when no
        such row exists.
    indicator : list of int
        1 when a later, strictly larger value exists, otherwise 0.
    """
    values = df[column].to_numpy()
    # Parse every timestamp once instead of twice per row.
    stamps = list(pd.to_datetime(df['timestamp']))
    horizon = pd.to_datetime(end_time)

    n_rows = len(values)
    next_higher = np.full(n_rows, -1, dtype=np.int64)

    # Single pass "next greater element": `waiting` holds positions that have not
    # yet met a larger value; their values are non-increasing from bottom to top.
    waiting = []
    for pos in range(n_rows):
        current = values[pos]
        if pd.isna(current):
            # A missing value neither exceeds anything nor is ever exceeded.
            continue
        while waiting and values[waiting[-1]] < current:
            next_higher[waiting.pop()] = pos
        waiting.append(pos)

    time, indicator = [], []
    for pos in range(n_rows):
        hit = next_higher[pos]
        if hit >= 0:
            time.append(stamps[hit] - stamps[pos])
            indicator.append(1)
        else:
            time.append(horizon - stamps[pos])
            indicator.append(0)
    return time, indicator

In [None]:
# Attach two columns computed from 'ask_prices_0': the waiting time until the price is
# next strictly exceeded ('time') and a 0/1 flag telling whether that ever happens ('indicator').
df['time'], df['indicator'] = finder_of_fulfilment(df, 'ask_prices_0')
# Bare last expression: the notebook renders the frame.
df

In [None]:
# Parse the timestamps, express the waiting time as whole seconds, then drop the
# last 140 rows of the frame.
# NOTE(review): 140 is a magic number; presumably it trims rows too close to the end
# of the sample. Confirm and name the constant.
df = df.assign(
    timestamp=pd.to_datetime(df['timestamp']),
    time=df['time'].dt.total_seconds().astype(int),
).iloc[:-140]

In [None]:
# Empirical distribution of the waiting time on a one-second grid:
# F_of_t[t] is the fraction of rows whose 'time' is strictly below t seconds, for
# t = 0 .. max(time) - 1, and S_of_t = 1 - F_of_t is the matching survival curve.
# NOTE(review): 'indicator' is selected but never used, so rows flagged 0 enter with
# their time-to-horizon like any other row. Confirm that ignoring censoring is intended.
df_to_plot = df[['indicator', 'time']]
N = len(df_to_plot)

# Sort once, then count "strictly smaller" entries per grid point with a binary search
# instead of re-filtering the whole frame for every second of the grid.
sorted_times = np.sort(df_to_plot['time'].to_numpy())
seconds_grid = np.arange(int(df_to_plot['time'].max()))
F_of_t = np.searchsorted(sorted_times, seconds_grid, side='left') / N

S_of_t = 1 - F_of_t

In [None]:
# Integer-second grid matching S_of_t: entry k of S_of_t is the estimate at t = k seconds.
# (np.linspace(0, max, max) would space the points max / (max - 1) apart and drift off that grid.)
t = np.arange(len(S_of_t))

plt.figure(figsize=(6, 6))
# Only the first 400 seconds are drawn.
plt.plot(t[:400], S_of_t[:400], linewidth=2, label='Level 1, pegged', color='black')
plt.xlabel('t (sec.)')
# Raw string so the LaTeX backslash is not read as a Python escape sequence.
plt.ylabel(r'$\hat{S}(t)$')
plt.title('Survival Probability Over Time')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Build top-of-book features (no columns are dropped in this cell).
# The chained-assignment warning is switched off because new columns are added to `df`,
# which was produced by slicing an earlier frame.
pd.options.mode.chained_assignment = None

bid_qty, ask_qty = df['bid_quantity_0'], df['ask_quantity_0']
top_depth = bid_qty + ask_qty

# Signed volume imbalance: positive when more quantity rests on the bid than on the ask.
df['vol_imbalance'] = (bid_qty - ask_qty) / top_depth

# Microprice: each price is weighted by the OPPOSITE side's quantity, so a heavy bid
# pushes the value towards the ask. Weighting each price by its own side's quantity
# pulls the value the wrong way.
df['microprice'] = (df['bid_prices_0'] * ask_qty + df['ask_prices_0'] * bid_qty) / top_depth

In [None]:
# Target = empirical survival probability looked up at each row's waiting time.
# The one-second shift (index = time - 1) is kept, but the index is floored at 0:
# a waiting time of 0 s would otherwise index -1 and silently read the LAST entry of S_of_t.
lookup = np.clip(df['time'].to_numpy() - 1, 0, None)
target = np.asarray(S_of_t)[lookup]

df['target'] = target

In [None]:
# Microprice through time, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['timestamp'], df['microprice'], color='blue', linewidth=2, label='Microprice')

# Axis labels, title and grid.
ax.set_xlabel('Time')
ax.set_ylabel('Microprice')
ax.set_title('Microprice over Time')
ax.grid(True)

In [None]:
# 1000-row rolling mean of the volume imbalance through time.
fig, ax = plt.subplots(figsize=(10, 6))
smoothed_imbalance = df['vol_imbalance'].rolling(1000).mean()
ax.plot(df['timestamp'], smoothed_imbalance, color='blue',
        linewidth=2, label='Rolling Volume Imbalance')

# Axis labels, title and grid.
ax.set_xlabel('Time')
ax.set_ylabel('Volume Imbalance')
ax.set_title('Rolling Volume Imbalance over Time')
ax.grid(True)

In [None]:
# 1000-row rolling mean of the regression target through time.
# NOTE(review): the legend label and the title below look like placeholders; replace
# them with descriptive text before sharing the figure.
fig, ax = plt.subplots(figsize=(10, 6))
smoothed_target = df['target'].rolling(1000).mean()
ax.plot(df['timestamp'], smoothed_target, color='blue',
        linewidth=2, label='Target thing')

# Axis labels, title and grid.
ax.set_xlabel('Time')
ax.set_ylabel('Target')
ax.set_title('Rolling TargetTargetTarget')
ax.grid(True)

In [None]:
# Index the frame by timestamp, then split it into the regression target (Y) and the
# model inputs (X = every column except the label-derived ones).
df = df.set_index('timestamp', drop=True)

label_columns = ['time', 'indicator', 'target']
Y = df['target']
X = df.drop(columns=label_columns)

# Simplified Stuff

In [None]:
# Small Conv1D + LSTM encoder and dense/Conv1D decoder, chained and fitted on (X, Y).
# Every Keras layer is reached through the imported `layers` module; the bare names
# (Input, Reshape, Conv1D, LSTM, Dense) are never imported in this notebook and would
# raise NameError on a fresh kernel.
input_shape = X.shape[1:]  # per-row shape of X, i.e. (n_columns,)
steps = 1  # size of the trailing axis added by the reshape below
new_input_shape = (input_shape[0], steps)

# Define the encoder
encoder_input = layers.Input(shape=input_shape)
encoder_reshape = layers.Reshape(new_input_shape)(encoder_input)
encoder_conv1 = layers.Conv1D(32, kernel_size=2, activation='relu')(encoder_reshape)
encoder_lstm = layers.LSTM(32, activation='relu')(encoder_conv1)
latent_dim = 32
encoder_output = layers.Dense(latent_dim)(encoder_lstm)

encoder_model = Model(encoder_input, encoder_output)

# Define the decoder
decoder_input = layers.Input(shape=(latent_dim,))
decoder_dense1 = layers.Dense(32, activation='relu')(decoder_input)
decoder_reshape = layers.Reshape((1, 32))(decoder_dense1)
decoder_conv1 = layers.Conv1D(32, kernel_size=3, activation='relu', padding='same')(decoder_reshape)
output_dim = 32
decoder_output = layers.Dense(output_dim, activation='sigmoid')(decoder_conv1)

decoder_model = Model(decoder_input, decoder_output)

# Combine the encoder and decoder into an autoencoder
autoencoder_input = layers.Input(shape=input_shape)
encoded = encoder_model(autoencoder_input)
decoded = decoder_model(encoded)
autoencoder_model = Model(autoencoder_input, decoded)

# Compile the model
autoencoder_model.compile(optimizer='adam', loss='mse')

# Train the model
# NOTE(review): the decoder emits a (1, output_dim) block per row while Y holds a single
# value per row. Confirm the loss compares the shapes the way you intend.
autoencoder_model.fit(X, Y, epochs=3, batch_size=32)

# Some experiments

In [None]:
# Define the DCC layer (updated version)
class DilatedCausalConvolution(layers.Layer):
    """Three parallel causal Conv1D branches producing query, key and value tensors.

    All branches share the same hyper-parameters (filters, kernel size, dilation
    rate, causal padding, ReLU activation) but hold separate weights.
    """

    def __init__(self, filters, kernel_size, dilation_rate):
        super().__init__()
        # One settings dict keeps the three branches in lock-step.
        conv_settings = dict(
            filters=filters,
            kernel_size=kernel_size,
            dilation_rate=dilation_rate,
            padding='causal',
            activation='relu',
        )
        self.query_conv = layers.Conv1D(**conv_settings)
        self.key_conv = layers.Conv1D(**conv_settings)
        self.value_conv = layers.Conv1D(**conv_settings)

    def call(self, inputs):
        """Apply each branch to `inputs` and return the tuple (query, key, value)."""
        return self.query_conv(inputs), self.key_conv(inputs), self.value_conv(inputs)

# Define the Transformer block (updated version)
class TransformerBlock(layers.Layer):
    """Multi-head attention followed by a ReLU dense projection to `d_model` units.

    Parameters
    ----------
    num_heads : int
        Number of attention heads.
    d_model : int
        Used both as the per-head key dimension and as the width of the projection.
    """

    def __init__(self, num_heads, d_model):
        super().__init__()
        self.multi_head_attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.dense_proj = layers.Dense(d_model, activation='relu')

    def call(self, query, key, value):
        """Attend from `query` over (`key`, `value`) and project the result."""
        # MultiHeadAttention's positional order is (query, value, key). Passing
        # (query, key, value) positionally would silently swap key and value, so
        # the key is given by keyword.
        attn_output = self.multi_head_attention(query, value, key=key)
        proj_output = self.dense_proj(attn_output)
        return proj_output

# Define the encoder using the DCC and Transformer block (updated version)
def create_encoder(input_shape, filters, kernel_size, dilation_rate, num_heads, d_model):
    """Build the encoder model: causal convolutions feeding an attention block.

    The input of shape `input_shape` goes through DilatedCausalConvolution, whose
    (query, key, value) outputs are handed to a TransformerBlock. The returned
    Keras model maps the input to the block's output.
    """
    inputs = layers.Input(shape=input_shape)
    query, key, value = DilatedCausalConvolution(
        filters=filters,
        kernel_size=kernel_size,
        dilation_rate=dilation_rate,
    )(inputs)
    attended = TransformerBlock(num_heads=num_heads, d_model=d_model)(query, key, value)
    return models.Model(inputs=inputs, outputs=attended)

# Define the monotonic decoder (updated version)
def monotonic_constraint(weight_matrix):
    """Return `weight_matrix` with every negative entry replaced by zero."""
    is_negative = tf.less(weight_matrix, 0.)
    zeros = tf.zeros_like(weight_matrix)
    # Non-negative entries pass through untouched.
    return tf.where(is_negative, zeros, weight_matrix)

class CustomDense(layers.Layer):
    """Dense layer whose kernel carries `monotonic_constraint` (negative weights set to zero).

    Parameters
    ----------
    units : int
        Number of output units.
    activation : str or callable, optional
        Resolved through `tf.keras.activations.get`; None means linear.
    **kwargs
        Forwarded to `layers.Layer` (e.g. `name`).
    """

    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        """Create the constrained kernel and the unconstrained bias."""
        # The constraint is attached to the variable; Keras applies weight constraints
        # after optimizer updates, not inside `call`.
        # NOTE(review): the Glorot-uniform initial values can therefore still be negative
        # until the first update. Use a non-negative initializer if monotonicity must
        # hold from the start.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[-1], self.units),
                                      initializer=initializers.GlorotUniform(),
                                      constraint=monotonic_constraint,
                                      trainable=True)
        self.bias = self.add_weight(name='bias',
                                    shape=(self.units,),
                                    initializer='zeros',
                                    trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        """Return activation(inputs @ kernel + bias)."""
        return self.activation(tf.matmul(inputs, self.kernel) + self.bias)

    def get_config(self):
        """Expose the constructor arguments so the layer can be re-created from its config."""
        config = super().get_config()
        config.update({
            'units': self.units,
            'activation': tf.keras.activations.serialize(self.activation),
        })
        return config

def create_monotonic_decoder(input_shape, output_shape):
    """Build the decoder model: a single sigmoid CustomDense layer.

    `output_shape` is passed to CustomDense as its number of units; the sigmoid
    keeps every output between 0 and 1.
    """
    decoder_in = layers.Input(shape=input_shape)
    head = CustomDense(output_shape, activation='sigmoid')
    return models.Model(inputs=decoder_in, outputs=head(decoder_in))

# Combine the encoder and decoder to create the full model (updated version)
def create_full_model(input_shape, filters, kernel_size, dilation_rate, num_heads, d_model, output_shape):
    """Chain encoder -> Flatten -> monotonic decoder into one Keras model.

    The decoder's input shape is read from the flattened encoder output, so it
    follows whatever the encoder produces for `input_shape`.
    """
    encoder_inputs = layers.Input(shape=input_shape)
    encoder = create_encoder(input_shape, filters, kernel_size, dilation_rate, num_heads, d_model)

    # Collapse the encoder output to one vector per sample for the dense decoder.
    flattened_output = layers.Flatten()(encoder(encoder_inputs))

    decoder = create_monotonic_decoder(flattened_output.shape[1:], output_shape)
    return models.Model(inputs=encoder_inputs, outputs=decoder(flattened_output))

In [None]:
# Add a trailing channel axis: (n_rows, n_columns) -> (n_rows, n_columns, 1).
X_expanded = np.expand_dims(X, axis=-1)

# Define model parameters
# The per-sample shape is read from the data instead of being hard-coded as (22, 1), so
# the cell keeps working when the set of feature columns changes.
# NOTE(review): with this layout the column axis is the one the causal convolutions
# slide along. Confirm that treating features as "timesteps" is intended.
input_shape = X_expanded.shape[1:]
filters = 64
kernel_size = 3
dilation_rate = 1
num_heads = 8
d_model = 64
output_shape = 1

model = create_full_model(input_shape, filters, kernel_size, dilation_rate, num_heads, d_model, output_shape)
model.compile(optimizer='adam', loss='MSE')
model.fit(X_expanded, Y, epochs=3, batch_size=32)

model.summary()

In [None]:
# Run the fitted model on the same inputs it was trained on and show the mean prediction.
Y_pred = model.predict(X_expanded)
Y_pred.mean()