In [105]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers

# Output library versions
print(f"numpy version: {np.__version__}")
print(f"tensorflow version: {tf.__version__}")

numpy version: 1.26.4
tensorflow version: 2.18.0


In [106]:
# ----------------------------
# Constants and Encoding Legend
# ----------------------------
START_TOKEN = 13
END_TOKEN = 14

ENCODING_LEGEND = {
    'MRI_CCS_11': 1, 'MRI_EXU_95': 2, 'MRI_FRR_18': 3, 'MRI_FRR_257': 4,
    'MRI_FRR_264': 5, 'MRI_FRR_3': 6, 'MRI_FRR_34': 7, 'MRI_MPT_1005': 8,
    'MRI_MSR_100': 9, 'MRI_MSR_104': 10, 'MRI_MSR_21': 11, 'MRI_MSR_34': 12,
    'START': START_TOKEN,
    'END': END_TOKEN
}

CHAR_TO_INT = {
    '0': 0,
    '1': 1,
    '2': 2,
    '3': 3,
    '4': 4,
    '5': 5,
    '6': 6,
    '7': 7,
    '8': 8,
    '9': 9,
    '10': 10,
    '11': 11,
    '12': 12,
}

In [107]:
# ----------------------------
# Data Loading and Preparation
# ----------------------------
data_file = "encoded_182625.csv"  # Make sure this file is in your working directory.
data = pd.read_csv(data_file)
print("Loaded CSV with columns:", data.columns.tolist())

Loaded CSV with columns: ['SeqOrder', 'sourceID', 'timediff', 'PTAB', 'BodyGroup_from', 'BodyGroup_to']


In [108]:
# We assume the CSV contains columns "sourceID" and "timediff"
source_ids = data['sourceID'].dropna().astype(int).tolist()
cumulative_times = data['timediff'].dropna().astype(float).tolist()

# Add start and end tokens to the sourceID sequence.
sequence = [START_TOKEN] + source_ids + [END_TOKEN]

# For the cumulative times (assumed to be cumulative values),
# ensure the first time is 0 and add an extra time for the END token.
if cumulative_times[0] != 0:
    cumulative_times = [0.0] + cumulative_times
if len(sequence) != len(cumulative_times):
    cumulative_times = [0.0] + cumulative_times + [cumulative_times[-1]]

# Convert to numpy arrays and add a batch dimension.
sequence = np.array(sequence, dtype=np.int32)            # Shape: (seq_len,)
cumulative_times = np.array(cumulative_times, dtype=np.float32)  # Shape: (seq_len,)

# Create training data with a batch size of 1.
X_train = np.expand_dims(sequence, axis=0)      # Shape: (1, seq_len)
Y_cum = np.expand_dims(cumulative_times, axis=0)  # Shape: (1, seq_len)

In [109]:
# ----------------------------
# Target Computation Function
# ----------------------------
def compute_true_targets(cumulative_times):
    """
    Given cumulative times (shape: [batch, seq_len]), compute:
      - true_proportions: incremental differences normalized by total time.
      - true_total: the total time (last cumulative value) per sample.
    """
    diffs = cumulative_times[:, 0:1]  # first value (could be 0)
    diffs = tf.concat([diffs, cumulative_times[:, 1:] - cumulative_times[:, :-1]], axis=1)
    true_total = cumulative_times[:, -1:]
    true_total_safe = tf.where(true_total == 0, tf.ones_like(true_total), true_total)
    true_proportions = diffs / true_total_safe
    return true_proportions, true_total

true_prop, true_total = compute_true_targets(tf.convert_to_tensor(Y_cum))

In [110]:
# ----------------------------
# Transformer Components
# ----------------------------
def positional_encoding(length, depth):
    depth = depth / 2
    positions = np.arange(length)[:, np.newaxis]      # (length, 1)
    depths = np.arange(depth)[np.newaxis, :] / depth    # (1, depth)
    angle_rates = 1 / (10000 ** depths)
    angle_rads = positions * angle_rates
    pos_encoding = np.concatenate([np.sin(angle_rads), np.cos(angle_rads)], axis=-1)
    return tf.cast(pos_encoding, dtype=tf.float32)

In [111]:
class PositionalEmbedding(layers.Layer):
    def __init__(self, vocab_size, d_model, max_len=4096, use_embedding=True):
        """
        max_len: maximum sequence length for which positional encodings are precomputed.
        """
        super(PositionalEmbedding, self).__init__()
        self.d_model = d_model
        self.use_embedding = use_embedding
        if self.use_embedding:
            self.embedding = layers.Embedding(vocab_size, d_model, mask_zero=True)
        else:
            self.embedding = layers.Dense(d_model, activation="relu")
        self.max_len = max_len
        self.pos_encoding = positional_encoding(self.max_len, d_model)
    
    def compute_mask(self, *args, **kwargs):
        if self.use_embedding:
            return self.embedding.compute_mask(*args, **kwargs)
        else:
            return None
    
    def call(self, x):
        # x shape: (batch, seq_len)
        x = self.embedding(x)  # (batch, seq_len, d_model)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        seq_len = tf.shape(x)[1]
        # Use precomputed positional encoding up to the input sequence length.
        x += self.pos_encoding[tf.newaxis, :seq_len, :]
        return x

In [112]:
class FeedForward(layers.Layer):
    def __init__(self, d_model, dff, dropout_rate=0.1):
        super().__init__()
        self.seq = tf.keras.Sequential([
            layers.Dense(dff, activation='relu'),
            layers.Dense(d_model),
            layers.Dropout(dropout_rate)
        ])
        self.add = layers.Add()
        self.layer_norm = layers.LayerNormalization()
    
    def call(self, x):
        x = self.add([x, self.seq(x)])
        x = self.layer_norm(x)
        return x

In [113]:
class CausalSelfAttention(layers.Layer):
    def __init__(self, num_heads, d_model, dropout_rate=0.1):
        super().__init__()
        self.mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model, dropout=dropout_rate)
        self.add = layers.Add()
        self.layer_norm = layers.LayerNormalization()
    
    def call(self, x):
        attn_output = self.mha(query=x, key=x, value=x, use_causal_mask=True)
        x = self.add([x, attn_output])
        x = self.layer_norm(x)
        return x

In [114]:
class SelfAttentionFeedForwardLayer(layers.Layer):
    def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
        super().__init__()
        self.self_attention = CausalSelfAttention(num_heads=num_heads, d_model=d_model, dropout_rate=dropout_rate)
        self.ffn = FeedForward(d_model, dff, dropout_rate)
    
    def call(self, x):
        x = self.self_attention(x)
        x = self.ffn(x)
        return x

In [115]:
class Encoder(tf.keras.Model):
    def __init__(self, num_layers, d_model, num_heads, dff, vocab_size, dropout_rate=0.1):
        super().__init__()
        self.pos_embedding = PositionalEmbedding(vocab_size, d_model)
        self.enc_layers = [SelfAttentionFeedForwardLayer(d_model, num_heads, dff, dropout_rate)
                           for _ in range(num_layers)]
        self.dropout = layers.Dropout(dropout_rate)
    
    def call(self, x):
        x = self.pos_embedding(x)
        x = self.dropout(x)
        for layer in self.enc_layers:
            x = layer(x)
        return x  # (batch, seq_len, d_model)

In [116]:
class Decoder(tf.keras.Model):
    def __init__(self, num_layers, d_model, num_heads, dff, vocab_size, dropout_rate=0.1):
        super().__init__()
        self.pos_embedding = PositionalEmbedding(vocab_size, d_model)
        self.dropout = layers.Dropout(dropout_rate)
        self.dec_layers = [SelfAttentionFeedForwardLayer(d_model, num_heads, dff, dropout_rate)
                           for _ in range(num_layers)]
    
    def call(self, x, context):
        x = self.pos_embedding(x)
        x = self.dropout(x)
        for layer in self.dec_layers:
            x = layer(x)
        return x  # (batch, seq_len, d_model)

In [117]:
# ----------------------------
# TimeDiffTransformer Model
# ----------------------------
class TimeDiffTransformer(tf.keras.Model):
    """
    This model takes a sequence input (tokenized examination steps) and predicts:
      1. A sequence of per-step proportions (via softmax so they sum to 1).
      2. An overall total time (a nonnegative scalar via ReLU).
    The predicted per-step time differences are computed by multiplying the proportions with the total time.
    """
    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, dropout_rate=0.1):
        super().__init__()
        self.encoder = Encoder(num_layers, d_model, num_heads, dff, input_vocab_size, dropout_rate)
        self.decoder = Decoder(num_layers, d_model, num_heads, dff, input_vocab_size, dropout_rate)
        # Proportion head: outputs one value per time step.
        self.proportion_head = layers.Dense(1)
        # Total time head: outputs one scalar per sample (nonnegative).
        self.total_time_head = layers.Dense(1, activation='relu')
    
    def call(self, inputs):
        encoder_out = self.encoder(inputs)              # (batch, seq_len, d_model)
        decoder_out = self.decoder(inputs, encoder_out)   # (batch, seq_len, d_model)
        proportions_logits = self.proportion_head(decoder_out)  # (batch, seq_len, 1)
        proportions_logits = tf.squeeze(proportions_logits, axis=-1)  # (batch, seq_len)
        proportions = tf.nn.softmax(proportions_logits, axis=-1)
        pooled_encoder = tf.reduce_mean(encoder_out, axis=1)  # (batch, d_model)
        total_time = self.total_time_head(pooled_encoder)       # (batch, 1)
        return proportions, total_time
    
    def predict_time_differences(self, inputs):
        proportions, total_time = self(inputs)
        pred_time_diff = proportions * total_time  # (batch, seq_len) via broadcasting
        return pred_time_diff

In [118]:
# ----------------------------
# Loss Functions
# ----------------------------
def proportion_loss(y_true, y_pred):
    return tf.keras.losses.MeanSquaredError()(y_true, y_pred)

def total_time_loss(y_true, y_pred):
    return tf.keras.losses.MeanSquaredError()(y_true, y_pred)

In [119]:
# ----------------------------
# Model Instantiation, Compilation, and Training
# ----------------------------
vocab_size = max(ENCODING_LEGEND.values()) + 1  # e.g., 15
model = TimeDiffTransformer(num_layers=3, d_model=32, num_heads=8, dff=128,
                            input_vocab_size=vocab_size, dropout_rate=0.1)
# Build the model (this sets up the weights)
model.build(input_shape=(None, X_train.shape[1]))
model.summary()

# Compile with a list of losses: one for proportions and one for total time.
model.compile(optimizer='adam', loss=[proportion_loss, total_time_loss])

# Train the model.
# (For demonstration, we use our single-sequence training data.)
model.fit(X_train, [true_prop, true_total], epochs=5)



Epoch 1/5




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 18s/step - loss: 1545049.0000 - proportion_loss_loss: 0.0121 - total_time_loss_loss: 1545049.0000
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 1545049.0000 - proportion_loss_loss: 0.0121 - total_time_loss_loss: 1545049.0000
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 1545049.0000 - proportion_loss_loss: 0.0121 - total_time_loss_loss: 1545049.0000
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 1545049.0000 - proportion_loss_loss: 0.0121 - total_time_loss_loss: 1545049.0000
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 1545049.0000 - proportion_loss_loss: 0.0121 - total_time_loss_loss: 1545049.0000


<keras.src.callbacks.history.History at 0x1dfae088820>

In [120]:
# ----------------------------
# Inference
# ----------------------------
predicted_proportions, predicted_total_time = model(X_train)
predicted_time_differences = model.predict_time_differences(X_train)

print("\nPredicted proportions (first sample):", predicted_proportions[0].numpy())
print("Predicted total time (first sample):", predicted_total_time[0].numpy())
print("Predicted per-step time differences (first sample):", predicted_time_differences[0].numpy())


Predicted proportions (first sample): [1.0152797e-04 3.2527314e-05 4.9474394e-05 ... 2.5018287e-04 6.7671359e-04
 3.2045296e-04]
Predicted total time (first sample): [0.]
Predicted per-step time differences (first sample): [0. 0. 0. ... 0. 0. 0.]
