# In [None]:

import torch
import torch.nn as nn
from torch.nn import functional as F
import numpy as np

# Memory Module
class MemoryModule:
    """FIFO buffer of (query, response) tensor pairs with nearest-neighbour lookup.

    A query is either a single vector of shape ``(hidden,)`` or a batch of
    shape ``(batch, ...)`` whose leading axis indexes independent items.
    Batched writes must carry one response row per query row.
    """

    def __init__(self, memory_size, hidden_size):
        """
        Memory module for storing and retrieving input-output representations.
        Args:
            memory_size (int): Maximum size of the memory buffer.
            hidden_size (int): Size of hidden representations.
        """
        self.memory_size = memory_size
        self.hidden_size = hidden_size
        self.memory = []  # Query-response pairs, oldest first

    def write(self, query_repr, response_repr):
        """
        Write a new query-response pair into the memory.

        Both tensors are detached, so the buffer never keeps an autograd graph
        alive. A memory with ``memory_size <= 0`` stores nothing.

        Args:
            query_repr (torch.Tensor): Input/query representation.
            response_repr (torch.Tensor): Output/response representation.
        Raises:
            ValueError: If a batched query is paired with a response whose
                leading dimension differs from the query's batch size.
        """
        if query_repr.dim() > 1 and (
            response_repr.dim() == 0 or response_repr.shape[0] != query_repr.shape[0]
        ):
            raise ValueError(
                "Batched query and response must share the same leading (batch) dimension."
            )
        if self.memory_size <= 0:
            return
        # Evict from the front until there is room for the new entry.
        while len(self.memory) >= self.memory_size:
            self.memory.pop(0)
        self.memory.append((query_repr.detach(), response_repr.detach()))

    def read(self, query_repr):
        """
        Retrieve the most similar memory entry to the given query.

        Similarity is the cosine between the query and every stored query row.
        For a batched query the best match is chosen independently per row.

        Args:
            query_repr (torch.Tensor): Input/query representation.
        Returns:
            torch.Tensor: The stored response of the best match for a 1-D
                query, or the per-row best matches stacked along dim 0 for a
                batched query. A zero tensor shaped like ``query_repr`` when
                the memory is empty.
        """
        if not self.memory:
            return torch.zeros_like(query_repr)  # Return zero vector if memory is empty

        keys, responses = self._flatten_entries()

        query = query_repr.detach()
        batched = query.dim() > 1
        query_rows = query.flatten(1) if batched else query.reshape(1, -1)
        key_rows = torch.stack(keys).to(query_rows)

        # Cosine similarity as a dot product of unit vectors; the clamp mirrors
        # the epsilon torch.cosine_similarity uses to avoid dividing by zero.
        eps = 1e-8
        query_unit = query_rows / query_rows.norm(dim=-1, keepdim=True).clamp_min(eps)
        key_unit = key_rows / key_rows.norm(dim=-1, keepdim=True).clamp_min(eps)
        best = (query_unit @ key_unit.t()).argmax(dim=1).tolist()

        if not batched:
            return responses[best[0]]
        return torch.stack([responses[i] for i in best])

    def _flatten_entries(self):
        """Expand stored entries into parallel lists of 1-D keys and their responses."""
        keys, responses = [], []
        for mem_query, mem_response in self.memory:
            if mem_query.dim() > 1:
                # A batched write contributes one key/response per batch row.
                keys.extend(mem_query.flatten(1).unbind(0))
                responses.extend(mem_response.unbind(0))
            else:
                keys.append(mem_query.reshape(-1))
                responses.append(mem_response)
        return keys, responses

# GPT Configuration Class
class GPTConfig:
    """Plain container for the hyper-parameters of the memory-augmented GPT."""

    def __init__(self, vocab_size, block_size, n_layer, n_head, n_embd, memory_size):
        # Vocabulary size and maximum context length
        self.vocab_size, self.block_size = vocab_size, block_size
        # Transformer depth, attention heads and embedding width
        self.n_layer, self.n_head, self.n_embd = n_layer, n_head, n_embd
        # Capacity of the memory buffer
        self.memory_size = memory_size

# Memory-Augmented GPT Model
class MemoryAugmentedGPT(nn.Module):
    """GPT-style language model whose logits are biased by a retrieval memory.

    Each sample's mean final hidden state is the memory key; the value stored
    under it is that sample's mean logits. On later passes the response of the
    most similar stored key is added to every position's logits.
    """

    def __init__(self, config):
        """
        Args:
            config (GPTConfig): Supplies vocab_size, block_size, n_layer,
                n_head, n_embd and memory_size.
        """
        super().__init__()
        self.transformer = nn.Transformer(
            d_model=config.n_embd,
            nhead=config.n_head,
            num_encoder_layers=config.n_layer,
            num_decoder_layers=config.n_layer,
            dim_feedforward=4 * config.n_embd,
            dropout=0.1,
            activation='relu'
        )
        self.embedding = nn.Embedding(config.vocab_size, config.n_embd)
        self.pos_embedding = nn.Parameter(torch.zeros(1, config.block_size, config.n_embd))
        self.ln_f = nn.LayerNorm(config.n_embd)
        self.head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        # Memory Module
        self.memory = MemoryModule(config.memory_size, config.n_embd)

    def forward(self, x, targets=None):
        """
        Args:
            x (torch.Tensor): Token ids of shape (batch, seq_len).
            targets (torch.Tensor, optional): Next-token ids, same shape as x.
                When given, the loss is computed and the memory is updated.
        Returns:
            tuple: (logits of shape (batch, seq_len, vocab_size), loss or None).
        """
        _, t = x.size()
        assert t <= self.pos_embedding.size(1), "Cannot forward, model block size is exhausted."

        # Token and position embeddings
        hidden = self.embedding(x) + self.pos_embedding[:, :t, :]

        # Additive causal mask (-inf above the diagonal) so a position cannot
        # attend to the tokens it is being trained to predict.
        causal_mask = torch.triu(
            torch.full((t, t), float("-inf"), dtype=hidden.dtype, device=hidden.device),
            diagonal=1,
        )

        hidden = hidden.permute(1, 0, 2)  # Transformer expects (seq_len, batch_size, embed_dim)
        hidden = self.transformer(
            hidden,
            hidden,
            src_mask=causal_mask,
            tgt_mask=causal_mask,
            memory_mask=causal_mask,
        )
        hidden = hidden.permute(1, 0, 2)

        hidden = self.ln_f(hidden)
        model_logits = self.head(hidden)

        # Memory interaction
        query_repr = hidden.mean(dim=1)  # Aggregate representation, (batch, n_embd)
        memory_output = self._read_memory(query_repr)
        logits = model_logits
        if memory_output is not None:
            # Broadcast the retrieved (batch, vocab) bias over every position.
            logits = model_logits + memory_output.to(model_logits).unsqueeze(1)

        if targets is None:
            return logits, None

        # Store the model's own logits rather than the memory-biased ones;
        # otherwise each stored response would contain the previously retrieved
        # one and the bias would keep growing from step to step.
        self._write_memory(query_repr, model_logits.mean(dim=1))

        loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), targets.reshape(-1))
        return logits, loss

    def _read_memory(self, query_repr):
        """Return one retrieved response per sample, or None while the memory is empty."""
        if not self.memory.memory:
            # The empty-memory fallback of the memory module is shaped like the
            # query (n_embd wide) and cannot be added to vocab-sized logits.
            return None
        return torch.stack([self.memory.read(query) for query in query_repr.detach()])

    def _write_memory(self, query_repr, response_repr):
        """Store each sample of the batch as its own (query, response) entry."""
        for query, response in zip(query_repr, response_repr):
            self.memory.write(query, response)

# Example Dataset and Training
class TextDataset(torch.utils.data.Dataset):
    """Placeholder dataset that yields random token sequences, one per text.

    The texts themselves are not tokenised yet; they only determine the
    dataset length. Each index maps to a fixed pseudo-random sequence so that
    repeated accesses return the same sample.
    """

    def __init__(self, texts, vocab_size, block_size):
        """
        Args:
            texts (list): Source texts; only their count is used.
            vocab_size (int): Token ids are drawn from [0, vocab_size).
            block_size (int): Number of tokens drawn per sample.
        """
        self.texts = texts
        self.vocab_size = vocab_size
        self.block_size = block_size

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """
        Returns:
            tuple: (input_ids, targets), each a long tensor of length
                block_size - 1, with targets shifted one token to the right.
        Raises:
            IndexError: If idx is outside the dataset.
        """
        size = len(self.texts)
        if not -size <= idx < size:
            raise IndexError(f"index {idx} is out of range for dataset of size {size}")

        # Convert text to indices (example logic; needs actual tokenizer).
        # Seeding by index keeps every sample reproducible.
        generator = torch.Generator().manual_seed(int(idx) % size)
        tokens = torch.randint(
            0, self.vocab_size, (self.block_size,), generator=generator, dtype=torch.long
        )
        input_ids = tokens[:-1].clone()
        targets = tokens[1:].clone()
        return input_ids, targets

# Training the Memory-Augmented GPT
def train_memory_augmented_gpt():
    """Train a small MemoryAugmentedGPT for three epochs on the example dataset."""
    config = GPTConfig(
        vocab_size=100,  # Example vocab size
        block_size=128,
        n_layer=4,
        n_head=8,
        n_embd=256,
        memory_size=10,
    )
    model = MemoryAugmentedGPT(config)
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

    # Example dataset: 1000 copies of the same placeholder text
    corpus = ["example text"] * 1000
    loader = torch.utils.data.DataLoader(
        TextDataset(corpus, config.vocab_size, config.block_size),
        batch_size=8,
        shuffle=True,
    )

    num_epochs = 3
    for epoch_number in range(1, num_epochs + 1):
        for input_ids, targets in loader:
            # Only the loss is needed for the update; the logits are discarded.
            _, loss = model(input_ids, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(f"Epoch {epoch_number} completed.")

# Run the example training loop only when this code is executed as a script.
if __name__ == "__main__":
    train_memory_augmented_gpt()



# In [None]:

# Importing necessary libraries
import torch
import torch.nn as nn
from torch.nn import functional as F
import numpy as np

# Memory Module
class MemoryModule:
    """FIFO buffer of (query, response) tensor pairs with nearest-neighbour lookup.

    A query is either a single vector of shape ``(hidden,)`` or a batch of
    shape ``(batch, ...)`` whose leading axis indexes independent items.
    Batched writes must carry one response row per query row.
    """

    def __init__(self, memory_size, hidden_size):
        """
        Memory module for storing and retrieving input-output representations.
        Args:
            memory_size (int): Maximum size of the memory buffer.
            hidden_size (int): Size of hidden representations.
        """
        self.memory_size = memory_size
        self.hidden_size = hidden_size
        self.memory = []  # Query-response pairs, oldest first

    def write(self, query_repr, response_repr):
        """
        Write a new query-response pair into the memory.

        Both tensors are detached, so the buffer never keeps an autograd graph
        alive. A memory with ``memory_size <= 0`` stores nothing.

        Args:
            query_repr (torch.Tensor): Input/query representation.
            response_repr (torch.Tensor): Output/response representation.
        Raises:
            ValueError: If a batched query is paired with a response whose
                leading dimension differs from the query's batch size.
        """
        if query_repr.dim() > 1 and (
            response_repr.dim() == 0 or response_repr.shape[0] != query_repr.shape[0]
        ):
            raise ValueError(
                "Batched query and response must share the same leading (batch) dimension."
            )
        if self.memory_size <= 0:
            return
        # Evict from the front until there is room for the new entry.
        while len(self.memory) >= self.memory_size:
            self.memory.pop(0)
        self.memory.append((query_repr.detach(), response_repr.detach()))

    def read(self, query_repr):
        """
        Retrieve the most similar memory entry to the given query.

        Similarity is the cosine between the query and every stored query row.
        For a batched query the best match is chosen independently per row.

        Args:
            query_repr (torch.Tensor): Input/query representation.
        Returns:
            torch.Tensor: The stored response of the best match for a 1-D
                query, or the per-row best matches stacked along dim 0 for a
                batched query. A zero tensor shaped like ``query_repr`` when
                the memory is empty.
        """
        if not self.memory:
            return torch.zeros_like(query_repr)  # Return zero vector if memory is empty

        keys, responses = self._flatten_entries()

        query = query_repr.detach()
        batched = query.dim() > 1
        query_rows = query.flatten(1) if batched else query.reshape(1, -1)
        key_rows = torch.stack(keys).to(query_rows)

        # Cosine similarity as a dot product of unit vectors; the clamp mirrors
        # the epsilon torch.cosine_similarity uses to avoid dividing by zero.
        eps = 1e-8
        query_unit = query_rows / query_rows.norm(dim=-1, keepdim=True).clamp_min(eps)
        key_unit = key_rows / key_rows.norm(dim=-1, keepdim=True).clamp_min(eps)
        best = (query_unit @ key_unit.t()).argmax(dim=1).tolist()

        if not batched:
            return responses[best[0]]
        return torch.stack([responses[i] for i in best])

    def _flatten_entries(self):
        """Expand stored entries into parallel lists of 1-D keys and their responses."""
        keys, responses = [], []
        for mem_query, mem_response in self.memory:
            if mem_query.dim() > 1:
                # A batched write contributes one key/response per batch row.
                keys.extend(mem_query.flatten(1).unbind(0))
                responses.extend(mem_response.unbind(0))
            else:
                keys.append(mem_query.reshape(-1))
                responses.append(mem_response)
        return keys, responses

# GPT Configuration Class
class GPTConfig:
    """Plain container for the transformer hyper-parameters of the GPT model."""

    def __init__(self, vocab_size, block_size, n_layer, n_head, n_embd):
        # Vocabulary size and maximum context length
        self.vocab_size, self.block_size = vocab_size, block_size
        # Transformer depth, attention heads and embedding width
        self.n_layer, self.n_head, self.n_embd = n_layer, n_head, n_embd

# GPT Model with Memory Augmentation
class MemoryAugmentedGPT(nn.Module):
    """Decoder-only GPT whose logits are biased by a retrieval memory.

    Each sample's mean final hidden state is the memory key; the value stored
    under it is that sample's mean logits. On later passes the response of the
    most similar stored key is added to every position's logits.
    """

    def __init__(self, config, memory_size):
        """
        Args:
            config (GPTConfig): Supplies vocab_size, block_size, n_layer,
                n_head and n_embd.
            memory_size (int): Maximum number of entries kept in the memory.
        """
        super().__init__()
        self.embedding = nn.Embedding(config.vocab_size, config.n_embd)
        self.pos_embedding = nn.Parameter(torch.zeros(1, config.block_size, config.n_embd))
        self.layers = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=config.n_embd,
                nhead=config.n_head,
                dim_feedforward=4 * config.n_embd,
                # Inputs are (batch, seq, embd); the layer's default layout is
                # (seq, batch, embd), which would attend across the batch.
                batch_first=True,
            ) for _ in range(config.n_layer)
        ])
        self.ln_f = nn.LayerNorm(config.n_embd)
        self.head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        # Memory Module
        self.memory = MemoryModule(memory_size, config.n_embd)

    def forward(self, x, targets=None):
        """
        Args:
            x (torch.Tensor): Token ids of shape (batch, seq_len).
            targets (torch.Tensor, optional): Next-token ids, same shape as x.
                When given, the loss is computed and the memory is updated.
        Returns:
            tuple: (logits of shape (batch, seq_len, vocab_size), loss or None).
        """
        logits, model_logits, query_repr = self._run(x)

        if targets is None:
            return logits, None

        # Write to memory during training
        self._write_memory(query_repr, model_logits.mean(dim=1))

        loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), targets.reshape(-1))
        return logits, loss

    @torch.no_grad()
    def generate(self, start_tokens, max_new_tokens):
        """
        Generate text using the memory-augmented GPT.

        Decoding is greedy and runs without gradient tracking. The caller
        decides whether the module is in train or eval mode.

        Args:
            start_tokens (torch.Tensor): Starting tokens for generation.
            max_new_tokens (int): Maximum number of tokens to generate.
        Returns:
            torch.Tensor: Generated token sequence.
        """
        block_size = self.pos_embedding.size(1)
        generated = start_tokens
        for _ in range(max_new_tokens):
            # Only the most recent block_size tokens have position embeddings.
            context = generated[:, -block_size:]
            logits, model_logits, query_repr = self._run(context)
            next_token = torch.argmax(logits[:, -1, :], dim=-1, keepdim=True)
            generated = torch.cat([generated, next_token], dim=1)

            # Update memory during generation, keyed by the same hidden-state
            # summary that reads use so the entries can be matched later.
            self._write_memory(query_repr, model_logits.mean(dim=1))

        return generated

    def _run(self, x):
        """Return (memory-biased logits, raw model logits, per-sample query vectors)."""
        _, t = x.size()
        assert t <= self.pos_embedding.size(1), "Cannot forward, model block size is exhausted."

        hidden = self.embedding(x) + self.pos_embedding[:, :t, :]

        # Additive causal mask (-inf above the diagonal) so a position cannot
        # attend to the tokens it is being trained to predict.
        causal_mask = torch.triu(
            torch.full((t, t), float("-inf"), dtype=hidden.dtype, device=hidden.device),
            diagonal=1,
        )

        # Pass through transformer layers
        for layer in self.layers:
            hidden = layer(hidden, src_mask=causal_mask)

        hidden = self.ln_f(hidden)
        model_logits = self.head(hidden)

        # Memory interaction
        query_repr = hidden.mean(dim=1)  # Aggregate representation, (batch, n_embd)
        memory_output = self._read_memory(query_repr)
        logits = model_logits
        if memory_output is not None:
            # Broadcast the retrieved (batch, vocab) bias over every position.
            logits = model_logits + memory_output.to(model_logits).unsqueeze(1)
        return logits, model_logits, query_repr

    def _read_memory(self, query_repr):
        """Return one retrieved response per sample, or None while the memory is empty."""
        if not self.memory.memory:
            # The empty-memory fallback of the memory module is shaped like the
            # query (n_embd wide) and cannot be added to vocab-sized logits.
            return None
        return torch.stack([self.memory.read(query) for query in query_repr.detach()])

    def _write_memory(self, query_repr, response_repr):
        """Store each sample of the batch as its own (query, response) entry.

        Callers pass the raw model logits, not the memory-biased ones, so a
        stored response never contains a previously retrieved bias.
        """
        for query, response in zip(query_repr, response_repr):
            self.memory.write(query, response)

# Training and Inference Integration Example
if __name__ == "__main__":
    # Configuration
    config = GPTConfig(
        vocab_size=100,  # Example vocab size
        block_size=128,
        n_layer=4,
        n_head=8,
        n_embd=256
    )
    memory_size = 10
    model = MemoryAugmentedGPT(config, memory_size)

    # Dummy input for testing
    x = torch.randint(0, config.vocab_size, (2, 128))
    targets = torch.randint(0, config.vocab_size, (2, 128))

    # Forward pass with memory
    logits, loss = model(x, targets)
    print(f"Logits shape: {logits.shape}, Loss: {loss.item()}")

    # Text generation example. Switch to eval mode so dropout does not perturb
    # the greedy decoding, and skip autograd bookkeeping during inference.
    model.eval()
    start_tokens = torch.randint(0, config.vocab_size, (1, 10))
    with torch.no_grad():
        generated_tokens = model.generate(start_tokens, max_new_tokens=20)
    print(f"Generated tokens: {generated_tokens}")



# In [None]:

# Importing necessary libraries
import torch
import torch.nn as nn
from torch.nn import functional as F

# Memory Module for Time Series
class MemoryModule:
    """FIFO buffer of (query, response) tensor pairs with nearest-neighbour lookup.

    A query is either a single vector of shape ``(hidden,)`` or a batch of
    shape ``(batch, ...)`` whose leading axis indexes independent items.
    Batched writes must carry one response row per query row.
    """

    def __init__(self, memory_size, hidden_size):
        """
        Args:
            memory_size (int): Maximum number of entries kept in the buffer.
            hidden_size (int): Size of hidden representations.
        """
        self.memory_size = memory_size
        self.hidden_size = hidden_size
        self.memory = []  # Query-response pairs, oldest first

    def write(self, query_repr, response_repr):
        """
        Append a detached (query, response) pair, evicting the oldest if full.

        A memory with ``memory_size <= 0`` stores nothing.

        Raises:
            ValueError: If a batched query is paired with a response whose
                leading dimension differs from the query's batch size.
        """
        if query_repr.dim() > 1 and (
            response_repr.dim() == 0 or response_repr.shape[0] != query_repr.shape[0]
        ):
            raise ValueError(
                "Batched query and response must share the same leading (batch) dimension."
            )
        if self.memory_size <= 0:
            return
        # Evict from the front until there is room for the new entry.
        while len(self.memory) >= self.memory_size:
            self.memory.pop(0)
        self.memory.append((query_repr.detach(), response_repr.detach()))

    def read(self, query_repr):
        """
        Return the stored response whose query is most cosine-similar.

        For a batched query the best match is chosen independently per row and
        the matches are stacked along dim 0. When the memory is empty a zero
        tensor shaped like ``query_repr`` is returned.
        """
        if not self.memory:
            return torch.zeros_like(query_repr)

        keys, responses = self._flatten_entries()

        query = query_repr.detach()
        batched = query.dim() > 1
        query_rows = query.flatten(1) if batched else query.reshape(1, -1)
        key_rows = torch.stack(keys).to(query_rows)

        # Cosine similarity as a dot product of unit vectors; the clamp mirrors
        # the epsilon torch.cosine_similarity uses to avoid dividing by zero.
        eps = 1e-8
        query_unit = query_rows / query_rows.norm(dim=-1, keepdim=True).clamp_min(eps)
        key_unit = key_rows / key_rows.norm(dim=-1, keepdim=True).clamp_min(eps)
        best = (query_unit @ key_unit.t()).argmax(dim=1).tolist()

        if not batched:
            return responses[best[0]]
        return torch.stack([responses[i] for i in best])

    def _flatten_entries(self):
        """Expand stored entries into parallel lists of 1-D keys and their responses."""
        keys, responses = [], []
        for mem_query, mem_response in self.memory:
            if mem_query.dim() > 1:
                # A batched write contributes one key/response per batch row.
                keys.extend(mem_query.flatten(1).unbind(0))
                responses.extend(mem_response.unbind(0))
            else:
                keys.append(mem_query.reshape(-1))
                responses.append(mem_response)
        return keys, responses

# Transformer-based Backbone for Time Series (Replaces LSTM with GPT-like architecture)
class TransformerBackbone(nn.Module):
    """Transformer encoder that summarises a multivariate series as one vector."""

    def __init__(self, input_size, hidden_size, num_layers, n_head, block_size):
        """
        Args:
            input_size (int): Number of features per time step.
            hidden_size (int): Model width; must be divisible by n_head.
            num_layers (int): Number of encoder layers.
            n_head (int): Attention heads per layer.
            block_size (int): Maximum supported sequence length.
        """
        super().__init__()
        self.embedding = nn.Linear(input_size, hidden_size)
        self.pos_embedding = nn.Parameter(torch.zeros(1, block_size, hidden_size))
        self.layers = nn.ModuleList([
            nn.TransformerEncoderLayer(
                d_model=hidden_size,
                nhead=n_head,
                dim_feedforward=4 * hidden_size,
                dropout=0.1,
                # Inputs are (batch, time, hidden); the layer's default layout
                # is (time, batch, hidden), which would attend across the batch
                # instead of across time steps.
                batch_first=True,
            ) for _ in range(num_layers)
        ])
        self.block_size = block_size

    def forward(self, x):
        """
        Args:
            x (torch.Tensor): Series of shape (batch, time, input_size).
        Returns:
            torch.Tensor: (batch, hidden_size) representation taken from the
                last time step.
        """
        _, t, _ = x.size()
        assert t <= self.block_size, "Input sequence length exceeds block size."

        x = self.embedding(x)  # Project input features to hidden size
        x = x + self.pos_embedding[:, :t, :]  # Add positional embeddings

        for layer in self.layers:
            x = layer(x)

        return x[:, -1, :]  # Use the last token's output as the sequence representation

# Titans Model for Time Series with Transformer Backbone
class TitansTimeSeries(nn.Module):
    """Time-series forecaster that adds a retrieved past target to its prediction.

    The backbone's sequence representation is the memory key and the observed
    target is the stored value. The target retrieved for the most similar past
    sequence is added to the linear head's output, so both live in the same
    (batch, output_size) space.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers, n_head, block_size, memory_size):
        """
        Args:
            input_size (int): Number of features per time step.
            hidden_size (int): Width of the backbone representation.
            output_size (int): Size of the forecast vector.
            num_layers (int): Number of backbone layers.
            n_head (int): Attention heads per backbone layer.
            block_size (int): Maximum supported sequence length.
            memory_size (int): Maximum number of entries kept in the memory.
        """
        super().__init__()
        self.backbone = TransformerBackbone(input_size, hidden_size, num_layers, n_head, block_size)
        self.memory = MemoryModule(memory_size, hidden_size)
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x, y=None):
        """
        Args:
            x (torch.Tensor): Input series of shape (batch, time, input_size).
            y (torch.Tensor, optional): Targets, either (batch, output_size) or
                (batch, time, output_size). When given, they are written to
                the memory after the prediction has been made.
        Returns:
            torch.Tensor: Forecast of shape (batch, output_size).
        """
        query_repr = self.backbone(x)
        output = self.output_layer(query_repr)

        # Read before writing so a sample never retrieves the target supplied
        # in this same call.
        memory_output = self._read_memory(query_repr)
        if memory_output is not None:
            output = output + memory_output.to(output)

        if y is not None:
            # Keep only the final step of a per-step target so the stored value
            # has the same (batch, output_size) shape as the prediction.
            target = y[:, -1, :] if y.dim() == 3 else y
            for query, response in zip(query_repr, target):
                self.memory.write(query, response)

        return output

    def _read_memory(self, query_repr):
        """Return one retrieved target per sample, or None while the memory is empty."""
        if not self.memory.memory:
            # The empty-memory fallback of the memory module is shaped like the
            # query (hidden_size wide) and cannot be added to the forecast.
            return None
        return torch.stack([self.memory.read(query) for query in query_repr.detach()])

# Generate synthetic time series data with batch structure
def generate_synthetic_time_series_with_batch(num_samples, sequence_length, input_size, output_size):
    """
    Build a random batch of input series together with noisy sinusoidal targets.

    At every time step the target is the sine of the summed input features,
    broadcast over ``output_size`` channels, plus uniform noise in [0, 0.1).

    Args:
        num_samples (int): Number of samples to generate (batch size).
        sequence_length (int): Length of each time series sequence.
        input_size (int): Size of the input vector at each time step.
        output_size (int): Size of the output vector at each time step.
    Returns:
        Tuple of tensors: (inputs, outputs) with shapes
        [batch, sequence_length, input_size] and [batch, sequence_length, output_size].
    """
    series_shape = (num_samples, sequence_length)

    inputs = torch.rand(*series_shape, input_size)
    # One clean value per time step, shape [batch, sequence_length, 1]
    signal = inputs.sum(dim=2, keepdim=True).sin()
    noise = torch.rand(*series_shape, output_size) * 0.1

    outputs = signal + noise
    return inputs, outputs

# Example Usage
if __name__ == "__main__":
    # Configurations
    input_size = 4  # Number of features in the time series
    hidden_size = 64
    output_size = 4  # Forecasting a vector of size 4
    num_layers = 4
    n_head = 8
    block_size = 20  # Maximum sequence length
    memory_size = 10
    batch_size = 16
    sequence_length = 20

    # Instantiate the model
    model = TitansTimeSeries(input_size, hidden_size, output_size, num_layers, n_head, block_size, memory_size)

    # Generate synthetic data
    inputs, outputs = generate_synthetic_time_series_with_batch(batch_size, sequence_length, input_size, output_size)

    # Training loop
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model.train()  # The mode does not change inside the loop, so set it once
    for epoch in range(10):
        optimizer.zero_grad()
        predictions = model(inputs, outputs)
        loss = F.mse_loss(predictions, outputs[:, -1, :])  # Predict based on the last time step
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch + 1}, Loss: {loss.item():.4f}")

    # Inference: eval mode disables dropout, no_grad skips autograd bookkeeping
    model.eval()
    test_sample = torch.rand(1, sequence_length, input_size)  # A single test sequence
    with torch.no_grad():
        forecast = model(test_sample)
    print(f"Test input: {test_sample}")
    print(f"Forecast: {forecast}")

