In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

In [None]:
# Read the transcript file; the sentence text is the second tab-separated field of each line.
with open("../data/transcription.txt", "r") as f:
    out = f.readlines()

# Blank lines (e.g. a trailing empty line) carry no tab and would make `[1]` raise
# IndexError, so they are skipped. A non-blank line without a tab still raises,
# so a malformed record is not silently dropped.
sentences = [x.split("\t")[1].strip() for x in out if "\t" in x or x.strip()]
sorted_sentences = sorted(sentences, key=len)  # same sentences, shortest first
print(sentences[:2])

In [None]:
# character vocabulary: every distinct character seen in the corpus, in sorted order,
# followed by the three special tokens
chars = sorted(set("".join(sentences)))
chars.extend(["<pad>", "<sil>", "<bos>"])  # padding, silence, beginning of sentence

# Forward lookup (symbol -> position in `chars`) ...
char_to_idx = dict(zip(chars, range(len(chars))))
# ... and the reverse lookup (position -> symbol)
idx_to_char = {position: symbol for symbol, position in char_to_idx.items()}

def decode_indices(indices):
    """Turn an iterable of index tensors into text.

    Each element must expose `.item()`; elements whose value is not a key of the
    module-level `idx_to_char` mapping are skipped without error.
    """
    pieces = []
    for element in indices:
        key = element.item()
        if key in idx_to_char:
            pieces.append(idx_to_char[key])
    return "".join(pieces)


# Dataset and DataLoader
class TextDataset(Dataset):
    """Sentences exposed as (input, target) index tensors for next-character prediction.

    The input is the encoded sentence with ``<bos>`` in front; the target is the
    encoded sentence with ``<bos>`` behind it, i.e. the input shifted left by one
    with ``<bos>`` doubling as the end marker. Characters that are missing from
    `char_to_idx` are dropped.
    """

    def __init__(self, sentences, char_to_idx):
        self.sentences = sentences
        self.char_to_idx = char_to_idx

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        text = self.sentences[idx]
        vocab = self.char_to_idx
        ids = [vocab[ch] for ch in text if ch in vocab]
        boundary = vocab["<bos>"]
        source = torch.tensor([boundary, *ids], dtype=torch.long)
        target = torch.tensor([*ids, boundary], dtype=torch.long)
        return source, target

# Collate function for padding and batching
def collate_fn(batch, pad_idx=None):
    """Pad a list of (input, target) pairs to a common length and stack them.

    Padding uses the ``<pad>`` token index rather than a literal 0. In this
    notebook ``<pad>`` is appended after the sorted characters, so index 0 is a
    real character; padding with 0 would feed that character to the model and
    would not be skipped by ``CrossEntropyLoss(ignore_index=<pad>)``.

    Args:
        batch: list of ``(inp, out)`` 1-D long tensors, as produced by TextDataset.
        pad_idx: index used to fill padded positions. Defaults to
            ``char_to_idx["<pad>"]`` from the notebook namespace.

    Returns:
        Tuple ``(inputs, targets, attention_masks)``, each of shape
        ``(len(batch), max_len)``. The mask is 1 on real positions, 0 on padding.
    """
    if pad_idx is None:
        pad_idx = char_to_idx["<pad>"]

    max_len = max(len(sample[0]) for sample in batch)  # longest input in the batch

    padded_batch = []
    target_batch = []
    attention_masks = []

    for inp, out in batch:
        # The target is padded by the same amount as its input.
        padding_length = max_len - len(inp)
        filler = torch.full((padding_length,), pad_idx, dtype=torch.long)

        padded_batch.append(torch.cat([inp, filler]))
        target_batch.append(torch.cat([out, filler]))
        attention_masks.append(torch.cat([
            torch.ones(len(inp), dtype=torch.long),
            torch.zeros(padding_length, dtype=torch.long),
        ]))

    return torch.stack(padded_batch), torch.stack(target_batch), torch.stack(attention_masks)


# dataset = TextDataset(sentences, char_to_idx)
# dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)

# for i in dataloader:
#     i,t,a = i
#     break
# i,t,a

In [None]:

class CausalConv1d(nn.Module):
    """1-D convolution that only looks at the current and earlier time steps.

    The wrapped ``nn.Conv1d`` is created without padding; `forward` left-pads the
    last dimension by ``(kernel_size - 1) * dilation`` so the output has the same
    length as the input and position ``t`` never sees inputs after ``t``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        super().__init__()
        self.dilation = dilation
        self.kernel_size = kernel_size
        # padding=0: all padding is applied by hand, on the left only, in forward()
        self.conv = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size,
            dilation=dilation,
            padding=0,
        )

    def forward(self, x):
        left_context = (self.kernel_size - 1) * self.dilation
        shifted = F.pad(x, (left_context, 0))
        return self.conv(shifted)

class CausalConvBlock(nn.Module):
    """Residual block of three causal convolutions with dilations 1, 2 and 3.

    Every convolution is followed by ReLU, BatchNorm1d and Dropout(0.2), in that
    order. The block output is the input plus the stack's output, so the two
    must be shape-compatible for the addition (in practice
    ``in_channels == out_channels``).
    """

    def __init__(self, in_channels, out_channels,kernel_size):
        super().__init__()
        stages = []
        stage_in = in_channels  # only the first convolution sees in_channels
        for rate in (1, 2, 3):
            stages += [
                CausalConv1d(stage_in, out_channels, kernel_size=kernel_size, dilation=rate),
                nn.ReLU(),
                nn.BatchNorm1d(out_channels),
                nn.Dropout(0.2),  # dropout comes after activation and normalisation
            ]
            stage_in = out_channels
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        residual = self.net(x)
        return x + residual
    
    
class CausalCNN(nn.Module):
    """Character-level language model: embedding -> 1x1 conv -> causal blocks -> 1x1 conv.

    `forward` maps a ``(batch, time)`` tensor of character indices to logits of
    shape ``(batch, vocab_size, time)``, the layout ``nn.CrossEntropyLoss``
    expects for sequence targets of shape ``(batch, time)``.

    Args:
        input_dim: kept so existing call sites keep working; it no longer sizes
            any layer. The input projection is sized by `embed_dim` and the
            output layer by `vocab_size`.
        hidden_dim: channel width of the causal blocks.
        num_layers: number of CausalConvBlock modules stacked.
        embed_dim: width of the character embedding.
        padding_idx: index of the padding token, passed to ``nn.Embedding``.
        kernel_size: kernel size of every causal convolution.
        vocab_size: number of entries in the vocabulary.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, embed_dim, padding_idx, kernel_size, vocab_size):
        super().__init__()
        # embedding layer
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=padding_idx)

        # This projection consumes the embedding output, so its input width is
        # embed_dim (sizing it by input_dim only worked when the two were equal).
        self.input_layer = nn.Conv1d(embed_dim, hidden_dim, 1)

        layers = [
            CausalConvBlock(hidden_dim, hidden_dim, kernel_size)
            for _ in range(num_layers)
        ]
        self.network = nn.Sequential(*layers)

        # One logit per vocabulary entry. Emitting input_dim channels would let
        # argmax return an index that has no entry in idx_to_char.
        self.output_layer = nn.Conv1d(hidden_dim, vocab_size, 1)

    def forward(self, x):
        x = self.embedding(x)        # (batch, time, embed_dim)
        x = x.transpose(1, 2)        # Conv1d wants channels before time
        x = self.input_layer(x)
        x = self.network(x)
        return self.output_layer(x)  # (batch, vocab_size, time)

    def generate_output(self, input_str, char_to_idx, idx_to_char, max_length=100):
        """
        Greedily extends an input string one character at a time.

        If ``"<bos>"`` is in `char_to_idx`, the prompt is prefixed with it, as the
        training inputs built by TextDataset are, and generation stops as soon as
        ``<bos>`` is predicted, because the training targets end with it. The
        model is left in evaluation mode.

        Args:
        - input_str: The input string to process; every character must be in char_to_idx.
        - char_to_idx: Dictionary mapping characters to indices.
        - idx_to_char: Dictionary mapping indices to characters.
        - max_length: The maximum number of characters to append.

        Returns:
        - Generated string (the input string followed by the predicted characters).

        Raises:
        - ValueError: if there is nothing to condition on (empty input and no "<bos>" token).
        """
        # Use the device the weights live on rather than a notebook-level global.
        device = next(self.parameters()).device

        input_indices = [char_to_idx[char] for char in input_str]
        bos_idx = char_to_idx.get("<bos>")
        if bos_idx is not None:
            input_indices.insert(0, bos_idx)
        if not input_indices:
            raise ValueError("input_str is empty and char_to_idx has no '<bos>' token")

        self.eval()  # Set model to evaluation mode
        generated_str = input_str  # Start the output with the input string

        with torch.no_grad():
            for _ in range(max_length):
                input_tensor = torch.tensor(input_indices, dtype=torch.long, device=device).unsqueeze(0)
                logits = self(input_tensor)  # (1, vocab_size, time)

                # Index the last time step directly: squeeze() followed by [-1]
                # fails when the sequence has length 1.
                next_char_idx = int(logits[0, :, -1].argmax())

                if next_char_idx == bos_idx:
                    break  # the model predicted the end-of-sentence marker

                generated_str += idx_to_char[next_char_idx]
                input_indices.append(next_char_idx)

        return generated_str

    
    
# Example usage: a 10-block model whose embedding and hidden widths are both 512
embed_dim = 512
model = CausalCNN(
    vocab_size=len(char_to_idx),
    embed_dim=embed_dim,
    input_dim=embed_dim,
    hidden_dim=512,
    num_layers=10,
    kernel_size=5,
    padding_idx=char_to_idx["<pad>"],
)

# Function to calculate total parameters
def calculate_params(model):
    """Return the number of elements across `model.parameters()`, in millions."""
    count = 0
    for tensor in model.parameters():
        count += tensor.numel()
    return count / 1e6  # raw count -> millions

print(calculate_params(model))


dataset = TextDataset(sentences, char_to_idx)
dataloader = DataLoader(dataset, batch_size=256, shuffle=True, collate_fn=collate_fn)

# Prefer GPU 2 as before, but fall back to GPU 0 when the host exposes fewer than
# three GPUs (where "cuda:2" would be an invalid device), and to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:2" if torch.cuda.device_count() > 2 else "cuda:0")
else:
    device = torch.device("cpu")

# Move the model before constructing the optimizer, as the torch.optim
# documentation recommends, so the optimizer is built over the final parameters.
model = model.to(device)

criterion = nn.CrossEntropyLoss(ignore_index=char_to_idx["<pad>"])  # padded targets add no loss
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:

# Training loop: `num_epochs` passes over the data, one optimizer update per batch
num_epochs = 10
for epoch in range(num_epochs):
    model.train()  # training mode for the dropout and batch-norm layers
    running_loss = 0.0
    for i, (inputs, targets, attention_masks) in enumerate(dataloader):
        # attention_masks is unpacked but not used by the model or the loss
        inputs, targets = inputs.to(device), targets.to(device)

        # Clear old gradients, then forward -> loss -> backward -> update
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        torch.cuda.empty_cache()
        running_loss += loss.item()
        print(f"Batch {i+1}/{len(dataloader)}, Loss: {loss.item():.4f}")

    # Mean of the per-batch losses for this epoch
    avg_loss = running_loss / len(dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

print("Training finished!")

In [None]:
# Switch to inference mode, then greedily extend a short prompt; the result is the cell output
model.eval()
model.generate_output(input_str="HELLO", char_to_idx=char_to_idx, idx_to_char=idx_to_char)