<a href="https://colab.research.google.com/github/sanviaithal26/Architecting-LLMs-WiDS/blob/main/week2_completed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## E01 : Trigram

In [13]:
import torch
import torch.nn.functional as F

!wget https://raw.githubusercontent.com/karpathy/makemore/master/names.txt

# 1. Load the data
words = open('names.txt', 'r').read().splitlines()

# 2. Build the vocabulary and mappings
chars = sorted(list(set(''.join(words))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s,i in stoi.items()}

# 3. Create the dataset (xs: input character, ys: target next character)
xs, ys = [], []
for w in words:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1, ix2, ix3 = stoi[ch1], stoi[ch2], stoi[ch3]
        xs.append(27*ix1 + ix2)
        ys.append(ix3)

xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = len(xs)

# 4. Initialize the neural network (one-layer, 27 neurons)
# We use a 729x27 weight matrix (mapping 729 inputs to 27 outputs)
g = torch.Generator().manual_seed(2147483647+1)
W = torch.randn((729, 27), generator=g, requires_grad=True)

# 5. Gradient Descent (Training Loop)
for k in range(100):

    # Forward pass
    xenc = F.one_hot(xs, num_classes=729).float() # input to the network: one-hot encoding
    logits = xenc @ W # predict log-counts
    counts = logits.exp() # counts, equivalent to N matrix
    probs = counts / counts.sum(1, keepdims=True) # probabilities for next character

    # Loss: negative log-likelihood (NLL)
    # We select the probability assigned to the actual next character
    loss = -probs[torch.arange(num), ys].log().mean() + 0.01*(W**2).mean() # Includes small regularizer

    # Backward pass
    W.grad = None # set gradient to zero
    loss.backward()

    # Update weights
    W.data += -50 * W.grad

    if k % 10 == 0:
        print(f"Iteration {k}: loss = {loss.item():.4f}")

# 6. Sampling from the model
for i in range(5):
    out = []
    prev_two = [0,0]
    while True:
        xenc = F.one_hot(torch.tensor([27*prev_two[0] + prev_two[1]]), num_classes=729).float()
        logits = xenc @ W
        counts = logits.exp()
        p = counts / counts.sum(1, keepdims=True)

        next_ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
        if next_ix == 0:
            break
        out.append(itos[next_ix])
        prev_two = [prev_two[1], next_ix]
    print(''.join(out))

--2025-12-21 17:40:57--  https://raw.githubusercontent.com/karpathy/makemore/master/names.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 228145 (223K) [text/plain]
Saving to: ‘names.txt.11’


2025-12-21 17:40:57 (74.3 MB/s) - ‘names.txt.11’ saved [228145/228145]

Iteration 0: loss = 3.7375
Iteration 10: loss = 3.1778
Iteration 20: loss = 2.9177
Iteration 30: loss = 2.7716
Iteration 40: loss = 2.6733
Iteration 50: loss = 2.6008
Iteration 60: loss = 2.5447
Iteration 70: loss = 2.4999
Iteration 80: loss = 2.4633
Iteration 90: loss = 2.4330
den
gislwqxlkonni
dari
chrqagrvzhbibocqilkharj
ixamarkudxhce


## E02.1: Testing Bigram on Test and Dev Sets

In [27]:
import torch
import torch.nn.functional as F
import random

# 1. Load the data
words_unshuffled= open('names.txt', 'r').read().splitlines()
num_words = len(words_unshuffled)
random.shuffle(words)
n1 = int(0.8*num_words)
n2 = int(0.9*num_words)
train_set = words[:n1]
dev_set = words[n1:n2]
test_set = words[n2:]

In [28]:

# 2. Build the vocabulary and mappings
chars = sorted(list(set(''.join(train_set))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s,i in stoi.items()}

# 3. Create the dataset (xs: input character, ys: target next character)
xs, ys = [], []
for w in train_set:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1, ix2 = stoi[ch1], stoi[ch2]
        xs.append(ix1)
        ys.append(ix2)

xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = xs.nelement()

# 4. Initialize the neural network (one-layer, 27 neurons)
# We use a 27x27 weight matrix (mapping 27 inputs to 27 outputs)
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((27, 27), generator=g, requires_grad=True)

# 5. Gradient Descent (Training Loop)
for k in range(100):

    # Forward pass
    xenc = F.one_hot(xs, num_classes=27).float() # input to the network: one-hot encoding
    logits = xenc @ W # predict log-counts
    counts = logits.exp() # counts, equivalent to N matrix
    probs = counts / counts.sum(1, keepdims=True) # probabilities for next character

    # Loss: negative log-likelihood (NLL)
    # We select the probability assigned to the actual next character
    loss = -probs[torch.arange(num), ys].log().mean() + 0.01*(W**2).mean() # Includes small regularizer

    # Backward pass
    W.grad = None # set gradient to zero
    loss.backward()

    # Update weights
    W.data += -50 * W.grad

    if k % 10 == 0:
        print(f"Iteration {k}: loss = {loss.item():.4f}")

Iteration 0: loss = 3.7681
Iteration 10: loss = 2.6966
Iteration 20: loss = 2.5828
Iteration 30: loss = 2.5421
Iteration 40: loss = 2.5220
Iteration 50: loss = 2.5105
Iteration 60: loss = 2.5033
Iteration 70: loss = 2.4984
Iteration 80: loss = 2.4949
Iteration 90: loss = 2.4924


In [29]:
xsd, ysd = [], []
for w in dev_set:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1, ix2 = stoi[ch1], stoi[ch2]
        xsd.append(ix1)
        ysd.append(ix2)

xsd = torch.tensor(xsd)
ysd = torch.tensor(ysd)
num = xsd.nelement()

 # Forward pass
xenc = F.one_hot(xsd, num_classes=27).float() # input to the network: one-hot encoding
logits = xenc @ W # predict log-counts
counts = logits.exp() # counts, equivalent to N matrix
probs = counts / counts.sum(1, keepdims=True) # probabilities for next character

    # Loss: negative log-likelihood (NLL)
    # We select the probability assigned to the actual next character
lossd = -probs[torch.arange(num), ysd].log().mean() + 0.01*(W**2).mean() # Includes small regularizer
print(f"Dev set loss: {lossd.item()}")

Dev set loss: 2.4840035438537598


In [30]:
xst, yst = [], []
for w in test_set:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1, ix2 = stoi[ch1], stoi[ch2]
        xst.append(ix1)
        yst.append(ix2)

xst = torch.tensor(xst)
yst = torch.tensor(yst)
num = xst.nelement()

 # Forward pass
xenc = F.one_hot(xst, num_classes=27).float() # input to the network: one-hot encoding
logits = xenc @ W # predict log-counts
counts = logits.exp() # counts, equivalent to N matrix
probs = counts / counts.sum(1, keepdims=True) # probabilities for next character

    # Loss: negative log-likelihood (NLL)
    # We select the probability assigned to the actual next character
losst = -probs[torch.arange(num), yst].log().mean() + 0.01*(W**2).mean() # Includes small regularizer
print(f"Test set loss: {losst.item()}")

Dev set loss: 2.492913246154785


Final loss with test and dev sets is similar to that of training set. This shows there's no particular overfitting that's observed.

## E02.2: Testing Trigram on Test and Dev Sets

In [31]:
import torch
import torch.nn.functional as F
import random

# 1. Load the data
words_unshuffled= open('names.txt', 'r').read().splitlines()
num_words = len(words_unshuffled)
random.shuffle(words)
n1 = int(0.8*num_words)
n2 = int(0.9*num_words)
train_set = words[:n1]
dev_set = words[n1:n2]
test_set = words[n2:]

In [32]:
# 2. Build the vocabulary and mappings
chars = sorted(list(set(''.join(words))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s,i in stoi.items()}

# 3. Create the dataset (xs: input character, ys: target next character)
xs, ys = [], []
for w in train_set:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1, ix2, ix3 = stoi[ch1], stoi[ch2], stoi[ch3]
        xs.append(27*ix1 + ix2)
        ys.append(ix3)

xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = len(xs)

# 4. Initialize the neural network (one-layer, 27 neurons)
# We use a 729x27 weight matrix (mapping 729 inputs to 27 outputs)
g = torch.Generator().manual_seed(2147483647+1)
W = torch.randn((729, 27), generator=g, requires_grad=True)

# 5. Gradient Descent (Training Loop)
for k in range(100):

    # Forward pass
    xenc = F.one_hot(xs, num_classes=729).float() # input to the network: one-hot encoding
    logits = xenc @ W # predict log-counts
    counts = logits.exp() # counts, equivalent to N matrix
    probs = counts / counts.sum(1, keepdims=True) # probabilities for next character

    # Loss: negative log-likelihood (NLL)
    # We select the probability assigned to the actual next character
    loss = -probs[torch.arange(num), ys].log().mean() + 0.01*(W**2).mean() # Includes small regularizer

    # Backward pass
    W.grad = None # set gradient to zero
    loss.backward()

    # Update weights
    W.data += -50 * W.grad

    if k % 10 == 0:
        print(f"Iteration {k}: loss = {loss.item():.4f}")

Iteration 0: loss = 3.7382
Iteration 10: loss = 3.1798
Iteration 20: loss = 2.9198
Iteration 30: loss = 2.7734
Iteration 40: loss = 2.6748
Iteration 50: loss = 2.6020
Iteration 60: loss = 2.5457
Iteration 70: loss = 2.5006
Iteration 80: loss = 2.4638
Iteration 90: loss = 2.4333


In [33]:
xsd, ysd = [], []
for w in dev_set:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1, ix2, ix3 = stoi[ch1], stoi[ch2], stoi[ch3]
        xsd.append(27*ix1 + ix2)
        ysd.append(ix3)

xsd = torch.tensor(xsd)
ysd = torch.tensor(ysd)
num = len(xsd)

# Forward pass
xenc = F.one_hot(xsd, num_classes=729).float() # input to the network: one-hot encoding
logits = xenc @ W # predict log-counts
counts = logits.exp() # counts, equivalent to N matrix
probs = counts / counts.sum(1, keepdims=True) # probabilities for next character

    # Loss: negative log-likelihood (NLL)
    # We select the probability assigned to the actual next character
lossd = -probs[torch.arange(num), ysd].log().mean() + 0.01*(W**2).mean() # Includes small regularizer
print(f"Dev set loss: {lossd.item()}")

Dev set loss: 2.4210987091064453


In [16]:
xst, yst = [], []
for w in test_set:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1, ix2, ix3 = stoi[ch1], stoi[ch2], stoi[ch3]
        xst.append(27*ix1 + ix2)
        yst.append(ix3)

xst = torch.tensor(xst)
yst = torch.tensor(yst)
num = len(xst)

# Forward pass
xenc = F.one_hot(xst, num_classes=729).float() # input to the network: one-hot encoding
logits = xenc @ W # predict log-counts
counts = logits.exp() # counts, equivalent to N matrix
probs = counts / counts.sum(1, keepdims=True) # probabilities for next character

    # Loss: negative log-likelihood (NLL)
    # We select the probability assigned to the actual next character
losst = -probs[torch.arange(num), yst].log().mean() + 0.01*(W**2).mean() # Includes small regularizer
print(f"Test set loss: {losst.item()}")

Test set loss: 2.1974117755889893


Once again, final loss with test and dev sets is similar to that of training set. This shows there's no particular overfitting that's observed.

## E03: L2 Regularization

In [3]:
import torch
import torch.nn.functional as F
import random

!wget https://raw.githubusercontent.com/karpathy/makemore/master/names.txt

# 1. Load the data
words= open('names.txt', 'r').read().splitlines()
num_words = len(words)
random.shuffle(words)
n1 = int(0.8*num_words)
n2 = int(0.9*num_words)
train_set = words[:n1]
dev_set = words[n1:n2]
test_set = words[n2:]

--2025-12-22 10:38:01--  https://raw.githubusercontent.com/karpathy/makemore/master/names.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 228145 (223K) [text/plain]
Saving to: ‘names.txt.1’


2025-12-22 10:38:01 (7.30 MB/s) - ‘names.txt.1’ saved [228145/228145]



In [6]:
# 2. Build the vocabulary and mappings
chars = sorted(list(set(''.join(words))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s,i in stoi.items()}

# 3. Create the dataset (xs: input character, ys: target next character)
xs, ys = [], []
for w in train_set:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1, ix2, ix3 = stoi[ch1], stoi[ch2], stoi[ch3]
        xs.append(27*ix1 + ix2)
        ys.append(ix3)

xs = torch.tensor(xs)
ys = torch.tensor(ys)
num = len(xs)

# 4. Initialize the neural network (one-layer, 27 neurons)
# We use a 729x27 weight matrix (mapping 729 inputs to 27 outputs)
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((729, 27), generator=g, requires_grad=True)



In [10]:
# 5. Gradient Descent (Training Loop)
reg = 1
for i in range(4):
    for k in range(100):

      # Forward pass
      xenc = F.one_hot(xs, num_classes=729).float() # input to the network: one-hot encoding
      logits = xenc @ W # predict log-counts
      counts = logits.exp() # counts, equivalent to N matrix
      probs = counts / counts.sum(1, keepdims=True) # probabilities for next character

      # Loss: negative log-likelihood (NLL)
      # We select the probability assigned to the actual next character
      loss = -probs[torch.arange(num), ys].log().mean() + reg*(W**2).mean() # Includes small regularizer

      # Backward pass
      W.grad = None # set gradient to zero
      loss.backward()

      # Update weights
      W.data += -50 * W.grad

    print(f"Reg = {reg}, Loss = {loss.item():.4f}")
    reg *= 0.1

Reg = 1, Loss = 2.8070
Reg = 0.1, Loss = 2.2984
Reg = 0.010000000000000002, Loss = 2.1995
Reg = 0.0010000000000000002, Loss = 2.1647


In [12]:
#Changing the value of regulariser and checking loss using dev set

reg = 1
xsd, ysd = [], []
for w in dev_set:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1, ix2, ix3 = stoi[ch1], stoi[ch2], stoi[ch3]
        xsd.append(27*ix1 + ix2)
        ysd.append(ix3)

xsd = torch.tensor(xsd)
ysd = torch.tensor(ysd)
num = len(xsd)

# Forward pass
xenc = F.one_hot(xsd, num_classes=729).float() # input to the network: one-hot encoding
logits = xenc @ W # predict log-counts
counts = logits.exp() # counts, equivalent to N matrix
probs = counts / counts.sum(1, keepdims=True) # probabilities for next character

# Loss: negative log-likelihood (NLL)
# We select the probability assigned to the actual next character
for i in range(4):
  lossd = -probs[torch.arange(num), ysd].log().mean() + reg*(W**2).mean() # Includes small regularizer
  print(f"Reg = {reg}, Dev set loss: {lossd.item():.4f}")
  reg *= 0.1

Reg = 1, Dev set loss: 2.8302
Reg = 0.1, Dev set loss: 2.2442
Reg = 0.010000000000000002, Dev set loss: 2.1856
Reg = 0.0010000000000000002, Dev set loss: 2.1798


Comparing Values:

*   Reg = 1, Training set loss = 2.8070 , Dev set loss = 2.8302 , Difference = -0.0232
*   Reg = 0.1, Training set loss = 2.2984 , Dev set loss = 2.2442 , Difference = +0.0542
*   Reg = 0.01, Training set loss = 2.1995 , Dev set loss = 2.1856 , Difference = +0.0139
*   Reg = 0.001, Training set loss = 2.1647 , Dev set loss = 2.1798 , Difference = -0.0151

Since, the smallest difference is with reg = 0.01
best_reg = 0.01










In [14]:
#Loss with Test set
best_reg = 0.01
xst, yst = [], []
for w in test_set:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1, ix2, ix3 = stoi[ch1], stoi[ch2], stoi[ch3]
        xst.append(27*ix1 + ix2)
        yst.append(ix3)

xst = torch.tensor(xst)
yst = torch.tensor(yst)
num = len(xst)

# Forward pass
xenc = F.one_hot(xst, num_classes=729).float() # input to the network: one-hot encoding
logits = xenc @ W # predict log-counts
counts = logits.exp() # counts, equivalent to N matrix
probs = counts / counts.sum(1, keepdims=True) # probabilities for next character

    # Loss: negative log-likelihood (NLL)
    # We select the probability assigned to the actual next character
losst = -probs[torch.arange(num), yst].log().mean() + best_reg*(W**2).mean() # Includes small regularizer
print(f"Test set loss: {losst.item():.4f}")



Test set loss: 2.1974


## E04: Replacement for One-Hot

In [None]:
# xenc = F.one_hot(xst, num_classes=729).float()
# logits = xenc @ W

logits = W[xs]
# as xenc = 1 and was multiplied with the weight (=weight) (rest gave 0) only for the xth element which is stored in xs

## E05: F.cross_entropy

In [15]:
xst, yst = [], []
for w in test_set:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1, ix2, ix3 = stoi[ch1], stoi[ch2], stoi[ch3]
        xst.append(27*ix1 + ix2)
        yst.append(ix3)

xst = torch.tensor(xst)
yst = torch.tensor(yst)
num = len(xst)

# Forward pass
logits = W[xst]
loss = F.cross_entropy(logits, yst)

# Loss: negative log-likelihood (NLL)
# We select the probability assigned to the actual next character
losst = -probs[torch.arange(num), yst].log().mean() + best_reg*(W**2).mean() # Includes small regularizer
print(f"Test set loss: {losst.item():.4f}")

Test set loss: 2.1974


Same loss as using spelled out probability and nll

## E06: Noisy Morse Denoiser
The "Noisy Signal" Denoiser is a project that uses the Trigram model to recover original English names from corrupted Morse code. It works by balancing Morse match penalties with English letter probabilities to find the most likely intended message using a Beam Search algorithm.


I wanted to try this idea out when I first thought of the application of trigrams to interpret Morse. Just predicting new names would not be as fun and could get too compliacted as the length of each character in Morse is different, so a change in the code from trigram would be required anyway.


Instead, this project uses a 'noisy' input that wishes to convey a name but has some errors in the Morse code. Using the names.txt database of names, the project predicts the likely name the Morse code is trying to convey.

In [37]:
import torch
import torch.nn.functional as F

# 1. DOWNLOAD AND LOAD DATA
!wget https://raw.githubusercontent.com/karpathy/makemore/master/names.txt

words = open('names.txt', 'r').read().splitlines()

# 2. BUILD VOCABULARY AND MAPPINGS
chars = sorted(list(set(''.join(words))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s,i in stoi.items()}

# 3. BUILD THE 27x27x27 TRIGRAM COUNTS
N = torch.zeros((27, 27, 27), dtype=torch.int32)

for w in words:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1, ix2, ix3 = stoi[ch1], stoi[ch2], stoi[ch3]
        N[ix1, ix2, ix3] += 1 # Fill counts matrix

# Convert counts to Log-Probabilities using log_softmax
# This handles normalization and numerical stability.
P = F.log_softmax(N.float(), dim=2)

# 4. MORSE DICTIONARY
MORSE_DICT = {
    'a': '.-', 'b': '-...', 'c': '-.-.', 'd': '-..', 'e': '.',
    'f': '..-.', 'g': '--.', 'h': '....', 'i': '..', 'j': '.---',
    'k': '-.-', 'l': '.-..', 'm': '--', 'n': '-.', 'o': '---',
    'p': '.--.', 'q': '--.-', 'r': '.-.', 's': '...', 't': '-',
    'u': '..-', 'v': '...-', 'w': '.--', 'x': '-..-', 'y': '-.--',
    'z': '--..'
}

--2025-12-22 17:38:28--  https://raw.githubusercontent.com/karpathy/makemore/master/names.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 228145 (223K) [text/plain]
Saving to: ‘names.txt.4’


2025-12-22 17:38:28 (24.6 MB/s) - ‘names.txt.4’ saved [228145/228145]



In [63]:
def get_match_score(target_char, noisy_segment):
    actual = MORSE_DICT.get(target_char, "")
    # If lengths match, the penalty is small (based on dot/dash flips)
    if len(actual) == len(noisy_segment):
        diffs = sum(1 for a, b in zip(actual, noisy_segment) if a != b)
        return -(diffs * 10)
    # If lengths don't match, give a medium penalty
    return -20

In [39]:
# 6. BEAM SEARCH DECODER
def denoise_morse(noisy_morse_list, beam_width=5):
    # Start with log-probability 0.0 and two start-of-word tokens '..'
    beam = [(0.0, [0, 0])]

    for noisy_seg in noisy_morse_list:
        candidates = []

        for current_score, path in beam:
            prev1, prev2 = path[-2], path[-1] # Context from 3D matrix

            # Try every possible letter A-Z
            for char_idx in range(1, 27):
                char_str = itos[char_idx]

                # 1. Trigram Score: How likely is this letter in English names?
                transition_score = P[prev1, prev2, char_idx].item()

                # 2. Match Score: How well does it match our noisy signal?
                match_score = get_match_score(char_str, noisy_seg)

                # Combine (Add logs = Multiply probabilities)
                total_score = current_score + transition_score + (match_score*5)
                candidates.append((total_score, path + [char_idx]))

        # Sort and prune candidates to keep only the best ones
        candidates.sort(key=lambda x: x[0], reverse=True)
        beam = candidates[:beam_width]

    # Convert indices back to text
    results = []
    for score, path in beam:
        clean_name = "".join([itos[i] for i in path[2:]])
        results.append((clean_name, round(score, 2)))
    return results

In [64]:
# --- EXECUTION ---
#Sending a noisy input - . -- ..
noisy_input = ["-", ".", "--", ".."]

top_results = denoise_morse(noisy_input)

print("Top Denoised Suggestions:")
for i, (name, score) in enumerate(top_results):
    print(f"{i+1}. {name.upper()} (Log-Prob Score: {score})")

Top Denoised Suggestions:
1. TALI (Log-Prob Score: -273.3)
2. ELLA (Log-Prob Score: -303.3)
3. TAMA (Log-Prob Score: -305.3)
4. ELLI (Log-Prob Score: -319.3)
5. TAMI (Log-Prob Score: -331.3)


Given Noisy sample: - . -- ..  Translates to TEMI, which isn't a real name. These are the possible real names from the noisy input

Outputs:

*   "- .- .-.. .."
*   ". .-.. .-.. .-"
*  "- .- -- .- "
*   ". .-.. .-.. .."
*   "- .- -- .."








In [70]:
#Sending a noisy input     .. . .- .- .--- --- IEAAJO
#Most reasonable output    .. - .- .-.. .. .- ITALIA
noisy_input = ["..", ".", ".-", ".-", ".---", "---"]

top_results = denoise_morse(noisy_input)

print("Top Denoised Suggestions:")
for i, (name, score) in enumerate(top_results):
    print(f"{i+1}. {name.upper()} (Log-Prob Score: {score})")

Top Denoised Suggestions:
1. ITALEE (Log-Prob Score: -522.3)
2. ITALIA (Log-Prob Score: -548.3)
3. ITALIN (Log-Prob Score: -561.3)
4. ITALEY (Log-Prob Score: -596.3)
5. ITHARI (Log-Prob Score: -603.3)
