In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

# --- 1. PREPARATION: The Vocabulary ---
# We need to map every letter to a unique number.
# We include a generic set of characters (a-z) and a few extras.
all_letters = "abcdefghijklmnopqrstuvwxyzåäöABCDEFGHIJKLMNOPQRSTUVWXYZÅÄÖ .,;'"
n_letters = len(all_letters)

def name_to_tensor(name):
    """
    Converts a string "Anna" into a Tensor of shape (Length, 1, n_letters).
    We use 'One-Hot Encoding': A vector of zeros with a single '1' at the letter's index.
    """
    tensor = torch.zeros(len(name), 1, n_letters)
    for li, letter in enumerate(name):
        # Find the index of the letter in our list
        try:
            index = all_letters.index(letter)
            tensor[li][0][index] = 1
        except ValueError:
            continue # Ignore weird characters
    return tensor

# --- 2. SWEDISH DATASET ---
# 0 = Boy (Pojke), 1 = Girl (Flicka)
training_data = [
    # --- POJKAR (Boys) ---
    # The "Names ending in E" (Nicknames)
    ("Lasse", 0), ("Bosse", 0), ("Nisse", 0), ("Olle", 0), ("Hasse", 0),
    ("Janne", 0), ("Roffe", 0), ("Kalle", 0), ("Pelle", 0), ("Sune", 0),
    ("Åke", 0), ("Börje", 0), ("Arne", 0), ("Svante", 0), ("Christer", 0),

    # Classic Swedish
    ("Gunnar", 0), ("Anders", 0), ("Johan", 0), ("Lars", 0), ("Mikael", 0),
    ("Per", 0), ("Karl", 0), ("Hans", 0), ("Peter", 0), ("Jan", 0),
    ("Thomas", 0), ("Erik", 0), ("Fredrik", 0), ("Bengt", 0), ("Sven", 0),
    ("Magnus", 0), ("Gustav", 0), ("Oscar", 0), ("William", 0), ("Lucas", 0),
    ("Alexander", 0), ("Björn", 0), ("Ulf", 0), ("Göran", 0), ("Stefan", 0),
    ("Mats", 0), ("Leif", 0), ("Marcus", 0), ("Torbjörn", 0), ("Kjell", 0),
    ("Håkan", 0), ("Lennart", 0), ("Stig", 0), ("Kerstin", 0), ("Olof", 0),
    ("Rolf", 0), ("Tommy", 0), ("Ingvar", 0), ("Kenneth", 0), ("Jörgen", 0),

    # --- FLICKOR (Girls) ---
    # The "Confusing E endings"
    ("Marie", 1), ("Therese", 1), ("Louise", 1), ("Sofie", 1), ("Emelie", 1),
    ("Natalie", 1), ("Alice", 1), ("Tove", 1), ("Signe", 1), ("Beatrice", 1),
    ("Irene", 1), ("Caroline", 1), ("Madeleine", 1), ("Amelie", 1), ("Elin", 1),

    # Classic Swedish
    ("Anna", 1), ("Eva", 1), ("Maria", 1), ("Karin", 1), ("Kristina", 1),
    ("Lena", 1), ("Sara", 1), ("Malin", 1), ("Emma", 1), ("Ingrid", 1),
    ("Birgitta", 1), ("Marianne", 1), ("Jenny", 1), ("Maja", 1), ("Elsa", 1),
    ("Julia", 1), ("Linnea", 1), ("Wilma", 1), ("Ebba", 1), ("Ida", 1),
    ("Saga", 1), ("Klara", 1), ("Ulla", 1), ("Elisabeth", 1), ("Monica", 1),
    ("Hanna", 1), ("Linda", 1), ("Susanne", 1), ("Agneta", 1), ("Katarina", 1),
    ("Gunilla", 1), ("Annika", 1), ("Britt", 1), ("Inger", 1), ("Åsa", 1),
    ("Siv", 1), ("Barbro", 1), ("Lisbet", 1), ("Maj", 1), ("Anita", 1)
]

# --- 3. THE MODEL (RNN) ---
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size

        # LSTM replaces RNN
        # It's smarter at keeping context over long sequences
        self.lstm = nn.LSTM(input_size, hidden_size)

        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_tensor):
        # LSTM needs TWO hidden states:
        # 1. h0 (Hidden state - working memory)
        # 2. c0 (Cell state - long-term memory)
        h0 = torch.zeros(1, 1, self.hidden_size)
        c0 = torch.zeros(1, 1, self.hidden_size)

        # LSTM returns: output, (hidden_state, cell_state)
        _, (hidden, cell) = self.lstm(input_tensor, (h0, c0))

        # We use the final hidden state for prediction
        output = self.fc(hidden[0])
        output = self.sigmoid(output)
        return output

# --- 4. TRAINING ---
# Setup
model = LSTM(n_letters, 128, 1) # Keep Hidden size 128
criterion = nn.BCELoss()

# INCREASE LEARNING RATE: 0.005 -> 0.01 (Helps get out of the 0.69 plateau)
optimizer = optim.SGD(model.parameters(), lr=0.01)

print("Training LSTM (Longer Run)...")
loss_avg = 0

# INCREASE ITERATIONS: 2000 -> 25000
# Since we only look at 1 name at a time, we need MANY loops to see the dataset enough times.
for epoch in range(25000):
    name, label = random.choice(training_data)

    input_tensor = name_to_tensor(name)
    target_tensor = torch.tensor([[float(label)]])

    output = model(input_tensor)
    loss = criterion(output, target_tensor)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    loss_avg += loss.item()

    if (epoch+1) % 5000 == 0:
        print(f"Iteration {epoch+1}, Avg Loss: {loss_avg/5000:.4f}")
        loss_avg = 0

print("Training finished!")

# --- 3. RE-TEST ---
print("\n--- TEST RESULTS ---")
test_names = ["Gunnar", "Annika", "Torbjörn", "Åsa", "Kjell", "Olle", "Louise", "Nisse", "Lasse", "Therese"]

print("\n--- THE FINAL EXAM (Names NEVER seen before) ---")

for name in test_names:
    predict(name)

# These names are NOT in your training_data list
final_exam = [
    "Sixten",   # Boy (Classic)
    "Ellen",    # Girl (Classic)
    "Hjalmar",  # Boy (Old school)
    "Ebba",     # Girl (Very popular)
    "Love",     # Boy (The ultimate trick! Ends in e, looks like English 'Love')
    "Signe",    # Girl (Ends in e)
    "Melker",   # Boy (Ends in r)
    "Astrid",   # Girl (Ends in d)
    "Pontus",   # Boy (Ends in s, but not double s)
    "Malte"     # Boy (Ends in e, generic structure)
]

for name in final_exam:
    predict(name)

# --- 5. TESTING ---
def predict(name):
    with torch.no_grad():
        tensor = name_to_tensor(name)
        output = model(tensor)
        prob = output.item()

        # If > 0.5 it's a Girl (1), else Boy (0)
        guess = "Girl" if prob > 0.5 else "Boy"
        confidence = prob if prob > 0.5 else 1 - prob
        print(f"Name: {name:10} -> Prediction: {guess} ({confidence*100:.1f}%)")

print("\n--- TEST RESULTS (Unseen Names) ---")
predict("Gunnar")   # Boy (Ends in r)
predict("Annika")   # Girl (Ends in a)
predict("Torbjörn") # Boy (Ends in n)
predict("Åsa")      # Girl (Short, ends in a)
predict("Kjell")    # Boy (Double consonant)
predict("Olle")
predict("Louise")
predict("Nisse")
predict("Lasse")

predict("Peter")
predict("Lollo")
predict("Kalle")





Training LSTM (Longer Run)...


NameError: name 'random' is not defined

In [2]:
def analyze_name(name):
    print(f"\nAnalyzing: {name}")
    model.eval()
    with torch.no_grad():
        input_tensor = name_to_tensor(name)

        # We need to manually step through the LSTM to see the hidden state evolve
        h0 = torch.zeros(1, 1, 128)
        c0 = torch.zeros(1, 1, 128)
        hidden = (h0, c0)

        for i in range(len(name)):
            # Feed one letter at a time
            letter_tensor = input_tensor[i].unsqueeze(0)
            _, hidden = model.lstm(letter_tensor, hidden)

            # Predict based on current memory
            output = model.fc(hidden[0])
            prob = model.sigmoid(output).item()

            # Print the brain's status
            guess = "Girl" if prob > 0.5 else "Boy"
            conf = prob if prob > 0.5 else 1 - prob
            print(f"Step {i+1} ('{name[:i+1]}'): {guess} ({conf*100:.1f}%)")

# Run it
analyze_name("Ellen")
analyze_name("Lollo")


Analyzing: Ellen
Step 1 ('E'): Boy (50.4%)
Step 2 ('El'): Boy (50.3%)
Step 3 ('Ell'): Boy (50.3%)
Step 4 ('Elle'): Boy (50.4%)
Step 5 ('Ellen'): Boy (50.3%)

Analyzing: Lollo
Step 1 ('L'): Boy (50.1%)
Step 2 ('Lo'): Boy (50.3%)
Step 3 ('Lol'): Boy (50.2%)
Step 4 ('Loll'): Boy (50.2%)
Step 5 ('Lollo'): Boy (50.3%)
