<a href="https://colab.research.google.com/github/mohammadreza-mohammadi94/Deep-Learning-Projects/blob/main/Fantasy%20Name%20Generator%20(LOTR-DOTA)/name_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries & Set Up Environment

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import random

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences

import warnings
# Suppress every Python warning for the rest of the session so cell output stays compact.
# NOTE(review): a blanket 'ignore' also hides deprecation notices raised by the
# libraries used below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

import logging
# Route log records (INFO and above) to both a file and the console.
# `force=True` first removes and closes any handlers already attached to the
# root logger. Without it `basicConfig` silently does nothing when the root
# logger is already configured, e.g. by the notebook host or by an earlier run
# of this very cell, and each re-run would leave another open file handle.
logging.basicConfig(
    format="%(asctime)s-%(levelname)s-%(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
    handlers=[
        logging.FileHandler("logs.log"),  # persistent copy in the working directory
        logging.StreamHandler()           # echo to the console / cell output
    ],
    force=True,
)

In [None]:
# Setup seeds
# Seed every random number generator the notebook draws from so that a fresh
# "Restart & Run All" is repeatable:
#   * Python's `random`  - picks the random starting character in `generate_name`
#   * NumPy              - draws the next character in `sample`
#   * TensorFlow         - used while building and training the model
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Load & Process Dataset

In [None]:
def load_dataset_from_url(url, timeout=30):
    """Download a newline-separated text file and return its non-blank lines.

    Args:
        url: Address of a plain-text resource with one entry per line.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests.get` may block indefinitely on a stalled
            connection, which would hang the whole notebook run.

    Returns:
        list[str]: Every line of the response body with surrounding whitespace
        removed; lines that are empty after stripping are dropped.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by `raise_for_status`).
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()     # Fail loudly instead of parsing an error page
    # Strip each line once, then keep only the ones that still have content.
    stripped_lines = (line.strip() for line in response.text.splitlines())
    return [line for line in stripped_lines if line]

# Dataset's url
lotr_url = "https://raw.githubusercontent.com/borhanMorphy/fantasy-name-generator/main/data/lotr_names.txt"
dota_url = "https://raw.githubusercontent.com/borhanMorphy/fantasy-name-generator/main/data/dota_names.txt"

# Load datasets
lotr_names = load_dataset_from_url(lotr_url)
dota_names = load_dataset_from_url(dota_url)

# Concatenate datasets
names = lotr_names + dota_names

# Remove duplicates.
# `sorted(set(...))` is used instead of `list(set(...))`: the iteration order of
# a set of strings is not stable between interpreter sessions, so the plain
# list would feed the names to training in a different order on every run and
# undo the fixed random seeds.
names = sorted(set(names))
# Keep only entries that look like a single word: at least two characters,
# letters only (this drops names with spaces, digits, hyphens or apostrophes).
names = [name for name in names if len(name) > 1 and name.isalpha()]

print(f"Total Names: {len(names)}")

Total Names: 550


### Creating Vocabulary (Character-Level)

In [None]:
# Every distinct character that occurs in the cleaned names, in sorted order.
chars = sorted(set("".join(names)))

# Index 0 is reserved for padding (it is the fill value `pad_sequences` uses),
# so real characters are numbered from 1. Numbering them from 0 would make the
# first character share its id with '<PAD>': it would be encoded as padding and
# disappear from the reverse mapping below.
char_to_idx = {'<PAD>': 0}
char_to_idx.update({char: idx for idx, char in enumerate(chars, start=1)})

# Reverse lookup used to turn predicted indices back into text.
idx_to_chars = {idx: char for char, idx in char_to_idx.items()}
vocab_size = len(char_to_idx)

# Each token must own a distinct index, otherwise the two mappings disagree.
assert len(idx_to_chars) == vocab_size, "duplicate indices in char_to_idx"

print(f"Vocabulary Size: {vocab_size}")

Vocabulary Size: 66


### Creating Sequences

In [None]:
# Length of the longest name plus one extra slot, so that even the longest
# name is followed by at least one padding position after `pad_sequences`.
max_len = 1 + max(map(len, names))

# Encode each name as the list of its character indices.
sequences = [[char_to_idx[ch] for ch in name] for name in names]

# Padding sequences: right-fill every encoded name with zeros up to `max_len`.
X = pad_sequences(sequences, padding='post', maxlen=max_len)

### Creating Input/Output

In [None]:
# Build (prefix -> next character) training pairs.
#
# The pairs are generated from the *unpadded* `sequences`, not from the padded
# matrix `X`. Walking the padded rows would also emit one sample for every
# padding position; after re-padding, all the samples past the end of a name
# collapse into the same row (full name -> PAD). That repeats an identical
# sample many times per name, over-weights the padding class and inflates the
# reported accuracy. Here each name contributes exactly one terminator sample.
X_train = []
y_train = []

for seq in sequences:
    for i in range(1, len(seq) + 1):
        X_train.append(seq[:i])
        # Next character, or 0 (the padding index) once the name is complete so
        # the model learns where names end.
        y_train.append(seq[i] if i < len(seq) else 0)

# Right-pad every prefix to the fixed input width expected by the model.
X_train = pad_sequences(X_train, maxlen=max_len, padding='post')
y_train = np.array(y_train)

# Define the Model

In [None]:
# Character-level next-character model:
# embedding -> two stacked LSTMs (with dropout) -> softmax over the vocabulary.
#
# The input width is declared with an explicit `Input` layer instead of the
# `input_length` argument of `Embedding`: that argument is deprecated in current
# Keras releases, and declaring the shape up front means the model is already
# built when `summary()` runs, so output shapes and parameter counts are shown.
model = Sequential([
    tf.keras.Input(shape=(max_len,)),   # one row of `max_len` character indices
    Embedding(vocab_size, 128),
    LSTM(256, return_sequences=True),   # keeps the full sequence for the next LSTM
    Dropout(0.3),
    LSTM(128),                          # only the final state is passed on
    Dropout(0.3),
    Dense(vocab_size, activation='softmax')
])

# Targets are integer class ids (not one-hot vectors), hence the sparse loss.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()

In [None]:
# Keep the returned History object: it holds the per-epoch loss/accuracy for
# later inspection or plotting, and assigning it stops the cell from printing
# the bare object repr.
# NOTE(review): the saved output of this cell reports 150 epochs while the code
# requests 100 — re-run the cell so the stored output matches.
history = model.fit(X_train, y_train, epochs=100, batch_size=64, verbose=1)

Epoch 1/150
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.5441 - loss: 2.4873
Epoch 2/150
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5745 - loss: 1.8326
Epoch 3/150
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5874 - loss: 1.6701
Epoch 4/150
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5970 - loss: 1.6338
Epoch 5/150
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5971 - loss: 1.6225
Epoch 6/150
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5958 - loss: 1.6048
Epoch 7/150
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5980 - loss: 1.5951
Epoch 8/150
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.6005 - loss: 1.5826
Epoch 9/150
[1m112/112[0m [32

<keras.src.callbacks.history.History at 0x7bfc285a4890>

### Creating Names with Temperature Sampling

In [None]:
def sample(preds, temperature=0.7):
    """Draw one index from a probability vector after temperature scaling.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Values below 1 sharpen the distribution, values above 1
            flatten it. A value of 0 or less selects the most likely index
            deterministically (greedy decoding) instead of dividing by zero.

    Returns:
        The sampled index, as returned by `np.argmax`.
    """
    preds = np.asarray(preds).astype('float64')

    if temperature <= 0:
        # Limit of the scaled distribution as temperature -> 0.
        return np.argmax(preds)

    # log(0) is -inf, which correctly becomes probability 0 after exp();
    # silence only the divide warning NumPy would emit for it.
    with np.errstate(divide='ignore'):
        scaled = np.log(preds) / temperature

    # Shift so the largest logit is 0. The result is mathematically unchanged,
    # but with a small temperature this keeps exp() from underflowing every
    # entry to 0, which would turn the normalisation into 0/0.
    scaled = scaled - np.max(scaled)
    exp_preds = np.exp(scaled)
    preds = exp_preds / np.sum(exp_preds)

    # One multinomial trial yields a one-hot row; argmax recovers its position.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def generate_name(model, max_len, char_to_idx, idx_to_char,
                  seed=None, temperature=0.7, max_attempts=10):
    """Generate one name character by character with the trained model.

    Args:
        model: Object with a `predict(batch, verbose=0)` method that returns one
            probability vector over the vocabulary per input row.
        max_len: Fixed input width; also the maximum length of a generated name.
        char_to_idx: Mapping from character to integer index. Index 0 is treated
            as padding and as the end-of-name signal.
        idx_to_char: Reverse mapping from index to character.
        seed: Optional text the name should start with. It is lower-cased and
            characters missing from the vocabulary are skipped; if nothing
            usable remains, or `seed` is None, a random character is used.
        temperature: Sampling temperature forwarded to `sample`.
        max_attempts: How many times to retry when the result is not valid.

    Returns:
        str | None: The first generated name that is longer than two characters
        and purely alphabetic, or None if every attempt failed.
    """
    # All real character indices. Filtering on the value (rather than slicing
    # off the first dict entry) keeps the padding index out no matter where
    # '<PAD>' sits in the dictionary.
    start_candidates = [idx for idx in char_to_idx.values() if idx != 0]

    for _attempt in range(max_attempts):
        seed_idx = []
        if seed is not None:
            # Skip unknown characters and anything that maps to the padding index.
            seed_idx = [char_to_idx[c] for c in seed.lower()
                        if char_to_idx.get(c, 0) != 0]
        if not seed_idx:
            seed_idx = [random.choice(start_candidates)]

        generated = list(seed_idx)
        for _step in range(max_len - len(seed_idx)):
            input_seq = pad_sequences([generated], maxlen=max_len, padding='post')
            pred = model.predict(input_seq, verbose=0)[0]
            next_char_idx = sample(pred, temperature)
            if next_char_idx == 0:  # padding predicted: the name is finished
                break
            generated.append(int(next_char_idx))

        name = ''.join(idx_to_char[idx] for idx in generated
                       if idx != 0 and idx in idx_to_char)
        if len(name) > 2 and name.isalpha():  # check if name is valid
            return name
    return None

In [None]:
# Each entry: (heading printed before the batch, number of names to draw,
# seed text passed to `generate_name`, or None for a random first character).
demo_batches = [
    ("Randomly Generated Names: ", 5, None),
    ("\nRandomly Generated Names (Seed 'el'):", 3, "el"),
    ("\nRandomly Generated Names (Seed 'gan'): ", 3, "Gan"),
]

for heading, count, seed_text in demo_batches:
    print(heading)
    for _ in range(count):
        name = generate_name(model, max_len, char_to_idx, idx_to_chars,
                             seed=seed_text, temperature=0.7)
        # `generate_name` returns None when no valid name was produced; skip those.
        if name:
            print(name.capitalize())

Randomly Generated Names: 
Kíli
Findegil
Ylumehtar
Zamîn
Arvodin

Randomly Generated Names (Seed 'el'):
Elagror
Elemmírion
Elendar

Randomly Generated Names (Seed 'gan'): 
Ganilladë
Gangos
Ganwendil


In [None]:
# Persist the trained model under a relative path, i.e. in the current working directory.
# NOTE(review): the `.h5` extension selects the HDF5 format; recent Keras releases
# reportedly treat it as legacy and recommend `.keras` — confirm before changing,
# since anything that loads this file depends on the name.
model.save("fantasy_name_generator_lotr_dota.h5")

