In [None]:
import random
import numpy as np

In [None]:
# Read the training placenames, one per line, ignoring blank lines.
# The context manager closes the file handle as soon as the list is built
# (the bare open() used previously left the handle open).
# NOTE(review): the file is read with the platform default encoding — confirm
# whether an explicit encoding should be passed for this data file.
with open('logainm-train.txt') as placename_file:
    stripped_lines = (line.strip() for line in placename_file)
    placenames = [name for name in stripped_lines if name]
# Length, in characters, of the longest placename in the training data.
maxlen = max(len(a) for a in placenames)

In [None]:
# Character vocabulary: every distinct character seen in the placenames,
# plus a '<PAD>' token used both as left-padding and as the end-of-name marker.
vocab = {c for a in placenames for c in a}
vocab.add('<PAD>')
# Assign indices from the *sorted* vocabulary. Iterating the set directly
# yields an order that can differ between interpreter runs (string hashing is
# randomised), which would make the character<->index mapping irreproducible.
decoder = dict(enumerate(sorted(vocab)))               # index -> token
encoder = {token: index for index, token in decoder.items()}  # token -> index

In [None]:
from tensorflow.keras.utils import to_categorical
inputlen = 20
pad_id = encoder['<PAD>']
X = []
y = []
for name in placenames:
    # Left-pad the encoded name with `inputlen` padding tokens so that every
    # position, including the very first character, has a full-width context.
    padded = [pad_id]*inputlen + [encoder[ch] for ch in name]
    # The targets are the name's own characters followed by one '<PAD>',
    # which teaches the model where a name ends.
    targets = padded[inputlen:] + [pad_id]
    for start, target in enumerate(targets):
        # Window of the `inputlen` tokens immediately preceding `target`.
        X.append(padded[start:start+inputlen])
        y.append(target)

# X: one row of `inputlen` token indices per example; y: one-hot next token.
X = np.array(X)
y = to_categorical(y, num_classes=len(vocab))

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the (window, next-token) examples as a development set.
# The fixed random_state keeps the split the same from run to run.
X_train, X_dev, y_train, y_dev = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Input, LSTM
emb_dim = 20
# Next-character model: token indices -> embeddings -> LSTM -> softmax over vocab.
model = Sequential()
# Declare the input shape with an explicit Input layer. The Embedding
# `input_length` argument is deprecated in Keras 3 and ignored there, which
# would leave the model unbuilt when summary() is called below.
model.add(Input(shape=(inputlen,)))
model.add(Embedding(input_dim=len(vocab), output_dim=emb_dim))
model.add(LSTM(units=50,dropout=0.2,recurrent_dropout=0.2))
# One output unit per vocabulary entry; '<PAD>' doubles as the end-of-name class.
model.add(Dense(len(vocab), activation='softmax'))
# Targets are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [None]:
# Train on the training split, evaluating on the dev split after every epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_dev, y_dev),
    epochs=10,
    batch_size=128,
    verbose=2,
)

In [None]:
def sample(preds, temperature=1.0):
    """Draw one class index from a probability vector rescaled by temperature.

    Args:
        preds: 1-D array-like of non-negative class probabilities, e.g. one
            row of softmax output.
        temperature: Non-negative scaling factor. Values below 1 sharpen the
            distribution, values above 1 flatten it. A temperature of 0
            returns the most probable index deterministically.

    Returns:
        The index of the selected class.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature == 0:
        # Limit of the rescaled distribution as temperature -> 0: greedy
        # choice. Also avoids dividing by zero below.
        return np.argmax(preds)
    # Zero probabilities legitimately map to -inf here (they end up with
    # probability 0 again), so silence the divide-by-zero warning from log.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0 before exponentiating. Without this, a
    # very small temperature underflows every exp() to 0 and the
    # normalisation becomes 0/0 = NaN, which makes multinomial() raise.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    # One multinomial trial yields a one-hot vector; argmax recovers the index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
def generate(temperature=1.0, max_length=100):
    """Generate one placename character by character from the trained model.

    Starts from a context window of `inputlen` padding tokens, repeatedly asks
    `model` for the next-character distribution, samples from it with
    `sample`, and slides the window forward by one token.

    Args:
        temperature: Sampling temperature forwarded to `sample`.
        max_length: Safety cap; generation stops once the answer is longer
            than this many characters, even if no '<PAD>' has been sampled.

    Returns:
        The generated string (without the terminating '<PAD>').
    """
    answer = ''
    # All-padding start window, matching the initial context used when the
    # training windows were built. (Previously referenced an undefined
    # `vmap`; the token->index mapping in this notebook is `encoder`.)
    Xout = [encoder['<PAD>']]*inputlen
    while True:
        ydist = model.predict(np.array([Xout]), verbose=0)
        nextchar_index = sample(ydist[0],temperature)
        nextchar = decoder[nextchar_index]
        # '<PAD>' is the end-of-name marker the model was trained to emit.
        if nextchar == '<PAD>' or len(answer)>max_length:
            break
        answer += nextchar
        # Slide the fixed-width context window forward by one token.
        Xout.append(nextchar_index)
        Xout.pop(0)
    return answer
    

In [None]:
def generate_new(temperature=1.0):
    """Generate a placename that does not appear in the training list.

    Calls `generate` repeatedly; each candidate found in `placenames` is
    reported on stdout and discarded, and the first unseen one is returned.

    Args:
        temperature: Sampling temperature forwarded to `generate`.

    Returns:
        A generated string that is not an element of `placenames`.
    """
    candidate = generate(temperature)
    while candidate in placenames:
        print(candidate,'is a real placename')
        candidate = generate(temperature)
    return candidate

In [None]:
# Show one newly generated name next to a randomly chosen real one.
invented_name = generate_new(0.5)
real_name = random.choice(placenames)
print(invented_name,'vs.',real_name)