In [1]:
# Importing libraries
import json
import os
import random

import numpy as np
import torch
# from torch import nn
import torch.nn.functional as F
from IPython.display import display
from ipywidgets import IntProgress

import CharRNN as crnn

The code below reads in the text and maps characters to integers. The data was collected by crawling poetryfoundation.org and saving each poem as a JSON-formatted file.

In [2]:
def readData(num_poems, total_poems=10216, poems_dir="./poems/", seed=None):
    """Read `num_poems` distinct, randomly chosen poems and return them as one string.

    Poems are loaded from ``<poems_dir>/<id>.json`` where ``id`` runs from 1 to
    ``total_poems`` inclusive. Each file's ``'text'`` entry (a sequence of
    strings) is joined with newlines, and the poems themselves are also
    separated by a newline so the last line of one poem does not run into the
    first line of the next.

    Parameters
    ----------
    num_poems : int
        How many different poems to read.
    total_poems : int, optional
        Highest poem id that may be drawn.
        NOTE(review): another cell in this notebook scans ids 1..10215, so
        confirm which upper bound matches the files actually on disk.
    poems_dir : str, optional
        Directory holding the ``<id>.json`` files.
    seed : int or None, optional
        When given, the selection is drawn from a private ``random.Random(seed)``
        and is therefore repeatable; otherwise the module-level generator is used.

    Returns
    -------
    str
        The concatenated text of the selected poems.

    Raises
    ------
    ValueError
        If `num_poems` is negative or larger than `total_poems`.
    """
    if num_poems < 0 or num_poems > total_poems:
        raise ValueError(
            "num_poems must be between 0 and {} (got {})".format(total_poems, num_poems)
        )

    # Sampling without replacement guarantees that every poem is read at most
    # once; a single random id that is never refreshed would load the same
    # poem over and over.
    rng = random if seed is None else random.Random(seed)
    poem_ids = rng.sample(range(1, total_poems + 1), num_poems)

    f = IntProgress(min=0, max=num_poems, description= "Reading data...")
    display(f)
    poems = []
    try:
        for poem_id in poem_ids:
            path = os.path.join(poems_dir, str(poem_id) + ".json")
            # The context manager closes each file as soon as it is parsed.
            with open(path, encoding="utf-8") as poem_file:
                poems.append("\n".join(json.load(poem_file)['text']))
            f.value += 1
    finally:
        # Remove the progress widget even if a file is missing or malformed.
        f.close()
    print("Data read complete.")
    return "\n".join(poems)

In [3]:
# Encode the text: map each character to an integer and vice versa.

# We create two dictionaries:
# 1. int2char, which maps integers to characters
# 2. char2int, which maps characters to integers
text = readData(5000)
# Sort the vocabulary so that character ids are reproducible. Iterating a bare
# set of strings yields an arbitrary order that can differ between kernel
# sessions (string hashing is randomised per process), which would silently
# change the encoding from one run to the next.
chars = tuple(sorted(set(text)))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}

# Encode the text as an array holding one integer id per character
encoded = np.array([char2int[ch] for ch in text])

IntProgress(value=0, description='Reading data...', max=5000)

Data read complete.


In [4]:
# Define and print the net
n_hidden=512
n_layers=2

net = crnn.CharRNN(chars, n_hidden, n_layers)
print(net)

# Declaring the hyperparameters
batch_size = 128
seq_length = 100
n_epochs = 20 # start smaller if you are just testing initial behavior

# train the model
crnn.train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=250)

# Saving the model
# The file name is derived from n_epochs so that a shorter or longer run is not
# saved under a misleading "20 epoch" name (for n_epochs = 20 the name is
# unchanged: 'rnn_20_epoch.net').
model_name = 'rnn_{}_epoch.net'.format(n_epochs)

# The checkpoint stores the two architecture sizes, the learned weights and
# the character vocabulary kept on the net.
checkpoint = {'n_hidden': net.n_hidden,
              'n_layers': net.n_layers,
              'state_dict': net.state_dict(),
              'tokens': net.chars}

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

# Generating new text
print(crnn.sample(net, 1000, prime='A', top_k=5))

Training on GPU!
CharRNN(
  (lstm): LSTM(186, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5)
  (fc): Linear(in_features=512, out_features=186, bias=True)
)


IntProgress(value=0, description='Training...', max=20)

Epoch: 1/20... Step: 50... Loss: 3.1715... Val Loss: 3.1491
Epoch: 1/20... Step: 100... Loss: 3.1736... Val Loss: 3.1425
Epoch: 1/20... Step: 150... Loss: 3.1025... Val Loss: 3.0845
Epoch: 1/20... Step: 200... Loss: 2.8565... Val Loss: 2.7679
Epoch: 1/20... Step: 250... Loss: 2.6564... Val Loss: 2.5843
Epoch: 1/20... Step: 300... Loss: 2.5723... Val Loss: 2.4833
Epoch: 1/20... Step: 350... Loss: 2.5156... Val Loss: 2.4119
Epoch: 1/20... Step: 400... Loss: 2.4298... Val Loss: 2.3523
Epoch: 1/20... Step: 450... Loss: 2.3728... Val Loss: 2.3103
Epoch: 2/20... Step: 500... Loss: 2.3137... Val Loss: 2.2609
Epoch: 2/20... Step: 550... Loss: 2.3018... Val Loss: 2.2200
Epoch: 2/20... Step: 600... Loss: 2.2609... Val Loss: 2.1853
Epoch: 2/20... Step: 650... Loss: 2.2328... Val Loss: 2.1574
Epoch: 2/20... Step: 700... Loss: 2.2373... Val Loss: 2.1281
Epoch: 2/20... Step: 750... Loss: 2.2319... Val Loss: 2.1052
Epoch: 2/20... Step: 800... Loss: 2.1870... Val Loss: 2.0797
Epoch: 2/20... Step: 850.

Epoch: 14/20... Step: 6600... Loss: 1.6195... Val Loss: 1.5133
Epoch: 14/20... Step: 6650... Loss: 1.6221... Val Loss: 1.5134
Epoch: 14/20... Step: 6700... Loss: 1.6122... Val Loss: 1.5128
Epoch: 14/20... Step: 6750... Loss: 1.5832... Val Loss: 1.5103
Epoch: 14/20... Step: 6800... Loss: 1.5896... Val Loss: 1.5089
Epoch: 14/20... Step: 6850... Loss: 1.6062... Val Loss: 1.5091
Epoch: 15/20... Step: 6900... Loss: 1.6082... Val Loss: 1.5061
Epoch: 15/20... Step: 6950... Loss: 1.6088... Val Loss: 1.5062
Epoch: 15/20... Step: 7000... Loss: 1.6042... Val Loss: 1.5058
Epoch: 15/20... Step: 7050... Loss: 1.6055... Val Loss: 1.5040
Epoch: 15/20... Step: 7100... Loss: 1.5986... Val Loss: 1.5035
Epoch: 15/20... Step: 7150... Loss: 1.6006... Val Loss: 1.5030
Epoch: 15/20... Step: 7200... Loss: 1.6108... Val Loss: 1.5039
Epoch: 15/20... Step: 7250... Loss: 1.5846... Val Loss: 1.5016
Epoch: 15/20... Step: 7300... Loss: 1.5708... Val Loss: 1.4989
Epoch: 15/20... Step: 7350... Loss: 1.5641... Val Loss:

In [16]:
# Compare generated text across top_k settings 5, 10, ..., 45. Each sample is
# primed with "The" and requested with size 400 (presumably the number of
# characters to generate -- confirm against crnn.sample).
for top_k in range(5, 50, 5):
    print(top_k)
    generated = crnn.sample(net, 400, prime="The", top_k=top_k)
    print(generated)

5
The search is that
So walked and stay on the sense of the step, to the strike
Where a silver station are the flower of the battles.
A poor book of the troulds of an expectent tongue
And starts alone the brown bare sound and song.
To his beats through the train to be too look
Them through the song to seem on the black branches and the down,
Through the surface soul with a back and as they shall say an 
10
Ther mayst, the frumeration wurl
Shit of the mountain wonder brief, waiting to
the softers
We are a steam as wearons back
Of this house for the come to still feed.
No place to mention me a carp
In the door
In the people the temple
I continue a lawnchear
As the double sea between turned song,
And saying the words that watch the flowers
Implaces by a cloud of concisions,
And sailt like a friend being to
15
Then banks:
Certainly of the grible couldn,
And we lived in light, before my great
Players, to muldiculate skins,
To die, and my beh aland in any wend,
Others can not spare aloft in 

In [6]:
# Scan every poem file and report the poems whose third line passes
# str.isupper(), printed as "<id>. <third line>".
# NOTE(review): readData draws ids up to 10216 by default while this scan
# stops at 10215 -- confirm which bound matches the files on disk.
num_poems = 10215

for i in range(1, num_poems + 1):
    # Keep the lines in their own variable so the notebook-level `text`
    # corpus (a single string) is not overwritten with a list of lines.
    with open("./poems/" + str(i) + ".json", encoding="utf-8") as poem_file:
        poem_lines = json.load(poem_file)['text']
    # Poems with fewer than three lines have no third line to inspect.
    if len(poem_lines) > 2 and poem_lines[2].isupper():
        print(str(i) + ". " + poem_lines[2])
   

127. SOUTH, IT SHALL BE THE TIME WHEN NEGROES LEAVE THE
283. SUMMER
1423. “M-A-R-R-Y M-E!”
1980. (A
2406. INFOMERCIALS HAVE STARTED AND I KIND OF WANT TO DIE
3097. V
7600. F
9098. O
