# Importing Libraries

In [None]:
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import pandas as pd
import os
import re
import random
import urllib.request
from wordcloud import WordCloud
from matplotlib import animation
from matplotlib.animation import FuncAnimation

In [None]:
# Seed the Python, NumPy and PyTorch RNGs so that weight initialisation and
# character sampling are as repeatable as the backend allows after a
# Restart & Run All.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loading Data

In [None]:
# Download the sonnet corpus. The context manager closes the HTTP response
# once the body has been read instead of leaving the connection open.
SHAKESPEARE_URL = 'https://raw.githubusercontent.com/emiletimothy/Caltech-CS155-2023/main/miniproject3/data/shakespeare.txt'
with urllib.request.urlopen(SHAKESPEARE_URL) as response:
    shakespeare = response.read().decode('utf-8')

In [None]:
# Preview only the beginning of the corpus; printing the whole text floods
# the notebook output.
print(shakespeare[:1000])

                   1
From fairest creatures we desire increase,
That thereby beauty's rose might never die,
But as the riper should by time decease,
His tender heir might bear his memory:
But thou contracted to thine own bright eyes,
Feed'st thy light's flame with self-substantial fuel,
Making a famine where abundance lies,
Thy self thy foe, to thy sweet self too cruel:
Thou that art now the world's fresh ornament,
And only herald to the gaudy spring,
Within thine own bud buriest thy content,
And tender churl mak'st waste in niggarding:
  Pity the world, or else this glutton be,
  To eat the world's due, by the grave and thee.


                   2
When forty winters shall besiege thy brow,
And dig deep trenches in thy beauty's field,
Thy youth's proud livery so gazed on now,
Will be a tattered weed of small worth held:
Then being asked, where all thy beauty lies,
Where all the treasure of thy lusty days;
To say within thine own deep sunken eyes,
Were an all-eating shame, and thriftle

## Data Preprocessing

In [None]:
# Split the corpus into individual sonnets.
#
# Every sonnet is introduced by a line carrying its number and is separated
# from the next one by two blank lines ("\n\n\n"). The last sonnet has no
# trailing separator and simply runs to the end of the text, so it is handled
# by the same loop rather than by a hard-coded index/offset.
sonnets = []

for number_match in re.finditer(r"[0-9]+", shakespeare):

    # The sonnet body starts on the line after its number ...
    header_end = shakespeare.find("\n", number_match.end())
    if header_end == -1:
        # A number on the very last line with no text after it: nothing to keep.
        break
    body_start = header_end + 1

    # ... and stops at the next separator, or at end-of-text for the last one.
    body_end = shakespeare.find("\n\n\n", body_start)
    if body_end == -1:
        body_end = len(shakespeare)

    print("Process Sonnet-", len(sonnets) + 1)
    sonnets.append(shakespeare[body_start:body_end])

Process Sonnet- 1
Process Sonnet- 2
Process Sonnet- 3
Process Sonnet- 4
Process Sonnet- 5
Process Sonnet- 6
Process Sonnet- 7
Process Sonnet- 8
Process Sonnet- 9
Process Sonnet- 10
Process Sonnet- 11
Process Sonnet- 12
Process Sonnet- 13
Process Sonnet- 14
Process Sonnet- 15
Process Sonnet- 16
Process Sonnet- 17
Process Sonnet- 18
Process Sonnet- 19
Process Sonnet- 20
Process Sonnet- 21
Process Sonnet- 22
Process Sonnet- 23
Process Sonnet- 24
Process Sonnet- 25
Process Sonnet- 26
Process Sonnet- 27
Process Sonnet- 28
Process Sonnet- 29
Process Sonnet- 30
Process Sonnet- 31
Process Sonnet- 32
Process Sonnet- 33
Process Sonnet- 34
Process Sonnet- 35
Process Sonnet- 36
Process Sonnet- 37
Process Sonnet- 38
Process Sonnet- 39
Process Sonnet- 40
Process Sonnet- 41
Process Sonnet- 42
Process Sonnet- 43
Process Sonnet- 44
Process Sonnet- 45
Process Sonnet- 46
Process Sonnet- 47
Process Sonnet- 48
Process Sonnet- 49
Process Sonnet- 50
Process Sonnet- 51
Process Sonnet- 52
Process Sonnet- 53
Pr

In [None]:
# Stitch the first 154 sonnets into one training string, separated by single
# newlines (no trailing newline after the last sonnet).
full_sonnet_text = "\n".join(sonnets[idx] for idx in range(154))

In [None]:
# Show only the head of the joined text; the full string is far too long to
# display usefully.
full_sonnet_text[:500]



In [None]:
# Character vocabulary of the raw text. Sorting gives every character a stable
# index: the iteration order of a set of strings changes between interpreter
# sessions (hash randomisation), which would otherwise re-number the
# vocabulary on every kernel restart.
chars = sorted(set(full_sonnet_text))
print(chars)

['M', ',', 'y', ')', 'Y', 'r', 'R', 'e', "'", 'k', 'x', 'D', 'o', 'm', 'g', 'U', 'v', 'I', 'C', '!', 'N', 't', 'n', 'w', 'K', 'W', 'T', ':', 'q', 's', 'd', 'p', 'P', '.', '\n', 'i', 'F', 'E', 'B', 'c', 'a', 'h', ' ', ';', 'J', '?', 'L', 'G', 'A', 'b', 'O', 'j', '(', 'H', 'V', 'z', 'f', '-', 'S', 'u', 'l']


In [None]:
# Vocabulary size for the raw (case-preserving) text.
len(chars)

61

In [None]:
# Lower-cased copy of the corpus, used for the second (lower-case) experiment.
full_sonnet_text_l = full_sonnet_text.lower()

In [None]:
# Character vocabulary of the lower-cased text. Sorted so that each character
# keeps the same index across kernel restarts (set iteration order for
# strings is not stable between interpreter sessions).
chars2 = sorted(set(full_sonnet_text_l))
print(chars2)

[',', 'y', ')', 'r', 'e', "'", 'k', 'x', 'o', 'm', 'g', 'v', '!', 't', 'n', 'w', ':', 'q', 's', 'd', 'p', '.', '\n', 'i', 'c', 'a', 'h', ' ', ';', '?', 'b', 'j', '(', 'z', 'f', '-', 'u', 'l']


In [None]:
# Vocabulary size for the lower-cased text.
len(chars2)

38

### Encoding

In [None]:
# Integer-encode the raw text: every character is replaced by its position
# in `chars`.
char_encoder_map = {symbol: index for index, symbol in enumerate(chars)}
char_decoder_map = {index: symbol for symbol, index in char_encoder_map.items()}

input_encoded = np.array(list(map(char_encoder_map.__getitem__, full_sonnet_text)))
print(input_encoded)

[36  5 12 ... 16  7 33]


In [None]:
# Integer-encode the lower-cased text against `chars2`.
# Note: this rebinds the module-level `char_encoder_map` / `char_decoder_map`
# to the lower-case vocabulary.
char_encoder_map = {symbol: index for index, symbol in enumerate(chars2)}
char_decoder_map = {index: symbol for symbol, index in char_encoder_map.items()}

input_encoded2 = np.array(list(map(char_encoder_map.__getitem__, full_sonnet_text_l)))
print(input_encoded2)

[34  3  8 ... 11  4 21]


In [None]:
# Number of characters in the raw text.
len(full_sonnet_text)

94289

In [None]:
# Sanity check: the encoded array has one entry per character of the raw text.
len(input_encoded)

94289

In [None]:
# Number of characters in the lower-cased text.
len(full_sonnet_text_l)

94289

In [None]:
# Sanity check: the encoded array has one entry per character of the
# lower-cased text.
len(input_encoded2)

94289

### Helper Functions for Data Loading

In [None]:
def get_batches(char_arr, batch_size, seq_length):
    """Yield (input, target) mini-batches cut from an encoded character array.

    The array is truncated to a whole number of batches and reshaped to
    ``batch_size`` rows. Each yielded pair has shape
    ``(batch_size, seq_length)``; the target is the input shifted one step to
    the left. For the very last window, where no following column exists, the
    final target column wraps around to column 0 of the same row.

    Args:
        char_arr: array of encoded characters (must support ``reshape``).
        batch_size: number of rows (parallel sequences) per batch.
        seq_length: number of time steps per batch.

    Yields:
        Tuples ``(x, y)`` of arrays with shape ``(batch_size, seq_length)``.
    """
    chars_per_batch = batch_size * seq_length
    usable = (len(char_arr) // chars_per_batch) * chars_per_batch

    # Drop the tail that does not fill a complete batch, then lay the text out
    # as `batch_size` parallel rows.
    grid = char_arr[:usable].reshape((batch_size, -1))
    width = grid.shape[1]

    for start in range(0, width, seq_length):
        stop = start + seq_length
        x = grid[:, start:stop]

        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]
        # Next character after the window, or wrap to the row start at the end.
        y[:, -1] = grid[:, stop] if stop < width else grid[:, 0]

        yield x, y

In [None]:
def one_hot_encode(char_arr, n_labels):
    """One-hot encode an integer array along a new trailing axis.

    Works for input of any rank (the previous implementation multiplied the
    two entries of ``char_arr.shape`` and therefore only accepted 2-D input).

    Args:
        char_arr: array-like of integer class indices in ``[0, n_labels)``.
        n_labels: size of the one-hot axis.

    Returns:
        ``float32`` array of shape ``(*char_arr.shape, n_labels)`` holding 1.0
        at each index given by ``char_arr`` and 0.0 elsewhere.
    """
    char_arr = np.asarray(char_arr)

    # Fill a flat (n_items, n_labels) table first, then restore the input shape.
    one_hot = np.zeros((char_arr.size, n_labels), dtype=np.float32)
    one_hot[np.arange(char_arr.size), char_arr.ravel()] = 1.

    return one_hot.reshape((*char_arr.shape, n_labels))

# Models

## LSTM

In [None]:
class CharLSTM(nn.Module):
    """Character-level language model: an LSTM followed by a linear read-out.

    Args:
        chars: sequence of the distinct characters in the vocabulary; the
            position of a character in this sequence is its integer code.
        device: device the caller intends to run on. Stored as ``self.device``
            for callers; the module itself is moved with ``.to(...)``.
        n_hidden: size of the LSTM hidden state.
        n_layers: number of stacked LSTM layers.
    """

    def __init__(self, chars, device, n_hidden=256, n_layers=2):
        super().__init__()

        self.device = device

        self.n_layers = n_layers
        self.n_hidden = n_hidden

        # Vocabulary look-ups: index -> character and character -> index.
        self.n_chars = len(chars)
        self.char_decoder_map = dict(enumerate(chars))
        self.char_encoder_map = {v: k for k, v in self.char_decoder_map.items()}

        # LSTM layer: consumes one-hot vectors of width n_chars, batch first.
        self.lstm = nn.LSTM(self.n_chars, n_hidden, n_layers, batch_first=True)

        # Output layer: maps every hidden state back to vocabulary scores.
        self.fc = nn.Linear(n_hidden, self.n_chars)

    def forward(self, x, hidden):
        """Run the network on a batch of one-hot encoded sequences.

        Args:
            x: tensor of shape ``(batch, seq_len, n_chars)``.
            hidden: ``(h, c)`` state tuple as returned by ``init_hidden`` or a
                previous call (passed straight to ``nn.LSTM``).

        Returns:
            ``(out, hidden)`` where ``out`` has shape
            ``(batch * seq_len, n_chars)`` (unnormalised scores) and
            ``hidden`` is the updated state tuple.
        """
        # Pass through the LSTM layer
        r_output, hidden = self.lstm(x, hidden)

        # Stack up the LSTM outputs so each time step becomes one row
        out = r_output.contiguous().view(-1, self.n_hidden)

        # Pass through the output layer
        out = self.fc(out)

        return out, hidden

    def init_hidden(self, batch_size=1):
        """Return a zeroed ``(h, c)`` state for ``batch_size`` sequences.

        The tensors are created with the dtype and device of the model's own
        parameters, so the state always lives where the model lives rather
        than depending on a notebook-level ``device`` variable.
        """
        weight = next(self.parameters()).detach()
        shape = (self.n_layers, batch_size, self.n_hidden)

        hidden = (weight.new_zeros(shape), weight.new_zeros(shape))

        return hidden

# Helper Functions : Training, Prediction and Generation

## Train

In [None]:
def train(model, data, device, optimizer, criterion, epochs=10, batch_size=20,
          seq_length=40, clip=5):
    """Train ``model`` on an integer-encoded character array.

    Each epoch iterates over the batches produced by ``get_batches`` while
    carrying the hidden state from one batch to the next, and prints the loss
    of the epoch's last batch.

    Args:
        model: network exposing ``n_chars``, ``init_hidden(batch_size)`` and a
            ``model(inputs, hidden) -> (output, hidden)`` call.
        data: encoded text, as accepted by ``get_batches``.
        device: device the input and target tensors are moved to.
        optimizer: optimizer over ``model.parameters()``.
        criterion: loss called as ``criterion(output, flat_targets)``.
        epochs: number of passes over ``data``.
        batch_size: sequences per batch.
        seq_length: time steps per batch.
        clip: maximum gradient norm passed to ``clip_grad_norm_``.

    Raises:
        ValueError: if ``data`` is too short to form a single batch.
    """
    if len(data) < batch_size * seq_length:
        # Without this guard no batch is produced and the epoch summary below
        # would fail on an unbound `loss`.
        raise ValueError(
            "data has {} items but one batch needs batch_size * seq_length = {}"
            .format(len(data), batch_size * seq_length))

    model.train()

    for epoch in range(epochs):

        h = model.init_hidden(batch_size)

        for x, y in get_batches(data, batch_size, seq_length):
            # One-hot encode the inputs and turn both arrays into tensors
            x = one_hot_encode(x, model.n_chars)
            inputs = torch.from_numpy(x).to(device)
            targets = torch.from_numpy(y).to(device)

            # Keep the state values but cut them off from the previous batch's
            # graph, so back-propagation stops at the batch boundary.
            h = tuple(each.detach() for each in h)

            # Forward pass
            optimizer.zero_grad()
            output, h = model(inputs, h)

            # Backward pass. The state is detached above, so each batch has
            # its own graph and nothing needs to be retained after backward().
            loss = criterion(output, targets.reshape(-1).long())
            loss.backward()

            # Clip the gradient norm to guard against exploding gradients
            nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()

        print("Epoch: {}/{}:".format(epoch + 1, epochs),
              "Loss: {:.4f}:".format(loss.item()))

## Predict

In [None]:
def predict(model, char, device, T=1, h=None, top_k=5):
    """Sample the next character given one input character and a hidden state.

    Args:
        model: network exposing ``char_encoder_map``, ``char_decoder_map``,
            ``n_chars`` and a ``model(inputs, hidden) -> (output, hidden)`` call.
        char: the current character; must be a key of ``model.char_encoder_map``.
        device: device the input tensor is moved to.
        T: softmax temperature (> 0); the scores are divided by ``T`` before
            the softmax.
        h: hidden state to continue from, passed to the model unchanged.
        top_k: sample only among the ``top_k`` most likely characters.
            ``None`` (or a value larger than the vocabulary) samples from the
            whole distribution.

    Returns:
        ``(next_char, h)``: the sampled character and the updated hidden state.

    Raises:
        ValueError: if ``T`` is not positive or ``top_k`` is less than 1.
    """
    if T <= 0:
        raise ValueError("T must be positive, got {}".format(T))
    if top_k is not None and top_k < 1:
        raise ValueError("top_k must be at least 1 or None, got {}".format(top_k))

    x = np.array([[model.char_encoder_map[char]]])
    x = one_hot_encode(x, model.n_chars)
    inputs = torch.from_numpy(x).to(device)

    with torch.no_grad():
        # Model output
        out, h = model(inputs, h)

        # Character probabilities (softmax with temperature)
        p = F.softmax(out / T, dim=1).cpu()

        # Keep the k most likely characters; never ask for more than exist.
        n_candidates = p.size(1)
        k = n_candidates if top_k is None else min(top_k, n_candidates)
        p, top_ch = p.topk(k)

    # Flatten with reshape rather than squeeze so k == 1 still yields 1-D arrays.
    top_ch = top_ch.numpy().reshape(-1)
    p = p.numpy().reshape(-1).astype(np.float64)

    # Draw one candidate according to the re-normalised probabilities
    char = np.random.choice(top_ch, p=p / p.sum())

    return model.char_decoder_map[int(char)], h

## Generate

In [None]:
def generate_poem(model, poem_size, device, seed='M', T=1, top_k=None):
    """Generate text from ``model``, starting from the characters in ``seed``.

    The seed is first fed through the model one character at a time to build
    up the hidden state; the character sampled after the last seed character
    is appended, followed by ``poem_size`` further sampled characters. The
    result therefore contains ``len(seed) + poem_size + 1`` characters.

    Args:
        model: trained network exposing ``init_hidden`` and ``n_chars``, used
            through ``predict``.
        poem_size: number of sampling steps after the seed has been consumed.
        device: device forwarded to ``predict``.
        seed: non-empty starting text.
        T: softmax temperature, forwarded to every ``predict`` call.
        top_k: restrict sampling to the ``top_k`` most likely characters;
            ``None`` samples from the whole vocabulary.

    Returns:
        The seed followed by the generated characters, as one string.

    Raises:
        ValueError: if ``seed`` is empty.
    """
    if not seed:
        raise ValueError("seed must contain at least one character")

    # None means "no restriction": consider every character of the vocabulary.
    if top_k is None:
        top_k = model.n_chars

    model.eval()  # eval mode

    chars = list(seed)
    with torch.no_grad():
        # Run the model over the seed to warm up the hidden state
        h = model.init_hidden(batch_size=1)
        for ch in seed:
            char, h = predict(model, ch, device, T=T, h=h, top_k=top_k)

        # Keep the character predicted after the last seed character
        chars.append(char)

        # Poem generation: feed each sampled character back in
        for _ in range(poem_size):
            char, h = predict(model, chars[-1], device, T=T, h=h, top_k=top_k)
            chars.append(char)

    return ''.join(chars)

# Main : Experiments

## LSTM

In [None]:
# Architecture
n_hidden = 200
n_layers = 1

# Hyperparameters
batch_size = 128
seq_length = 40
epochs = 2000

# Character-level LSTM over the raw (case-preserving) vocabulary `chars`
model = CharLSTM(chars, device, n_hidden=n_hidden, n_layers=n_layers).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

### Train : Raw Text

In [None]:
# Train the raw-text model on `input_encoded`; `train` prints one loss line
# per epoch.
train(model, input_encoded, device, optimizer, criterion, epochs=epochs,
      batch_size=batch_size, seq_length=seq_length)

Epoch: 1/2000: Loss: 3.1408:
Epoch: 2/2000: Loss: 3.0865:
Epoch: 3/2000: Loss: 3.0720:
Epoch: 4/2000: Loss: 3.0597:
Epoch: 5/2000: Loss: 3.0389:
Epoch: 6/2000: Loss: 3.0037:
Epoch: 7/2000: Loss: 2.9455:
Epoch: 8/2000: Loss: 2.8524:
Epoch: 9/2000: Loss: 2.7516:
Epoch: 10/2000: Loss: 2.6663:
Epoch: 11/2000: Loss: 2.5878:
Epoch: 12/2000: Loss: 2.5204:
Epoch: 13/2000: Loss: 2.4723:
Epoch: 14/2000: Loss: 2.4396:
Epoch: 15/2000: Loss: 2.4034:
Epoch: 16/2000: Loss: 2.3681:
Epoch: 17/2000: Loss: 2.3560:
Epoch: 18/2000: Loss: 2.3221:
Epoch: 19/2000: Loss: 2.3014:
Epoch: 20/2000: Loss: 2.2802:
Epoch: 21/2000: Loss: 2.2692:
Epoch: 22/2000: Loss: 2.2425:
Epoch: 23/2000: Loss: 2.2359:
Epoch: 24/2000: Loss: 2.2157:
Epoch: 25/2000: Loss: 2.1992:
Epoch: 26/2000: Loss: 2.1876:
Epoch: 27/2000: Loss: 2.1691:
Epoch: 28/2000: Loss: 2.1664:
Epoch: 29/2000: Loss: 2.1403:
Epoch: 30/2000: Loss: 2.1402:
Epoch: 31/2000: Loss: 2.1315:
Epoch: 32/2000: Loss: 2.1130:
Epoch: 33/2000: Loss: 2.1022:
Epoch: 34/2000: Los

### Generate Poem : Raw Text

In [None]:
# Sample from the raw-text model: poem_size=1000, called with T=1 and top_k=5.
print(generate_poem(model, 1000, device, seed="Shall I compare thee to a summer's day?\n", T=1, top_k=5))

Shall I compare thee to a summer's day?
Thes lack to me, then reis are woe delight.
Butided none hine, to well the guls their scapled gracenty by tart,
And tils this hard, or, rhouths strangy ears.
  Yot him should jewelprimaked be strainst know their,
Whose worthy travill thy becoie the subject lies,
Which such apprain as aby abfor thee,
  This comp canst idle no quine up huse,
Fead nut your hambsage of soul kings yourl,
Which than by darrers I dignate new,
Though I (make desert, my broos for thy foe,
Which labk'st gold canforter hand more truth is not say,
That heirs to lovie? I seen with comp reng?
The can's eyes, nothing, nor me hellive,
Or sme thou, which now I be come)
By own wing swallsw?
Make it beauty is I brigh a florthing tond,
Than heap nothing not procked but wast
  All-doiture and then might should to be?
No morid have men in thee where all mind
Mure base days of you, will gay thine swent,
Hath proud that true love of foults thou to lie.
Thou wilt now my strasst forn with

In [None]:
# Same seed and poem_size, called with T=1.5 and top_k=5.
print(generate_poem(model, 1000, device, seed="Shall I compare thee to a summer's day?\n", T=1.5, top_k=5))

Shall I compare thee to a summer's day?
Therefrand given sunsiege phate of your own,
Ginging your praise them beauty being,
And befinfth being for his forgen with shown
The have precare of my travew'st seem sock,
The flothy and his amts and thinks treasure!
When th's ten this baiteauly I her it dids.
Not is suffices a did my head, and her see the pirge.
When I dave pleas their that even is a mamed wiWher,
Make that hall of terugtates it weth riched of sight,
I must inst in juep mys scowally ar much hand
Do shall I know can love to all the taight in wietned priving,
Pience tight and cundd, with my art,
And thus I paratly loke madencent with use their trime,
So thou beed when I am contented did dead,
The liest the sternow forted for thine,
To pitiss form would to bounty once feads woe.
So death' leven to wend to grace you,
And strance of my stromivions youns befter in:
No livis impor my wain much holour dee,
On the aggeach worshings to the heart:
All to again, and to that well making lov

In [None]:
# Same seed and poem_size, called with T=0.75 and top_k=5.
print(generate_poem(model, 1000, device, seed="Shall I compare thee to a summer's day?\n", T=0.75, top_k=5))

Shall I compare thee to a summer's day?
Thes leods the self-badallefin thily shins:
Much vortay 's a babbsate's strengeyes by their view,
Gored suint with that shall fore youth,
And I am shall live with thou shouldst shouth tears.
  You so the strank on my arting shade,
Which like the leps thy pending of abuse,
That I have thought know kidded in these liest,
When I am shald I love, ertummur's rainsame thy fre,
To praifed and thy foe, where is my judgment their face,
Not what sweal ched lie not sound their raysome,
Untholt I do beauty's sumbless to delight?
Be chil chrouns no love a sweal-pirs in maje,
Sweat thence bransomer's wrickle earth,
The onate's his glat foold arting swit:
Nor hot such amppearable spirit,
What meditiens with a ghaching hours,
Even in fels winh woos me will thine of sighties.
  And thou in of with tangul least to youth wretting,
And by to chosumbatched white,
Not whils thou dotter tommer not,
It mideremingers hadind thy show,
  Tim do distain but made flowers so 

In [None]:
# Same seed and poem_size, called with T=0.25 and top_k=5.
print(generate_poem(model, 1000, device, seed="Shall I compare thee to a summer's day?\n", T=0.25, top_k=5))

Shall I compare thee to a summer's day?
Thes leods the time with to each, ha dear heart,
Make for my love as blook him thrief
As I have she masure thes terter fair
Pritume tendived thoughts in which he wound that I be gentle,
  Fair names that tire, but wase's rideles cleap.
  Thuse add canquet flatce and sur inhinispedse:
No  dary merounlios that thou give in my with restornct life,
Anchereatcention of thy love adjoy,
Whrive at thesed, agains nor set,
To  mage to his purply me to gracion,
Or ban his store, and this thy lory died.
So lovers duen in pricion go su-me,
And sur you write, age no other beressory where!
  Buc show it you, my loss being cured.
  And hin blifinf be bate, though I will be which heliss away:
Make win were in how to meget thee main
Dush day 's thou hast tho could strack:
How mak whence of this, end times requit.
How heart to five with mestrantiblion be.
  But leaver the love, althole doth lie hours,
  Sin sayful rack time whete day oten with un.
Though I (all ris

### Train : Lower Case Text

In [None]:
# Architecture
n_hidden = 200
n_layers = 1

# Hyperparameters
batch_size = 128
seq_length = 40
epochs = 2000

# Character-level LSTM over the lower-case vocabulary `chars2`
model2 = CharLSTM(chars2, device, n_hidden=n_hidden, n_layers=n_layers).to(device)

# `criterion` and `optimizer` are rebound here; the optimizer now tracks
# model2's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model2.parameters(), lr=0.001)

In [None]:
# Train the lower-case model on `input_encoded2`; `train` prints one loss
# line per epoch.
train(model2, input_encoded2, device, optimizer, criterion, epochs=epochs,
      batch_size=batch_size, seq_length=seq_length)

Epoch: 1/2000: Loss: 3.0205:
Epoch: 2/2000: Loss: 2.9759:
Epoch: 3/2000: Loss: 2.9638:
Epoch: 4/2000: Loss: 2.9423:
Epoch: 5/2000: Loss: 2.8991:
Epoch: 6/2000: Loss: 2.8096:
Epoch: 7/2000: Loss: 2.6961:
Epoch: 8/2000: Loss: 2.5920:
Epoch: 9/2000: Loss: 2.5147:
Epoch: 10/2000: Loss: 2.4568:
Epoch: 11/2000: Loss: 2.4068:
Epoch: 12/2000: Loss: 2.3647:
Epoch: 13/2000: Loss: 2.3310:
Epoch: 14/2000: Loss: 2.2982:
Epoch: 15/2000: Loss: 2.2723:
Epoch: 16/2000: Loss: 2.2670:
Epoch: 17/2000: Loss: 2.2289:
Epoch: 18/2000: Loss: 2.2122:
Epoch: 19/2000: Loss: 2.1913:
Epoch: 20/2000: Loss: 2.1889:
Epoch: 21/2000: Loss: 2.1541:
Epoch: 22/2000: Loss: 2.1445:
Epoch: 23/2000: Loss: 2.1274:
Epoch: 24/2000: Loss: 2.1116:
Epoch: 25/2000: Loss: 2.0953:
Epoch: 26/2000: Loss: 2.0885:
Epoch: 27/2000: Loss: 2.0713:
Epoch: 28/2000: Loss: 2.0635:
Epoch: 29/2000: Loss: 2.0488:
Epoch: 30/2000: Loss: 2.0377:
Epoch: 31/2000: Loss: 2.0245:
Epoch: 32/2000: Loss: 2.0151:
Epoch: 33/2000: Loss: 2.0019:
Epoch: 34/2000: Los

### Generate Poem : Lower Case Text

In [None]:
# Sample from the lower-case model: poem_size=1000, called with T=1 and
# top_k=5. The seed is lower-case to match model2's vocabulary.
print(generate_poem(model2, 1000, device, seed="shall i compare thee to a summer's day?\n", T=1, top_k=5))

shall i compare thee to a summer's day?
thou art to dreaking, what it with my lays yee 
but woll's day, not to time this golders deeds,
thence vice de infied, nor being shall in the even
doth lone is my gloas in sears with deseiss,
that parts to thee world to make owered, words' noundse on mench is my nofe spent,
from my sice groan bo diffit upot some,
  whilst it in merong ond mayst thou pleas,
thou thrimivatawn to lies broughts efthech mies, and thou your sweet skill.
who will is for thee is nothil 'wart'rsilest spent,
  then flame which is not so lifful love's lie,
to thy sweet fin, to make thou dost buth my madn,
being for new-rount be withorio,
for thou art the foil het with self-substance:
o cendicuse may i do debence to me?
and i ampristing pride in this poot resing,
but wintout so moly, with hear'st gald,   and and oblow drown,
your ant or she laving sweet form seed thy chie.
  is not faines if hise, and still some
when their virtue oll muse worly excaste
as the mount of live (

In [None]:
# Same seed and poem_size, called with T=1.5 and top_k=5.
print(generate_poem(model2, 1000, device, seed="shall i compare thee to a summer's day?\n", T=1.5, top_k=5))

shall i compare thee to a summer's day?
thou art more lovely tough in you do intome,
un han if her times, thou your love the silly gried,
that despite is mine own worth they day one,
  potios is being find of selfils in reements,
with men's fan their this, thou despests rive,
with thou shouldst indirn of the stranger reise,
and every lies, and see it unhore,
the pape unnater resure may still not to men,
rnygreding on my braigh art sporat his,
  my friend's murit faurl will colosed expressed.
which subleng lights to might self-sained
theschale be ullearest thou to manrers hers.
not but thee one mothertes this not book.
  and keep eye to haprs with my love's gone,
yet thou turnupary sould i widers to be way,
how hate more shalt begeame to feart,
not to trut what besteited from their dot,
which by other's ginde if every view,
more us that chere do phersed eater,
which is not be of your debach friend)
i amay arg all were dotious arguments
cruepin of with my mind eyes thee,
in theil art the

In [None]:
# Same seed and poem_size, called with T=0.75 and top_k=5.
print(generate_poem(model2, 1000, device, seed="shall i compare thee to a summer's day?\n", T=0.75, top_k=5))

shall i compare thee to a summer's day?
thou art my loor, slaseed with me defited,
from the jearly in all hour silve.
andwed for which hips with saw it is some
if i then be there more place in this,
  thriull not to with tere when time's me,
the soll know shall i leaph my verses, a look it i not.
those shor thight be sumple vibtry rained
loss.
being thy beauty can soot,
  to lonk to love that thou wilt,
hip your make the view,
grows the lasg bark thy parts of wretch did?
my foil furlicount my adrivised spent,
for of daintand forsower, an will decays,
is not thy sec, ander these prese,
which shall far the beauty this time doth expressed,
whereing in his grave termest bo mone, thee give.
though i mouth true, i ran more shall rage,
which should i callered with never shone.
unuse each that fair truthest fould and crospertsed in;
but is that by ambestak werthought
where buth in merrage of many as died with knows do shower,
who conting thou should not to treatter,
which no lines the spirion 

In [None]:
# Same seed and poem_size, called with T=0.25 and top_k=5.
print(generate_poem(model2, 1000, device, seed="shall i compare thee to a summer's day?\n", T=0.25, top_k=5))

shall i compare thee to a summer's day?
thou art more do show think looks should nothing
forth tords' not, to yould, recimes bore of of fing:
their onturvione like the lee so bright
that skill nauring of sweet how, and seame,
swilt was should that friend through like defearing pocie,
for thy namernct that my liverunces with my elas
wishoving stop by one bain and is rushers be now it love's entwe,
that my love is is not be theie great,
or do their part membaring of your life?
ay up, they dowh beauty beauteous seem, thy account,
no profors of thy shore 's best i houlds are
from hours are self to give bear lony,
but that give then most is sainy must beffed expressed,
buine eirst is thy formen retused than my strong,
that you your self seild, what from him, fairs thou benks,
when other in one covered loving,
and i my self a los, it self i ne,mands to be
which all my self with pry and rine,
how hat you will you give shall being made unframe.
the baty a grief, thou shouldst and shame!
  mire