<a href="https://colab.research.google.com/github/leandrorodor/DL_PyThorch/blob/main/c0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://www.kaggle.com/code/sumantindurkhya/text-generation-from-shakespeare-s-play-pytorch

In [6]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

In [7]:
import torch
import torch.nn as nn
# import torch.nn.functional as F

from collections import Counter
import warnings

import string
import nltk

import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
%matplotlib inline

In [9]:
warnings.filterwarnings('ignore')

In [10]:
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x7fd85b4179a0>

In [11]:
batch_size = 16
seq_size = 32
embedding_size = 64
lstm_size = 64
gradients_norm = 5
# set device parameter
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Load data

In [13]:
# read document
with open ('/content/alllines.txt', 'r') as f:
    doc = f.read()

Data preparation

In [14]:
# get list of words from document
def doc2words(doc):
    lines = doc.split('\n')
    lines = [line.strip(r'\"') for line in lines]
    words = ' '.join(lines).split()
    return words

In [15]:
def removepunct(words):
    punct = set(string.punctuation)
    words = [''.join([char for char in list(word) if char not in punct]) for word in words]
    return words

In [16]:
# get vocab from word list
def getvocab(words):
    wordfreq = Counter(words)
    sorted_wordfreq = sorted(wordfreq, key=wordfreq.get)
    return sorted_wordfreq

In [17]:
# get dictionary of int to words and word to int
def vocab_map(vocab):
    int_to_vocab = {k:w for k,w in enumerate(vocab)}
    vocab_to_int = {w:k for k,w in int_to_vocab.items()}
    return int_to_vocab, vocab_to_int

In [18]:
words = removepunct(doc2words(doc))
vocab = getvocab(words)
int_to_vocab, vocab_to_int = vocab_map(vocab)

In [19]:
def get_batches(words, vocab_to_int, batch_size, seq_size):
    # generate a Xs and Ys of shape (batchsize * num_batches) * seq_size
    word_ints = [vocab_to_int[word] for word in words]
    num_batches = int(len(word_ints) / (batch_size * seq_size))
    Xs = word_ints[:num_batches*batch_size*seq_size]
    Ys = np.zeros_like(Xs)
    Ys[:-1] = Xs[1:]
    Ys[-1] = Xs[0]
    Xs = np.reshape(Xs, (num_batches*batch_size, seq_size))
    Ys= np.reshape(Ys, (num_batches*batch_size, seq_size))
    
    # iterate over rows of Xs and Ys to generate batches
    for i in range(0, num_batches*batch_size, batch_size):
        yield Xs[i:i+batch_size, :], Ys[i:i+batch_size, :]

Model

In [20]:
class RNNModule(nn.Module):
    # initialize RNN module
    def __init__(self, n_vocab, seq_size=32, embedding_size=64, lstm_size=64):
        super(RNNModule, self).__init__()
        self.seq_size = seq_size
        self.lstm_size = lstm_size
        self.embedding = nn.Embedding(n_vocab, embedding_size)
        self.lstm = nn.LSTM(embedding_size,
                            lstm_size,
                            batch_first=True)
        self.dense = nn.Linear(lstm_size, n_vocab)
        
    def forward(self, x, prev_state):
        embed = self.embedding(x)
        output, state = self.lstm(embed, prev_state)
        logits = self.dense(output)

        return logits, state
    
    def zero_state(self, batch_size):
        return (torch.zeros(1, batch_size, self.lstm_size),torch.zeros(1, batch_size, self.lstm_size))

In [21]:
def get_loss_and_train_op(net, lr=0.001):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    return criterion, optimizer

In [22]:
def generate_text(device, net, words, n_vocab, vocab_to_int, int_to_vocab, top_k=5):
    net.eval()

    state_h, state_c = net.zero_state(1)
    state_h = state_h.to(device)
    state_c = state_c.to(device)
    for w in words:
        ix = torch.tensor([[vocab_to_int[w]]]).to(device)
        output, (state_h, state_c) = net(ix, (state_h, state_c))
    
    _, top_ix = torch.topk(output[0], k=top_k)
    choices = top_ix.tolist()
    choice = np.random.choice(choices[0])

    words.append(int_to_vocab[choice])
    
    for _ in range(100):
        ix = torch.tensor([[choice]]).to(device)
        output, (state_h, state_c) = net(ix, (state_h, state_c))

        _, top_ix = torch.topk(output[0], k=top_k)
        choices = top_ix.tolist()
        choice = np.random.choice(choices[0])
        words.append(int_to_vocab[choice])

    print(' '.join(words))

In [23]:
def train_rnn(words, vocab_to_int, int_to_vocab, n_vocab):
    
    # RNN instance
    net = RNNModule(n_vocab, seq_size, embedding_size, lstm_size)
    net = net.to(device)
    criterion, optimizer = get_loss_and_train_op(net, 0.01)

    iteration = 0
    
    for e in range(50):
        batches = get_batches(words, vocab_to_int, batch_size, seq_size)
        state_h, state_c = net.zero_state(batch_size)

        # Transfer data to GPU
        state_h = state_h.to(device)
        state_c = state_c.to(device)
        for x, y in batches:
            iteration += 1

            # Tell it we are in training mode
            net.train()

            # Reset all gradients
            optimizer.zero_grad()

            # Transfer data to GPU
            x = torch.tensor(x).to(device)
            y = torch.tensor(y).to(device)

            logits, (state_h, state_c) = net(x, (state_h, state_c))
            loss = criterion(logits.transpose(1, 2), y)

            state_h = state_h.detach()
            state_c = state_c.detach()

            loss_value = loss.item()

            # Perform back-propagation
            loss.backward(retain_graph=True)

            _ = torch.nn.utils.clip_grad_norm_(net.parameters(), gradients_norm)
            
            # Update the network's parameters
            optimizer.step()

            if iteration % 100 == 0:
                print('Epoch: {}/{}'.format(e, 200),'Iteration: {}'.format(iteration),'Loss: {}'.format(loss_value))

            # if iteration % 1000 == 0:
                # predict(device, net, flags.initial_words, n_vocab,vocab_to_int, int_to_vocab, top_k=5)
                # torch.save(net.state_dict(),'checkpoint_pt/model-{}.pth'.format(iteration))
                
    return net

Train

In [None]:
rnn_net = train_rnn(words, vocab_to_int, int_to_vocab, len(vocab))

Epoch: 0/200 Iteration: 100 Loss: 6.7442827224731445
Epoch: 0/200 Iteration: 200 Loss: 7.145484924316406
Epoch: 0/200 Iteration: 300 Loss: 7.04975700378418
Epoch: 0/200 Iteration: 400 Loss: 6.623109817504883
Epoch: 0/200 Iteration: 500 Loss: 7.114477634429932


Generate text

In [None]:
generate_text(device, rnn_net, ['hey', 'you'], len(vocab), vocab_to_int, int_to_vocab)

ChatGPT

In [None]:
import torch
import torch.nn as nn

# Define the RNN class
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        
    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden
    
    def init_hidden(self):
        return torch.zeros(1, self.hidden_size)

# Define the input and output sizes
input_size = len(all_letters)
output_size = len(all_letters)

# Define the hidden size
hidden_size = 128

# Create the RNN
rnn = RNN(input_size, hidden_size, output_size)

# Define the loss function and optimizer
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(rnn.parameters(), lr=0.005)

# Function to turn a string into a tensor
def input_tensor(line):
    tensor = torch.zeros(len(line), 1, input_size)
    for i in range(len(line)):
        tensor[i][0][all_letters.find(line[i])] = 1
    return tensor

# Function to turn a tensor into a string
def output_tensor(line):
    letter_indexes = [all_letters.find(line[i]) for i in range(len(line))]
    return torch.LongTensor(letter_indexes)

# Function to turn a one-hot tensor into a string
def output_string(output):
    s = ""
    for i in range(len(output)):
        s += all_letters[output[i]]
    return s

# Function to train the RNN
def train(line_tensor, category_tensor):
    hidden = rnn.init_hidden()
    rnn.zero_grad()
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)
    loss = criterion(output, category_tensor)
    loss.backward()
    optimizer.step()
    return output, loss.item()

# Function to evaluate the RNN
def evaluate(line_tensor):
    hidden = rnn.init_hidden()
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_t
