In [1]:
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from einops import rearrange

In [2]:
with open('/home/iot/jupyter/root_dir/liudongdong/dataset/charprediction/val.txt', 'r') as f:
    text = f.read()

chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}

encoded = np.array([char2int[ch] for ch in text])

In [3]:
def one_hot_encode(arr, n_labels):
    
    # Initialize the the encoded array
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    
    # Fill the appropriate elements with ones
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    
    # Finally reshape it to get back to the original array
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    
    return one_hot

In [4]:
def get_batches(arr, batch_size, seq_length):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''
    
    ## TODO: Get the number of batches we can make
    n_batches = (len(arr))//(batch_size*seq_length)
    
    ## TODO: Keep only enough characters to make full batches
    arr = arr[:(n_batches*batch_size*seq_length)]
    
    ## TODO: Reshape into batch_size rows
    size=(batch_size,-1)
    arr = arr.reshape(size)  #(batch, columns)  后续数据直接在 columns 遍历
    
    ## TODO: Iterate over the batches using a window of size seq_length
    for n in range(0, arr.shape[1], seq_length):
        # The features
        x = arr[:, n:n+seq_length]
        # The targets, shifted by one
        y = np.zeros_like(x)
        try:
          y[:, :-1],y[:, -1]=x[:,1:], arr[:,n+seq_length]
        except IndexError:
          y[:, :-1],y[:, -1]=x[:,1:], arr[:,0]
        yield x, y

In [5]:
# check if GPU is available
train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else: 
    print('No GPU available, training on CPU; consider making n_epochs very small.')

Training on GPU!


In [6]:
class CharRNN(nn.Module):
    
    def __init__(self, tokens, n_hidden=256, n_layers=2,
                               drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr
        
        # creating character dictionaries
        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}
        
        ## TODO: define the layers of the model
        self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, 
                            dropout=drop_prob, batch_first=True)   #注意这里
        
        self.dropout=nn.Dropout(drop_prob)

        self.fc = nn.Linear(n_hidden, len(self.chars))
      
    
    def forward(self, x, hidden):
        ''' Forward pass through the network. 
            These inputs are x, and the hidden/cell state `hidden`. '''
                
        ## TODO: Get the outputs and the new hidden state from the lstm
        #x=rearrange(x,'b s d-> s b d')
        r_output,hidden=self.lstm(x,hidden)
        #r_output=rearrange(r_output,'s b d-> b s d')
        out=self.dropout(r_output)
        #print("self.dropout",out.shape)
        out=out.contiguous().view(-1,self.n_hidden)
        #print("self.contiguous",out.shape)
        out=self.fc(out)
# self.dropout torch.Size([128, 100, 512])
# self.contiguous torch.Size([12800, 512])
# output, torch.Size([12800, 94])
        # return the final output and the hidden state
        return out, hidden
    
    
    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''
        # Create two new tensors with sizes n_layers x batch_size x n_hidden,
        # initialized to zero, for hidden state and cell state of LSTM
        weight = next(self.parameters()).data
        
        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())
        
        return hidden

In [7]:
## TODO: set you model hyperparameters
# define and print the net
n_hidden=512
n_layers=2

net = CharRNN(chars, n_hidden, n_layers)
print(net)


CharRNN(
  (lstm): LSTM(72, 512, num_layers=2, batch_first=True, dropout=0.5)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=512, out_features=72, bias=True)
)


In [None]:
batch_size = 64
seq_length = 20
n_epochs = 100 # start small if you are just testing initial behavior

# train the model
train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=10)

In [10]:
def predict(net, char, h=None, top_k=None):
        ''' Given a character, predict the next character.
            Returns the predicted character and the hidden state.
        '''
        
        # tensor inputs
        x = np.array([[net.char2int[char]]])
        x = one_hot_encode(x, len(net.chars))
        inputs = torch.from_numpy(x)
        
        if(train_on_gpu):
            inputs = inputs.cuda()
        
        # detach hidden state from history
        h = tuple([each.data for each in h])
        # get the output of the model
        out, h = net(inputs, h)

        # get the character probabilities
        p = F.softmax(out, dim=1).data
        if(train_on_gpu):
            p = p.cpu() # move to cpu
        
        # get top characters
        if top_k is None:
            top_ch = np.arange(len(net.chars))
        else:
            p, top_ch = p.topk(top_k)
            top_ch = top_ch.numpy().squeeze()
        
        # select the likely next character with some element of randomness
        p = p.numpy().squeeze()
        char = np.random.choice(top_ch, p=p/p.sum())
        
        # return the encoded value of the predicted char and the hidden state
        return net.int2char[char], h

In [11]:
def sample(net, size, prime='The', top_k=None):
        
    if(train_on_gpu):
        net.cuda()
    else:
        net.cpu()
    
    net.eval() # eval mode
    
    # First off, run through the prime characters
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)

    chars.append(char)
    
    # Now pass in the previous character and get a new one
    for ii in range(size):
        char, h = predict(net, chars[-1], h, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

In [17]:
print(sample(net, 1, prime='appl', top_k=5))

applit


In [110]:
torch.save(net, 'output/model/twolayLSTM.pth') 

In [6]:
model=torch.load('output/model/twolayLSTM.pth') 

In [112]:
print(sample(model, 1, prime='appl', top_k=5))

apples


In [7]:
torch.save(model.state_dict(), 'twolayLSTM_params.pth')

In [20]:
import requests
import numpy as np

  # "https://github.com/pradeepradyumna/SampleData/blob/master/sampledata.txt")

data = "apple banana orange black yellow good bad right wrong blue green white think thought wonderful"


# Markov Chains Algorithm

def generatetable(data, k):
    T = {}
    for i in range(len(data)-k):
        x = data[i:i+k]
        y = data[i+k]

        if T.get(x) is None:
            T[x] = {}
            T[x][y] = 1
        else:
            if T[x].get(y) is None:
                T[x][y] = 1
            else:
                T[x][y] += 1
    return T


k = 5
inital_content = "right"

T = generatetable(data.lower(), k)

for i in range(len(data)):
    inp = inital_content[-k:]

    possible_chars = list(T[inp].keys())
    possible_values = list(T[inp].values())

    sum_ = sum(T[inp].values())

    probabs = np.array(possible_values)/sum_

    next_char = np.random.choice(possible_chars, p=probabs)

    inital_content += next_char

    print(inital_content)

right 
right w
right wo
right won
right wond
right wonde
right wonder
right wonderf
right wonderfu
right wonderful


KeyError: 'erful'