## Quantization for LM

Reducing the size of Neural Networks is called Network Quantizaiton where the "quantization" meaning number of bits representing a number.

**nn.Module** vs **nn.Functional**
The technique we are going to use for quantization called "net-aware quantizaiton" is 1:1 mapping of operations and to model. So we cannot re-use operations.

In [1]:
import os
from io import open
import time

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class LSTMModel(nn.Module):
    def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5):
        super(LSTMModel, self).__init__()
        
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken,ninp)
        self.rnn = nn.LSTM(ninp, nhid, ntoken, dropout=dropout)
        self.decoder = nn.Linear(nhid,ntoken)
        
        self.init_weights()

        self.nhid = nhid
        self.nlayers = nlayers
        
    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange,initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange,initrange)
        
    
    def forward(self,x, hidden):
        emb = self.drop(self.encoder(input))
        output, hidden = self.rnn(emb,hidden)
        output = self.drop(output)
        output = self.decoder(output)
    
        return output
    def init_hidden(self, bsize):
        weight = next(self.parameters())
        return (weight.new_zeros(self.nlayers, bsize, self.nhid), weight.new_zeros(self.nlayers, bsize, self.nhid))
        

We load wikitext from the torchtext module



In [3]:
class Dictionary(object):
    def __init__(self):
        self.word2idx = {}
        self.idx2word = []
        
    def add_word(self, word):
        if word not in self.word2idx.keys():
            self.idx2word.append(word)
            self.word2idx[word] = len(self.idx2word) -1
        return self.word2idx[word]
    def __len__(self):
        return len(self.idx2word)
    
class Corpus(object):
    def __init__(self, path):
        self.dictionary = Dictionary()
        self.train = self.tokenize(os.path.join(path, 'train.txt'))
        self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
        self.test = self.tokenize(os.path.join(path, 'test.txt'))
    
    def tokenize(self, path):
        
        assert os.path.exists(path)
        
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                words = line.split() + ['<eos>']
                for word in words:
                    self.dictionary.add_word(word)
        with open(path, 'r', encoding="utf8") as f:
            idss = []
            for line in f:
                words = line.split() + ['<eos>']
                ids = []
                for word in words:
                    ids.append(self.dictionary.word2idx[word])
                    
                idss.append(torch.tensor(ids).type(torch.int64))
            ids = torch.cat(idss)
        return ids
    
model_data_filepath = 'data/'

corpus = Corpus('./wikitext-2')
                    
 
        
            

In [5]:
ntokens = len(corpus.dictionary)

model = LSTMModel(ntoken=ntokens,ninp=512, nhid=256, nlayers=5)

model.load_state_dict(
    torch.load(
        'word_language_model_quantize.pth',
        map_location=torch.device('cpu')
        )
    )

model.eval()
print(model)

KeyboardInterrupt: 