# App is good to go! start it with `streamlit run app.py` but we are to test the backend first

we are but to follow the same, right steps as we did for Auto_complete_psk.ipynb to test the backend

In [1]:
import torch
import torch.nn as nn
import math

In [2]:
device = torch.device('cpu')

### Loading Vocab ( of course all that tokenizing and stuff is doen and we just have to load the path to the vocab object)

In [3]:
vocab = torch.load('vocab_obj.pth')

### Model

In [4]:
class LSTMLanguageModel(nn.Module):
    def __init__(self, vocab_size, emb_dim, hid_dim, num_layers, dropout_rate):
                
        super().__init__()
        self.num_layers = num_layers
        self.hid_dim = hid_dim
        self.emb_dim = emb_dim

        self.embedding = nn.Embedding(vocab_size, emb_dim)
        self.lstm = nn.LSTM(emb_dim, hid_dim, num_layers=num_layers, 
                    dropout=dropout_rate, batch_first=True)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(hid_dim, vocab_size)
        
        self.init_weights()
        
    def init_weights(self):
        init_range_emb = 0.1
        init_range_other = 1/math.sqrt(self.hid_dim)
        self.embedding.weight.data.uniform_(-init_range_emb, init_range_emb)
        self.fc.weight.data.uniform_(-init_range_other, init_range_other)
        self.fc.bias.data.zero_()
        for i in range(self.num_layers):
            self.lstm.all_weights[i][0] = torch.FloatTensor(self.emb_dim,
                    self.hid_dim).uniform_(-init_range_other, init_range_other) 
            self.lstm.all_weights[i][1] = torch.FloatTensor(self.hid_dim, 
                    self.hid_dim).uniform_(-init_range_other, init_range_other) 

    def init_hidden(self, batch_size, device):
        hidden = torch.zeros(self.num_layers, batch_size, self.hid_dim).to(device)
        cell   = torch.zeros(self.num_layers, batch_size, self.hid_dim).to(device)
        return hidden, cell
    
    def detach_hidden(self, hidden):
        hidden, cell = hidden
        hidden = hidden.detach()
        cell = cell.detach()
        return hidden, cell

    def forward(self, src, hidden):
        #src: [batch size, seq len]
        embedding = self.dropout(self.embedding(src))
        #embedding: [batch size, seq len, emb_dim]
        output, hidden = self.lstm(embedding, hidden)      
        #output: [batch size, seq len, hid_dim]
        #hidden = h, c = [num_layers * direction, seq len, hid_dim)
        output = self.dropout(output) 
        prediction = self.fc(output)
        #prediction: [batch size, seq_len, vocab size]
        return prediction, hidden

In [5]:
vocab_size = len(vocab)
emb_dim = 1024
hid_dim = 1024
num_layers = 2
dropout_rate = 0.65

In [6]:
model = LSTMLanguageModel(vocab_size, emb_dim, hid_dim, num_layers, dropout_rate)

### Loading Learned Weights

In [7]:
path = './models/best-val-auto.pt' # <--------------- Change file path before submission

In [8]:
model.load_state_dict(torch.load(path, map_location=torch.device('cpu')))

<All keys matched successfully>

# Inference/Testing

In [9]:
import tokenize
import io

In [10]:
tok_name = tokenize.tok_name

In [11]:
def python_code_tokenizer(content):
    tokenized_code = []
    
    try:
        for token in tokenize.generate_tokens(io.StringIO(content).readline):
            encoding = tok_name[token.type]
            if encoding == "COMMENT" or encoding== "NL":
                continue
            elif encoding == "NUMBER":
                tokenized_code.append("<NUMBER>")
            elif encoding == "STRING":
                tokenized_code.append("<STRING>")
            else:
                tokenized_code.append(token.string)
    except:
        return []
    
    return tokenized_code

In [12]:
def generate(prompt, max_seq_len, temperature, model, tokenizer, vocab, device, seed=None):
    if seed is not None:
        torch.manual_seed(seed)
    model.eval()
    tokens = tokenizer(prompt)
    indices = [vocab[t] for t in tokens]
    batch_size = 1
    hidden = model.init_hidden(batch_size, device)
    with torch.no_grad():
        for i in range(max_seq_len):
            src = torch.LongTensor([indices]).to(device)
            prediction, hidden = model(src, hidden)
            
            #prediction: [batch size, seq len, vocab size]
            #prediction[:, -1]: [batch size, vocab size] #probability of last vocab
            
            probs = torch.softmax(prediction[:, -1] / temperature, dim=-1)  
            prediction = torch.multinomial(probs, num_samples=1).item()    
            
            while prediction == vocab['<unk>']: #if it is unk, we sample again
                prediction = torch.multinomial(probs, num_samples=1).item()

            if prediction == vocab['<eos>']:    #if it is eos, we stop
                break

            indices.append(prediction) #autoregressive, thus output becomes input

    itos = vocab.get_itos()
    tokens = [itos[i] for i in indices]
    return tokens

In [13]:
prompt = 'import'
max_seq_len = 10
seed = 0

#smaller the temperature, more diverse tokens but comes 
#with a tradeoff of less-make-sense sentence
temperatures = [0.5, 0.7, 0.75, 0.8, 1.0]
for temperature in temperatures:
    generation = generate(prompt, max_seq_len, temperature, model, python_code_tokenizer, 
                          vocab, device, seed)
    print(str(temperature)+'\n'+' '.join(generation)+'\n')

0.5
import   def run_task ( ) : 
      import importlib 


0.7
import   def samp_traj ( raw_data , w , len ( backbone

0.75
import   def samp_traj ( raw_data , w , len ( backbone

0.8
import   def samp_traj ( raw_data , w , len ( backbone

1.0
import   def samp_traj ( raw_data = decoded_preds , sketch_feat = t1



In [14]:
prompt = 'import torch.nn'
max_seq_len = 30
seed = 0

#smaller the temperature, more diverse tokens but comes 
#with a tradeoff of less-make-sense sentence
temperatures = [0.5, 0.7, 0.75, 0.8, 1.0]
for temperature in temperatures:
    generation = generate(prompt, max_seq_len, temperature, model, python_code_tokenizer, 
                          vocab, device, seed)
    print(' '.join(generation), end="\n\n")

import torch . nn   if torch . cuda . is_available ( ) and not os . path . isdir ( path ) and args . outf and not os . path . isdir (

import torch . nn   if torch . nn . Parameter ( torch . tensor ) , <STRING> : 
              if not self . cuda : 
              assert isinstance ( x , list

import torch . nn   if torch . nn . Parameter ( torch . tensor ) , <STRING> : 
              if not self . cuda : 
              assert isinstance ( model , nn

import torch . nn   if torch . nn . Parameter ( torch . tensor ) , <STRING> : 
              if not self . cuda : 
              assert state . dtype in storage

import torch . nn   if torch . nn . Parameter ( torch . tensor ) : 
              import pytest 
 from collections import random 
 import numpy as np 
 import pytest 


