In [1]:
import sys, os
import json
# Restrict this kernel to GPU index 6. The assignment is placed above the
# `import torch` line so the variable is already set when torch is imported.
# NOTE(review): the index is hard-coded for one machine — adjust before reuse.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

from flask import Flask, request
from transformers import (LlamaForCausalLM, CodeLlamaTokenizer,
                          BitsAndBytesConfig)
import torch

from data_management.lm_example import LmExample 
from tactic_gen.train_codellama import (collate_input, CONF_NAME, load_config,
                                        get_tokenizer)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# --- Which model / checkpoint to inspect ------------------------------------
MODEL_LOC = "/home/ubuntu/coq-modeling/models/codellama-7b-basic"
CHECKPOINT_NUM = 1800

# The config file sits at the model root; the weights are read from the
# `checkpoint-<N>` sub-directory.
model_conf = load_config(os.path.join(MODEL_LOC, CONF_NAME))
model_path = os.path.join(MODEL_LOC, f"checkpoint-{CHECKPOINT_NUM}")
max_input_len = model_conf["max_input_len"]

# Build the tokenizer from the same config. During inference the prompt must
# not get an EOS appended, so switch that behaviour off.
tokenizer = get_tokenizer(model_conf)
tokenizer.add_eos_token = False

# Load the checkpoint with 4-bit quantization.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model = LlamaForCausalLM.from_pretrained(
    model_path,
    quantization_config=quantization_config,
)
device = "cuda"

Loading checkpoint shards: 100%|██████████| 2/2 [00:12<00:00,  6.06s/it]


In [5]:
# Sample prompt in the model's input format: the theorem text, the <THM-SEP>
# separator, then (presumably) the current proof goal.
test_in = """\
Theorem mult_0_plus : ∀ n m : nat, 0 + (S n * m) = S n * m.<THM-SEP>

∀ n m : nat, 0 + S n * m = S n * m"""
collated_in = collate_input(tokenizer, max_input_len, test_in)

# Tokenize once and move every returned tensor to the configured `device`
# (instead of a second hard-coded "cuda" string).
encoded_in = tokenizer(collated_in, return_tensors="pt").to(device)
input_ids = encoded_in["input_ids"]

# Beam width; every beam is returned so all candidates can be inspected.
NUM_BEAMS = 10

output = model.generate(
    input_ids,
    # Pass the tokenizer's mask explicitly so generate() does not have to
    # infer it from pad-token positions. `.get` yields None (generate()'s own
    # default) should the tokenizer not return a mask.
    attention_mask=encoded_in.get("attention_mask"),
    num_beams=NUM_BEAMS,
    num_return_sequences=NUM_BEAMS,
    max_new_tokens=200,
    output_scores=True,
    return_dict_in_generate=True,
    pad_token_id=tokenizer.pad_token_id,
)




In [None]:
# Display the whole object returned by model.generate().
output

In [None]:
# NOTE(review): scratch expression — it only accesses the `.to` attribute of
# output[0] without calling it; looks like leftover exploration, consider removing.
output[0].to

In [10]:
# Number of tokens each returned beam actually generated, with padding excluded.
# Padding is counted over the generated suffix only, so a pad id that happens to
# occur in the prompt portion can never be subtracted from the generated length.
# The end-of-sequence token is not padding and therefore still counts.
generated_ids = output.sequences[:, input_ids.shape[1]:]
num_padding_tokens = (generated_ids == tokenizer.pad_token_id).sum(dim=1)
generated_ids.shape[1] - num_padding_tokens

tensor([7, 8, 6, 7, 6, 5, 4, 8, 6, 7], device='cuda:0')

In [13]:
# Token ids produced for the beam at index 1, with the prompt tokens sliced off.
prompt_len = input_ids.shape[1]
output.sequences[1, prompt_len:]

tensor([   13, 29871, 25956,   302,   286, 29889, 29871,     2],
       device='cuda:0')

In [15]:
# Check which result class generate() returned.
type(output)

transformers.generation.utils.BeamSearchDecoderOnlyOutput

In [14]:
# Decode only the newly generated part of every beam, dropping special tokens.
completion_ids = output.sequences[:, input_ids.shape[1]:]
tokenizer.batch_decode(completion_ids, skip_special_tokens=True)

['\n  intros n m.',
 '\n  intros n m. ',
 'intros n m. ',
 '\n    intros n m.',
 '\n  intros. ',
 '\n  intros.',
 'intros. ',
 '\n    intros n m. ',
 'reflexivity. ',
 '\nintros n m.']

In [7]:
# Print each beam's completion next to its sequence score. Special tokens are
# kept in the decoded text so the end-of-sequence and padding tokens are visible.
sequences, sequences_scores = output.sequences, output.sequences_scores
prompt_len = input_ids.shape[1]
for beam_ids, beam_score in zip(sequences, sequences_scores):
    beam_text = tokenizer.decode(beam_ids[prompt_len:], skip_special_tokens=False)
    print((beam_text, beam_score))


('\n  intros n m.</s>▁<PRE>', tensor(-0.0225, device='cuda:0'))
('\n  intros n m. </s>', tensor(-0.0266, device='cuda:0'))
('intros n m. </s>▁<PRE>▁<PRE>', tensor(-0.0381, device='cuda:0'))
('\n    intros n m.</s>▁<PRE>', tensor(-0.0473, device='cuda:0'))
('\n  intros. </s>▁<PRE>▁<PRE>', tensor(-0.0502, device='cuda:0'))
('\n  intros.</s>▁<PRE>▁<PRE>▁<PRE>', tensor(-0.0505, device='cuda:0'))
('intros. </s>▁<PRE>▁<PRE>▁<PRE>▁<PRE>', tensor(-0.0529, device='cuda:0'))
('\n    intros n m. </s>', tensor(-0.0546, device='cuda:0'))
('reflexivity. </s>▁<PRE>▁<PRE>', tensor(-0.0596, device='cuda:0'))
('\nintros n m.</s>▁<PRE>', tensor(-0.0643, device='cuda:0'))


In [7]:
# NOTE(review): incomplete expression — the attribute name after the dot is
# missing, so this cell cannot run as written. Restore the intended attribute
# or delete the cell.
tokenizer.

71

In [5]:
# Display the whole generate() result again (same expression as an earlier cell).
output

BeamSearchDecoderOnlyOutput(sequences=tensor([[    1, 10244,  1773, 29918, 29900, 29918, 11242,   584, 29871, 30315,
           302,   286,   584, 14033, 29892, 29871, 29900,   718,   313, 29903,
           302,   334,   286, 29897,   353,   317,   302,   334,   286, 19423,
          4690, 29924, 29899,  1660, 29925, 29958,    13,    13, 30315,   302,
           286,   584, 14033, 29892, 29871, 29900,   718,   317,   302,   334,
           286,   353,   317,   302,   334,   286,    13, 29966,  6040,  1783,
          2965, 29958,    13,    13, 29871, 25956,   302,   286, 29889, 29871,
             2],
        [    1, 10244,  1773, 29918, 29900, 29918, 11242,   584, 29871, 30315,
           302,   286,   584, 14033, 29892, 29871, 29900,   718,   313, 29903,
           302,   334,   286, 29897,   353,   317,   302,   334,   286, 19423,
          4690, 29924, 29899,  1660, 29925, 29958,    13,    13, 30315,   302,
           286,   584, 14033, 29892, 29871, 29900,   718,   317,   302,   33

In [6]:
# Decode the complete beam at index 0 — prompt plus completion — with the
# default decode settings.
beam0_ids = output.sequences[0]
tokenizer.decode(beam0_ids)

'<s> Theorem mult_0_plus : ∀ n m : nat, 0 + (S n * m) = S n * m.<THM-SEP>\n\n∀ n m : nat, 0 + S n * m = S n * m\n<TACTIC>\n\n  intros n m. </s>'

In [7]:
# Decode the complete beam at index 1 — prompt plus completion — with the
# default decode settings.
beam1_ids = output.sequences[1]
tokenizer.decode(beam1_ids)

'<s> Theorem mult_0_plus : ∀ n m : nat, 0 + (S n * m) = S n * m.<THM-SEP>\n\n∀ n m : nat, 0 + S n * m = S n * m\n<TACTIC>\n\n  intros n m.</s></s>'

In [8]:
# Show the score tensors that generate() returned because output_scores=True
# was requested.
# NOTE(review): this prints large tensors; a summary such as the number of
# entries and one entry's shape would keep the notebook output short.
output.scores

(tensor([[-19.2553, -22.7084,  -6.9077,  ..., -15.1057, -14.6581, -15.3577],
         [-19.2553, -22.7084,  -6.9077,  ..., -15.1057, -14.6581, -15.3577],
         [-19.2553, -22.7084,  -6.9077,  ..., -15.1057, -14.6581, -15.3577],
         ...,
         [-19.2553, -22.7084,  -6.9077,  ..., -15.1057, -14.6581, -15.3577],
         [-19.2553, -22.7084,  -6.9077,  ..., -15.1057, -14.6581, -15.3577],
         [-19.2553, -22.7084,  -6.9077,  ..., -15.1057, -14.6581, -15.3577]],
        device='cuda:0'),
 tensor([[-16.2049, -20.7909,  -7.4159,  ..., -14.8990, -13.2792, -15.3717],
         [-19.4558, -20.5789,  -4.5457,  ..., -16.8198, -17.0659, -17.9211],
         [-20.5151, -24.3491, -11.3335,  ..., -20.2546, -18.9214, -20.8530],
         ...,
         [-25.2098, -24.3875, -10.0008,  ..., -24.1531, -21.9400, -22.8440],
         [-24.5632, -25.1726,  -7.6961,  ..., -20.4256, -18.5867, -20.0496],
         [-24.8365, -25.4459, -13.8365,  ..., -20.8380, -18.9830, -20.7667]],
        device='cuda