In [1]:
import sys, os
import json
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

from flask import Flask, request
from transformers import (LlamaForCausalLM, CodeLlamaTokenizer,
                          BitsAndBytesConfig)
import torch

from data_management.lm_example import LmExample 
from tactic_gen.train_codellama import (collate_input, CONF_NAME, load_config,
                                        get_tokenizer)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Location of the model directory and the checkpoint inside it to load.
MODEL_LOC = "/home/ubuntu/coq-modeling/models/codellama-7b-basic"
CHECKPOINT_NUM = 1800

# The config stored alongside the checkpoints is used to build the tokenizer
# and to look up `max_input_len`.
model_conf = load_config(os.path.join(MODEL_LOC, CONF_NAME))
max_input_len = model_conf["max_input_len"]

tokenizer = get_tokenizer(model_conf)
tokenizer.add_eos_token = False  # Don't add eos to input during inference

# Load the selected checkpoint with 4-bit quantization.
model_path = os.path.join(MODEL_LOC, f"checkpoint-{CHECKPOINT_NUM}")
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model = LlamaForCausalLM.from_pretrained(
    model_path,
    quantization_config=quantization_config,
)

device = "cuda"

Loading checkpoint shards: 100%|██████████| 2/2 [00:12<00:00,  6.06s/it]


In [4]:
# Example prompt: a theorem statement, the <THM-SEP> marker, then (presumably)
# the current proof goal -- TODO confirm the expected prompt layout.
test_in = """\
Theorem mult_0_plus : ∀ n m : nat, 0 + (S n * m) = S n * m.<THM-SEP>

∀ n m : nat, 0 + S n * m = S n * m"""
collated_in = collate_input(tokenizer, max_input_len, test_in)

# Keep the whole encoding (not just the ids) so the attention mask can be
# handed to generate(), and move it to the `device` chosen in the setup cell
# rather than a second hard-coded "cuda".
encoded_in = tokenizer(collated_in, return_tensors="pt").to(device)
input_ids = encoded_in["input_ids"]  # later cells slice the prompt off with this

# Beam search returning every beam; return_dict_in_generate/output_scores make
# the result an object exposing `sequences`, `sequences_scores` and `scores`.
output = model.generate(
    input_ids,
    attention_mask=encoded_in["attention_mask"],
    num_beams=10,
    num_return_sequences=10,
    max_new_tokens=200,
    output_scores=True,
    return_dict_in_generate=True,
)


ValueError: The following `model_kwargs` are not used by the model: ['tokennizer'] (note: typos in the generate arguments will also show up in this list)

In [8]:
# Display the raw object returned by model.generate(); its `sequences` field
# holds the token ids of every returned beam (prompt tokens included).
output

BeamSearchDecoderOnlyOutput(sequences=tensor([[    1, 10244,  1773, 29918, 29900, 29918, 11242,   584, 29871, 30315,
           302,   286,   584, 14033, 29892, 29871, 29900,   718,   313, 29903,
           302,   334,   286, 29897,   353,   317,   302,   334,   286, 19423,
          4690, 29924, 29899,  1660, 29925, 29958,    13,    13, 30315,   302,
           286,   584, 14033, 29892, 29871, 29900,   718,   317,   302,   334,
           286,   353,   317,   302,   334,   286,    13, 29966,  6040,  1783,
          2965, 29958,    13,    13, 29871, 25956,   302,   286, 29889, 29871,
             2],
        [    1, 10244,  1773, 29918, 29900, 29918, 11242,   584, 29871, 30315,
           302,   286,   584, 14033, 29892, 29871, 29900,   718,   313, 29903,
           302,   334,   286, 29897,   353,   317,   302,   334,   286, 19423,
          4690, 29924, 29899,  1660, 29925, 29958,    13,    13, 30315,   302,
           286,   584, 14033, 29892, 29871, 29900,   718,   317,   302,   33

In [5]:
# Print each returned beam as (generated text, beam score). Only the tokens
# after the prompt are decoded, so the input text is not repeated.
sequences, sequences_scores = output.sequences, output.sequences_scores
prompt_len = input_ids.shape[1]
for sequence, score in zip(sequences, sequences_scores):
    completion = tokenizer.decode(sequence[prompt_len:], skip_special_tokens=True)
    print((completion, score))


('\n  intros n m. ', tensor(-0.0236, device='cuda:0'))
('intros n m. ', tensor(-0.0251, device='cuda:0'))
('intros. ', tensor(-0.0316, device='cuda:0'))
('\n  intros n m.', tensor(-0.0350, device='cuda:0'))
('\n  intros. ', tensor(-0.0460, device='cuda:0'))
('\nintros n m. ', tensor(-0.0544, device='cuda:0'))
('\nintros n m.', tensor(-0.0620, device='cuda:0'))
('\n    intros n m. ', tensor(-0.0632, device='cuda:0'))
('intros n m.', tensor(-0.0639, device='cuda:0'))
('\n  intros.', tensor(-0.0656, device='cuda:0'))


In [7]:
# NOTE(review): the original expression here was the unfinished `tokenizer.`,
# which is a SyntaxError and stops Restart & Run All. The recorded result (71)
# equals the token length of the generated sequences displayed in this
# notebook, so the cell presumably inspected that length -- TODO confirm the
# original intent.
output.sequences.shape[1]

71

In [5]:
# Display the raw generate() result again.
# NOTE(review): an earlier cell already contains this exact expression;
# consider keeping only one of them.
output

BeamSearchDecoderOnlyOutput(sequences=tensor([[    1, 10244,  1773, 29918, 29900, 29918, 11242,   584, 29871, 30315,
           302,   286,   584, 14033, 29892, 29871, 29900,   718,   313, 29903,
           302,   334,   286, 29897,   353,   317,   302,   334,   286, 19423,
          4690, 29924, 29899,  1660, 29925, 29958,    13,    13, 30315,   302,
           286,   584, 14033, 29892, 29871, 29900,   718,   317,   302,   334,
           286,   353,   317,   302,   334,   286,    13, 29966,  6040,  1783,
          2965, 29958,    13,    13, 29871, 25956,   302,   286, 29889, 29871,
             2],
        [    1, 10244,  1773, 29918, 29900, 29918, 11242,   584, 29871, 30315,
           302,   286,   584, 14033, 29892, 29871, 29900,   718,   313, 29903,
           302,   334,   286, 29897,   353,   317,   302,   334,   286, 19423,
          4690, 29924, 29899,  1660, 29925, 29958,    13,    13, 30315,   302,
           286,   584, 14033, 29892, 29871, 29900,   718,   317,   302,   33

In [6]:
# Decode the first returned sequence in full: special tokens are kept and the
# prompt is included, so the complete model input plus completion is visible.
tokenizer.decode(output.sequences[0])

'<s> Theorem mult_0_plus : ∀ n m : nat, 0 + (S n * m) = S n * m.<THM-SEP>\n\n∀ n m : nat, 0 + S n * m = S n * m\n<TACTIC>\n\n  intros n m. </s>'

In [7]:
# Decode the second returned sequence in full (prompt and special tokens kept).
# The recorded result ends with a doubled `</s>`; presumably the shorter beam
# is padded to the common sequence length -- TODO confirm.
tokenizer.decode(output.sequences[1])

'<s> Theorem mult_0_plus : ∀ n m : nat, 0 + (S n * m) = S n * m.<THM-SEP>\n\n∀ n m : nat, 0 + S n * m = S n * m\n<TACTIC>\n\n  intros n m.</s></s>'

In [8]:
# Inspect the score tensors collected because generate() was called with
# output_scores=True; the result is a tuple of tensors (presumably one per
# generation step -- TODO confirm against the generate() documentation).
output.scores

(tensor([[-19.2553, -22.7084,  -6.9077,  ..., -15.1057, -14.6581, -15.3577],
         [-19.2553, -22.7084,  -6.9077,  ..., -15.1057, -14.6581, -15.3577],
         [-19.2553, -22.7084,  -6.9077,  ..., -15.1057, -14.6581, -15.3577],
         ...,
         [-19.2553, -22.7084,  -6.9077,  ..., -15.1057, -14.6581, -15.3577],
         [-19.2553, -22.7084,  -6.9077,  ..., -15.1057, -14.6581, -15.3577],
         [-19.2553, -22.7084,  -6.9077,  ..., -15.1057, -14.6581, -15.3577]],
        device='cuda:0'),
 tensor([[-16.2049, -20.7909,  -7.4159,  ..., -14.8990, -13.2792, -15.3717],
         [-19.4558, -20.5789,  -4.5457,  ..., -16.8198, -17.0659, -17.9211],
         [-20.5151, -24.3491, -11.3335,  ..., -20.2546, -18.9214, -20.8530],
         ...,
         [-25.2098, -24.3875, -10.0008,  ..., -24.1531, -21.9400, -22.8440],
         [-24.5632, -25.1726,  -7.6961,  ..., -20.4256, -18.5867, -20.0496],
         [-24.8365, -25.4459, -13.8365,  ..., -20.8380, -18.9830, -20.7667]],
        device='cuda