In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


### Check available device info

In [None]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    #print("Using GPU")
else:
    device = torch.device("cpu")
    #print("Using CPU")


### Load model and tokenizer from HuggingFace

In [47]:

model_name = "shailja/CodeGen_6B_Verilog"

tokenizer = AutoTokenizer.from_pretrained("shailja/fine-tuned-codegen-6B-Verilog")

model = AutoModelForCausalLM.from_pretrained("shailja/fine-tuned-codegen-6B-Verilog").to(device)



Downloading: 100%|██████████| 283/283 [00:00<00:00, 185kB/s]
Downloading: 100%|██████████| 2.11M/2.11M [00:00<00:00, 42.2MB/s]
Downloading: 100%|██████████| 1.08k/1.08k [00:00<00:00, 735kB/s]
Downloading: 100%|██████████| 99.0/99.0 [00:00<00:00, 65.0kB/s]
Downloading: 100%|██████████| 1.09k/1.09k [00:00<00:00, 774kB/s]
Downloading: 100%|██████████| 22.3k/22.3k [00:00<00:00, 14.0MB/s]
Downloading: 100%|██████████| 9.98G/9.98G [01:53<00:00, 88.0MB/s]
Downloading: 100%|██████████| 1.26G/1.26G [00:17<00:00, 73.4MB/s]


### Sampling Verilog for the task specificd with prompt

In [21]:
prompt = "//a half adder module "
# prompt="// Design a 2-to-1 multiplexer.\n module mux( \n    input [4:0] a, b,\n    input sel,\n    output [4:0] out );\n // When sel=0, assign a to out. \n// When sel=1, assign b to out."
n_steps = 1
n = 5
end_pattern='endmodule'
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

sample = model.generate(input_ids, max_length=128, temperature=0.5, top_p=0.9)
print(tokenizer.decode(sample[0], truncate_before_pattern=[r"endmodule"]) + end_pattern)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


//a half adder module 

module half_adder(
    input a,
    input b,
    output sum,
    output carry
    );

assign sum = a ^ b;
assign carry = a & b;

endmodule


### Sampling Verilog on a per token basis for debugging purpose

In [45]:
# max_length = 

# stop_word = 
iteration = dict()
with torch.no_grad():
    while True:
        
        iteration["Input"] = input_ids
        
        output = model(input_ids)

        # select logits of the first batch and the last token and apply softmax
        next_token_logits = output.logits[0, -1, :]
        next_token_probs = torch.softmax(next_token_logits, dim=-1)
        sorted_ids = torch.argsort(next_token_probs, dim=-1, descending=True)
        # print(sorted_ids)
        
        
        # store tokens with highest probability
        for choice_idx in range(n):
            token_id = sorted_ids[choice_idx]
            
            token_prob = next_token_probs[token_id].cpu().numpy()
            # print(token_id, token_prob)
            # print(tokenizer.decode(token_id.view(1,-1)))
            
            token_choice = (              
                f"{tokenizer.decode(token_id)} ({100*token_prob:.2f}%)"
            )
            
            iteration[f"{choice_idx+1}"] = token_choice

        #Append predictions to list
        input_ids = torch.cat([input_ids, sorted_ids[None,0, None]], dim=-1)
        
        # condition checks on stop_words detected
        if 'endmodule' in tokenizer.batch_decode(input_ids)[0]: break
        
print(tokenizer.batch_decode(input_ids)[0])

{'Input': tensor([[ 1003,    64,  2063,   751,   263,  8265,   220,   198,   198, 21412,
          2063,    62, 26676,     7,   198, 50284, 15414,   257,    11,   198,
         50284, 15414,   275,    11,   198, 50284, 22915,  2160,    11,   198,
         50284, 22915,  3283,   198, 50284,  1776,   198,   198,   562,   570,
          2160,   796,   257, 10563,   275,    26,   198,   562,   570,  3283,
           796,   257,  1222,   275,    26,   198,   198,   437, 21412,   198]],
       device='cuda:0')}
//a half adder module 

module half_adder(
    input a,
    input b,
    output sum,
    output carry
    );

assign sum = a ^ b;
assign carry = a & b;

endmodule


