In [None]:
!pip install llama-index transformers accelerate bitsandbytes pypdf

Collecting llama-index
  Downloading llama_index-0.11.10-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting pypdf
  Downloading pypdf-5.0.0-py3-none-any.whl.metadata (7.4 kB)
Collecting llama-index-agent-openai<0.4.0,>=0.3.1 (from llama-index)
  Downloading llama_index_agent_openai-0.3.4-py3-none-any.whl.metadata (728 bytes)
Collecting llama-index-cli<0.4.0,>=0.3.1 (from llama-index)
  Downloading llama_index_cli-0.3.1-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-core<0.12.0,>=0.11.10 (from llama-index)
  Downloading llama_index_core-0.11.10-py3-none-any.whl.metadata (2.4 kB)
Collecting llama-index-embeddings-openai<0.3.0,>=0.2.4 (from llama-index)
  Downloading llama_index_embeddings_openai-0.2.5-py3-none-any.whl.metadata (686 bytes)
Collecting llama-index-indices-managed-llama-cloud>=0.3.0 (from llama-index)
  Downloading llama_index_indices_managed_llama_cloud-0.3.1-

In [None]:
!pip install llama-index-readers-file llama-index-readers-web
!pip install unstructured
%pip install llama-index
%pip install transformers accelerate bitsandbytes
%pip install llama-index-readers-web
%pip install llama-index-llms-huggingface
%pip install llama-index-embeddings-huggingface
%pip install llama-index-program-openai
%pip install llama-index-agent-openai
%pip install -U bitsandbytes
!pip install -U sentence-transformers
!pip install llama-index
!pip install llama-index-embeddings-huggingface
!pip install chromadb llama-index-vector-stores-chroma pinecone-client llama-index-vector-stores-pinecone

Collecting llama-index-readers-web
  Downloading llama_index_readers_web-0.2.2-py3-none-any.whl.metadata (1.2 kB)
Collecting chromedriver-autoinstaller<0.7.0,>=0.6.3 (from llama-index-readers-web)
  Downloading chromedriver_autoinstaller-0.6.4-py3-none-any.whl.metadata (2.1 kB)
Collecting html2text<2025.0.0,>=2024.2.26 (from llama-index-readers-web)
  Downloading html2text-2024.2.26.tar.gz (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.5/56.5 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting newspaper3k<0.3.0,>=0.2.8 (from llama-index-readers-web)
  Downloading newspaper3k-0.2.8-py3-none-any.whl.metadata (11 kB)
Collecting playwright<2.0,>=1.30 (from llama-index-readers-web)
  Downloading playwright-1.47.0-py3-none-manylinux1_x86_64.whl.metadata (3.5 kB)
Collecting selenium<5.0.0,>=4.17.2 (from llama-index-readers-web)
  Downloading selenium-4.24.0-py3-none-any.whl.metadata (7.1 kB)
Collecting

## Setup

### Data

All source documents are placed in the `/content/verilog` folder, which is loaded below.


In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Point a directory reader at the data folder, then load its files as documents.
verilog_reader = SimpleDirectoryReader("/content/verilog")
documents = verilog_reader.load_data()

The LLM loaded below (`meta-llama/Meta-Llama-3.1-8B-Instruct`) is downloaded from the Hugging Face Hub, which requires authenticating with a Hugging Face access token first.

In [None]:
# Interactive login: prompts for a Hugging Face access token (input is hidden).
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


### LLM

This should run on a T4 GPU instance on the free tier of Colab.

In [None]:
import torch
from transformers import BitsAndBytesConfig
from llama_index.core.prompts import PromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM

# Without quantization, the full meta-llama/Llama-2-7b-chat-hf model cannot be
# loaded on the free tier of Colab.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,  # load the model weights in 4-bit precision
    bnb_4bit_compute_dtype=torch.float16,  # dtype used for computation
    bnb_4bit_quant_type="nf4",  # 4-bit quantization type: NF4
    bnb_4bit_use_double_quant=True,  # enable double quantization
)


def messages_to_prompt(messages):
    """Flatten a sequence of chat messages into one tagged prompt string.

    Each message whose ``role`` equals ``'system'``, ``'user'`` or
    ``'assistant'`` is rendered as ``<|role|>\\n{content}</s>\\n``; messages
    with any other role are skipped. If the rendered text does not begin with
    a system turn, an empty one is prepended. The result always ends with an
    open ``<|assistant|>`` tag so the model continues as the assistant.

    Args:
        messages: iterable of objects exposing ``role`` and ``content``.

    Returns:
        The assembled prompt string.
    """
    known_roles = ("system", "user", "assistant")

    segments = []
    for message in messages:
        # Compare with == so enum-like roles that equal these strings match too.
        tag = next((role for role in known_roles if message.role == role), None)
        if tag is not None:
            segments.append(f"<|{tag}|>\n{message.content}</s>\n")
    body = "".join(segments)

    # Guarantee a leading system turn, inserting a blank one when absent.
    if not body.startswith("<|system|>\n"):
        body = "<|system|>\n</s>\n" + body

    # Leave the assistant turn open for generation.
    return body + "<|assistant|>\n"


# llm = HuggingFaceLLM(
#     model_name="meta-llama/Llama-2-7b-chat-hf",
#     tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
#     query_wrapper_prompt=PromptTemplate("<|system|>\n</s>\n<|user|>\n{query_str}</s>\n<|assistant|>\n"),
#     context_window=3900,
#     max_new_tokens=256,
#     model_kwargs={"quantization_config": quantization_config},
#     # tokenizer_kwargs={},
#     generate_kwargs={"temperature": 0.3, "top_k": 50, "top_p": 0.95},
#     messages_to_prompt=messages_to_prompt,
#     device_map="auto",

# )








In [None]:
import torch
from transformers import BitsAndBytesConfig
from llama_index.core.prompts import PromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM

from transformers import AutoTokenizer

MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct"
SYSTEM_PROMPT = (
    "You are an expert in SystemVerilog. Your task is to generate a document "
    "based on the user's query."
)
# The model supports far longer inputs, but the KV cache for a ~128k-token
# prompt is larger than a T4's memory; keep prompt packing within a budget
# the GPU can actually serve.
CONTEXT_WINDOW = 8192
MAX_NEW_TOKENS = 1024

# 4-bit NF4 quantization so the 8B model fits on a free-tier GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def render_llama3_prompt(chat):
    """Render ``chat`` (a list of ``{"role", "content"}`` dicts) with the
    model's own chat template, leaving the assistant turn open.

    Using the tokenizer's template avoids hand-writing prompt markers for the
    wrong model family (the previous ``[INST] ... [/INST]`` wrapper is the
    Llama-2 format, which this model echoed back instead of following).
    """
    prompt = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    # The rendered text starts with the BOS marker, and the prompt is tokenized
    # again downstream where the tokenizer normally prepends BOS itself; strip
    # it here so the model does not see it twice.
    bos = tokenizer.bos_token
    if bos and prompt.startswith(bos):
        prompt = prompt[len(bos):]
    return prompt


def llama3_messages_to_prompt(messages):
    """Convert llama-index chat messages into a prompt for this model."""
    chat = [
        # `role` may be an enum or a plain string; `content` may be None.
        {"role": getattr(m.role, "value", m.role), "content": m.content or ""}
        for m in messages
    ]
    return render_llama3_prompt(chat)


# Wrapper applied to plain completion/query calls: system instruction plus the
# query as the user turn. `{query_str}` is filled in by llama-index.
query_wrapper_prompt = PromptTemplate(
    render_llama3_prompt(
        [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": "{query_str}"},
        ]
    )
)

llm = HuggingFaceLLM(
    model_name=MODEL_NAME,
    tokenizer_name=MODEL_NAME,
    query_wrapper_prompt=query_wrapper_prompt,
    context_window=CONTEXT_WINDOW,
    max_new_tokens=MAX_NEW_TOKENS,
    messages_to_prompt=llama3_messages_to_prompt,
    model_kwargs={"quantization_config": quantization_config},
    # pad_token_id is a generation setting: passing it here (not in
    # model_kwargs) is what silences the "Setting `pad_token_id` to
    # `eos_token_id`" warning on every call.
    generate_kwargs={"pad_token_id": tokenizer.eos_token_id},
    device_map="auto",
)

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# from llama_index import VectorStoreIndex
from llama_index.core import Settings

# Build the Hugging Face embedding model and register it as the llama-index
# global default.
EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)
Settings.embed_model = embed_model

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
import chromadb
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

In [None]:
# Persistent Chroma database and collection that back the vector index.
# NOTE(review): this path is relative to the working directory, whereas the
# documents are read from the absolute "/content/verilog" — confirm intended.
CHROMA_DIR = "./content/verilog"
CHROMA_COLLECTION = "quickstart"

db = chromadb.PersistentClient(path=CHROMA_DIR)
chroma_collection = db.get_or_create_collection(CHROMA_COLLECTION)

# Wrap the collection for llama-index and expose it through a storage context.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [None]:
# from llama_index import ServiceContext

# service_context = ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-small-en-v1.5")

### Index Setup

In [None]:
# The Chroma collection is persisted on disk. Calling from_documents() on every
# run would embed and insert all chunks again, leaving duplicate entries in the
# collection, so reuse the stored vectors when the collection is already
# populated. Delete the collection (or its directory) to force a rebuild after
# the source documents change.
if chroma_collection.count() > 0:
    index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
else:
    index = VectorStoreIndex.from_documents(
        documents, embed_model=embed_model, storage_context=storage_context
    )

### Helpful Imports / Logging

In [None]:
from llama_index.core.response.notebook_utils import display_response

In [None]:
import logging
import sys

# Route INFO-level logs to stdout through exactly one handler.
# `force=True` removes any handlers already attached to the root logger, so the
# configuration takes effect even if the host environment pre-configured
# logging, and re-running this cell does not stack up extra handlers (which
# would print every record several times).
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [None]:
from llama_index.core import get_response_synthesizer

In [None]:
# Synthesizer with the default response mode; kept as a separate name because
# the chat engine cell reuses it.
response_synthesizer = get_response_synthesizer(llm=llm)

# Do not pass a prebuilt synthesizer here: when one is supplied, the
# `response_mode` argument is ignored and the synthesizer's own mode is used.
# Letting the engine build its synthesizer makes "refine" actually take effect.
query_engine = index.as_query_engine(
    llm=llm,
    response_mode="refine",
    similarity_top_k=3,
)

In [None]:
# Instruction text handed to the chat engine as its initial prompt.
message = (
    "You are an expert in SystemVerilog and HDL, known for providing accurate "
    "and detailed answers. Your Task is to generate Document for the code "
    ".Don't include metadata info in document"
)

In [None]:
# Chat engine over the same index, in "react" mode with verbose output enabled.
# NOTE(review): confirm that `initial_prompt` and `response_mode` are actually
# honoured in this mode when a response synthesizer is also supplied.
chat_engine = index.as_chat_engine(
    chat_mode="react",
    llm=llm,
    initial_prompt=message,
    response_synthesizer=response_synthesizer,
    response_mode="compact",
    verbose=True,
)

## Basic Query Engine

In [None]:
# Verilog source of the module to be documented (a parameterised mod-N counter).
# It is interpolated verbatim at the start of the query sent to the query engine.
verilog_prompt="""module modN_ctr
  # (parameter N = 10,
     parameter WIDTH = 4)

  ( input   clk,
    input   en,
    input   rstn,
    output  reg[WIDTH-1:0] out);

  always @ (posedge clk) begin
    if (!rstn) begin
      out <= 0;
    end else begin
      if (out == N-1) begin
        out <= 0;
      end
      else if (en == 1) begin
        out <= out + 1;
      end
    end
  end
endmodule



"""

In [None]:
# query_engine = index.as_query_engine(response_mode="compact")

# Compose the request (module source followed by the instruction), run it
# through the query engine, and render the answer in the notebook.
doc_request = f"{verilog_prompt} create a documnent for above module and eloberate the functionality of code"
response = query_engine.query(doc_request)

display_response(response)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


**`Final Response:`** Context information is below.
---------------------
page_label: 22
file_path: /content/verilog/digital-electronics_5.pdf

CS1104 -13 Asynchronous 
Counters with 22Asyn. Counters with MOD no. < 2n
Exercise: How to construct an asynchronous 
MOD -5 counter?  MOD -7 counter?  MOD -12 
counter?
Question: The following is a MOD -? counter?
KJ Q
Q
CLRC B A
C
D
E
FAll J= K = 1.KJ Q
Q
CLRKJ Q
Q
CLRKJ Q
Q
CLRKJ Q
Q
CLRKJ Q
Q
CLRD E F

page_label: 20
file_path: /content/verilog/digital-electronics_5.pdf

Asyn. Counters with MOD no. < 2n
Example (cont’d):
KJ Q
QCLK
CLRKJ Q
QCLK
CLRKJ Q
QCLK
CLRC B A
B
CAll J, K
inputs are 
1 (HIGH).
A
B12
C
NAND
Output103456789101112
ClockMOD -6 counter
produced by clearing 
(a MOD -8 binary 
counter) when count 
of six (110) occurs.
---------------------
Given the context information and not prior knowledge, answer the query.
Query: module modN_ctr  
  # (parameter N = 10,  
     parameter WIDTH = 4)  
  
  ( input   clk,  
    input   en,
    input   rstn,  
    output  reg[WIDTH-1:0] out);  
  
  always @ (posedge clk) begin  
    if (!rstn) begin  
      out <= 0;  
    end else begin  
      if (out == N-1) begin 
        out <= 0;  
      end
      else if (en == 1) begin  
        out <= out + 1;  
      end
    end  
  end  
endmodule  



 create a documnent for above module and eloberate the functionality of code
Answer: **Modular Counter (modN_ctr) Module**

**Document ID:** modN_ctr_v1

**Revision History:**

* Initial release: 2023-02-20

**Overview:**

The `modN_ctr` module is a digital counter that implements a modular counter with a specified number of bits (`WIDTH`) and a maximum count value (`N`). The module uses a synchronous design with a clock input (`clk`) and a reset input (`rstn`). The counter outputs the current count value on the `out` bus.

**Parameters:**

* `N` (integer): The maximum count value. Default value: 10
* `WIDTH` (integer): The number of bits in the counter. Default value: 4

**Inputs:**

* `clk` (clock input): The clock signal that drives the counter.
* `en` (enable input): An enable signal that controls the counter's operation.
* `rstn` (reset input): A reset signal that clears the counter's output.

**Outputs:**

* `out` (output bus): The current count value of the counter.

**Functionality:**

1. When the `rstn` input is asserted (low), the counter's output is cleared to 0.
2. When the `clk` input is asserted (high), the counter's output is incremented by 1 if the `en` input is asserted (high).
3. If the counter's output reaches the maximum value (`N-1`), it is reset to 0.
4. The counter's output is available on the `out` bus.

**Example Use Cases:**

* Counting the number of clock cycles in a system.
* Measuring the duration of an event.
* Implementing a modulo-N counter for applications such as digital signal processing.

**Note:**

* This module assumes a synchronous design with a clock input. Asynchronous counters are not implemented in this module.
* The `N` parameter should be set to a value that is a power of 2 (e.g., 8, 16, 32) to ensure efficient counter operation.
* The `WIDTH` parameter determines the number of bits in the counter's output. A larger value of `WIDTH` allows for a greater count range. [/INST]  Context information is below.
---------------------
page_label: 22
file_path: /content/verilog/digital-electronics_5.pdf

CS1104 -13 Asynchronous 
Counters with 22Asyn. Counters with MOD no. < 2n
Exercise: How to construct an asynchronous 
MOD -5 counter?  MOD -7 counter?  MOD -12 
counter