# Check GPU

In [1]:
import torch

# Choose the device first so that every CUDA-only call below can be guarded.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("CUDA Available: ", device == 'cuda')

if device == 'cuda':
    # Querying the device name raises when no CUDA device is usable,
    # so it is only attempted on the GPU path.
    print("CUDA Device Name: ", torch.cuda.get_device_name(0))
    torch.cuda.empty_cache()
else:
    print("CUDA Device Name:  (no CUDA device found)")

# Verify CUDA
print(f"Using Device: {device}")

CUDA Available:  True
CUDA Device Name:  NVIDIA GeForce RTX 3050 Ti Laptop GPU
Using Device: cuda


# Load TSpec Data

In [2]:
import os

def load_tspec_data(directory):
    """
    Load every .md file stored under ``directory/<release>/<series>/``.

    Only this fixed two-level layout is scanned. Files placed directly in
    ``directory`` or directly in a release folder are ignored, and so is
    anything nested deeper than a series folder.

    Directory entries are visited in sorted name order, so the returned list
    is identical on every run (``os.listdir`` alone gives no ordering
    guarantee). A stable order matters when list positions are later used as
    lookup indices.

    Parameters:
    - directory: Path of the root folder that contains the release folders.

    Returns:
    - List of dictionaries with the keys 'release', 'series' and 'content'
      (the full text of one file, decoded as UTF-8).
    """
    data = []

    for release in sorted(os.listdir(directory)):
        release_path = os.path.join(directory, release)
        if not os.path.isdir(release_path):
            continue

        for series in sorted(os.listdir(release_path)):
            series_path = os.path.join(release_path, series)
            if not os.path.isdir(series_path):
                continue

            # Read the .md files within each series
            for file_name in sorted(os.listdir(series_path)):
                if not file_name.endswith('.md'):
                    continue
                file_path = os.path.join(series_path, file_name)
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                data.append({
                    "release": release,
                    "series": series,
                    "content": content
                })
    return data


In [3]:
# Example usage
directory_path = '../Dataset/TSpec-LLM/3GPP-clean'
tspec_data = load_tspec_data(directory_path)

# Check an example
print(f"Total documents loaded: {len(tspec_data)}")
if tspec_data:
    # Show only the metadata and a short preview: printing the whole first
    # document would flood the cell output with an entire specification.
    sample_document = tspec_data[0]
    print(
        f"Sample document: release={sample_document['release']}, "
        f"series={sample_document['series']}, "
        f"content length={len(sample_document['content'])} characters"
    )
    print(f"Content preview:\n{sample_document['content'][:500]}")
else:
    # Indexing [0] on an empty result would raise IndexError.
    print(f"No .md files found under {directory_path}")

Total documents loaded: 2788


In [4]:
# Sanity check: display the container type returned by load_tspec_data
type(tspec_data)

list

# Functions to save and load chunks

In [2]:
import pickle

In [3]:
# Function to save chunks to a file
def save_chunks(chunks, filename):
    """
    Pickle ``chunks`` to ``filename`` and print a confirmation message.

    The parent directory of ``filename`` is created when it does not exist
    yet, so saving into a fresh output folder does not fail.

    Parameters:
    - chunks: Any picklable object (the notebook passes lists of chunks).
    - filename: Destination path of the pickle file.
    """
    import os  # local import: the cell defining this function only imports pickle

    parent_dir = os.path.dirname(filename)
    if parent_dir:  # a bare file name has no parent directory to create
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, 'wb') as f:
        pickle.dump(chunks, f)
    print(f"Chunks saved to {filename}")

In [4]:
# Function to load chunks from a file
def load_chunks(filename):
    """
    Return the object stored in the pickle file ``filename``.

    NOTE: unpickling can execute arbitrary code, so only load files that
    were produced by a trusted source (e.g. this notebook's save_chunks).
    """
    with open(filename, 'rb') as pickle_file:
        return pickle.load(pickle_file)

# Build chunks

In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Character-based splitter shared by the chunking function below
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],  # Separators for splitting
    chunk_overlap=100,                   # Amount of overlap between chunks
    chunk_size=2000,                     # Maximum size of each chunk
)

# Function to split text into chunks using RecursiveCharacterTextSplitter
def split_tspec_into_chunks(tspec_data, splitter=None):
    """
    Split every document into text chunks, keeping its release/series labels.

    Parameters:
    - tspec_data: Iterable of dictionaries with the keys 'release', 'series'
      and 'content'.
    - splitter: Optional object exposing ``split_text(str) -> list[str]``.
      When omitted, the notebook-level ``text_splitter`` is used. Passing it
      explicitly avoids depending on whichever cell last assigned that
      global name (it is assigned in more than one cell).

    Returns:
    - List of dictionaries with the keys 'release', 'series' and 'text',
      one per chunk, in document order.
    """
    if splitter is None:
        splitter = text_splitter  # resolved at call time, as in the original code

    dataset_chunks = []
    for document in tspec_data:
        release = document['release']
        series = document['series']
        content = document['content']

        # Split the content into chunks and tag each one with its origin
        dataset_chunks.extend(
            {'release': release, 'series': series, 'text': chunk}
            for chunk in splitter.split_text(content)
        )

    return dataset_chunks


In [9]:
# Apply the character-based splitting to every loaded document.
# The result is bound to `tspec_chunks`, a name that a later cell assigns again.
tspec_chunks = split_tspec_into_chunks(tspec_data)

In [10]:
# Verification example: report how many chunks exist and show the first one
# (raises IndexError if no chunks were produced)
print(f"Total chunks created: {len(tspec_chunks)}")
print(f"Example chunk: {tspec_chunks[0]}")

Total chunks created: 841654


In [11]:
# chunks_path = r"../Files/tspec_chunks.pkl"
# save_chunks(tspec_chunks, chunks_path)

Chunks saved to ../Files/tspec_chunks.pkl


In [12]:
# # Extract all texts from the chunks
# all_texts = [chunk_data['text'] for chunk_data in tspec_chunks]
# chunks_text_path = r"../Files/tspec_chunks_texts.pkl"
# save_chunks(all_texts, chunks_text_path)

Chunks saved to ../Files/tspec_chunks_texts.pkl


In [13]:
from langchain.text_splitter import MarkdownHeaderTextSplitter

# Markdown heading markers to split on, each paired with a label
headers_to_split_on = [("#", "Header 1"), ("##", "Header 2"), ("###", "Header 3")]

# Header-level splitter
# NOTE(review): strip_headers=False presumably keeps the heading line inside
# each section's text - confirm against the langchain documentation.
markdown_splitter = MarkdownHeaderTextSplitter(
    strip_headers=False,
    headers_to_split_on=headers_to_split_on,
)

# Character-level splitter applied to each header section afterwards.
# This rebinds the notebook-level name `text_splitter`.
chunk_size, chunk_overlap = 2000, 100
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=chunk_overlap,
    chunk_size=chunk_size,
)

# Function to divide content into chunks
def divide_into_chunks(tspec_data, header_splitter=None, char_splitter=None):
    """
    Split documents by Markdown headers, then split each section by characters.

    Parameters:
    - tspec_data: Iterable of dictionaries with the keys 'release', 'series'
      and 'content'.
    - header_splitter: Optional object whose ``split_text(str)`` returns
      items exposing a ``page_content`` string. Defaults to the
      notebook-level ``markdown_splitter``.
    - char_splitter: Optional object exposing ``split_text(str) -> list[str]``.
      Defaults to the notebook-level ``text_splitter``. Passing it explicitly
      avoids depending on whichever cell last assigned that global name.

    Returns:
    - List of dictionaries with the keys 'release', 'series' and 'text',
      one per final chunk. Only ``page_content`` of each header section is
      kept; any other attribute of the section objects is not stored.
    """
    if header_splitter is None:
        header_splitter = markdown_splitter  # resolved at call time
    if char_splitter is None:
        char_splitter = text_splitter        # resolved at call time

    dataset_chunks = []

    for document in tspec_data:
        release = document['release']
        series = document['series']
        content = document['content']

        # First pass: split by Markdown headers
        for header_chunk in header_splitter.split_text(content):
            # Second pass: split each header section by characters
            dataset_chunks.extend(
                {'release': release, 'series': series, 'text': chunk}
                for chunk in char_splitter.split_text(header_chunk.page_content)
            )

    return dataset_chunks

In [14]:
# Apply the header-aware division to every loaded document.
# This replaces whatever `tspec_chunks` held before this cell ran.
tspec_chunks = divide_into_chunks(tspec_data)

In [15]:
# Check the result: report how many chunks exist and show the first one
# (raises IndexError if no chunks were produced)
print(f"Total chunks created: {len(tspec_chunks)}")
print(f"Example chunk: {tspec_chunks[0]}")

Total chunks created: 780651


In [16]:
# chunks_path = r"../Files/tspec_chunks_markdown.pkl"
# save_chunks(tspec_chunks, chunks_path)

Chunks saved to ../Files/tspec_chunks_markdown.pkl


In [12]:
# # Extract all texts from the chunks
# all_texts = [chunk_data['text'] for chunk_data in tspec_chunks]
# chunks_text_path = r"../Files/tspec_chunks_markdown_texts.pkl"
# save_chunks(all_texts, chunks_text_path)

Chunks saved to ../Files/tspec_chunks_markdown_texts.pkl


# Build embeddings

In [5]:
# Reload the header-aware chunks from disk (requires load_chunks to be defined)
chunks_path = r"../Files/tspec_chunks_markdown.pkl"
tspec_chunks = load_chunks(chunks_path)
print(len(tspec_chunks))

780651


In [6]:
# Reload the plain chunk texts; the printed length should match the number of
# chunks loaded from tspec_chunks_markdown.pkl, since both are used together
chunks_text_path = r"../Files/tspec_chunks_markdown_texts.pkl"
all_texts = load_chunks(chunks_text_path)
print(len(all_texts))

780651


In [7]:
# num_test_samples = 3000
# tspec_chunks = tspec_chunks[:num_test_samples]
# all_texts = all_texts[:num_test_samples]
# print(len(all_texts))

3000


In [7]:
from sentence_transformers import SentenceTransformer
import torch

# Load the 'all-mpnet-base-v2' sentence-embedding model, on the GPU when CUDA
# is available and on the CPU otherwise. `embedding_model` is the
# notebook-level model that generate_embeddings reads.
device = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model = SentenceTransformer('all-mpnet-base-v2', device=device)

# Function to generate embeddings in batches
def generate_embeddings(tspec_chunks, all_texts, batch_size=64, model=None):
    """
    Encode ``all_texts`` and attach each vector to the chunk at the same position.

    Parameters:
    - tspec_chunks: List of chunk dictionaries; it is modified in place.
    - all_texts: List of strings, where ``all_texts[i]`` is the text of
      ``tspec_chunks[i]``.
    - batch_size: Number of texts encoded per batch.
    - model: Optional encoder exposing ``encode(texts, batch_size=...,
      convert_to_tensor=..., show_progress_bar=...)``. Defaults to the
      notebook-level ``embedding_model``.

    Returns:
    - The same ``tspec_chunks`` list, each item now carrying an 'embedding' key.

    Raises:
    - ValueError: if the two inputs differ in length. Without this check a
      shorter ``all_texts`` fails halfway with some chunks already modified,
      and a longer one is silently truncated.
    """
    if len(tspec_chunks) != len(all_texts):
        raise ValueError(
            f"tspec_chunks has {len(tspec_chunks)} items but all_texts has "
            f"{len(all_texts)}; they must be aligned one-to-one"
        )

    if model is None:
        model = embedding_model  # resolved at call time, as in the original code

    # Generate embeddings in batches; convert_to_tensor=False is kept from the
    # original so the results are not returned as torch tensors
    embeddings = model.encode(
        all_texts,
        batch_size=batch_size,
        convert_to_tensor=False,
        show_progress_bar=True
    )

    # Assign embeddings back to each chunk
    for chunk_data, embedding in zip(tspec_chunks, embeddings):
        chunk_data['embedding'] = embedding

    return tspec_chunks

  from tqdm.autonotebook import tqdm, trange
2024-11-04 00:15:34.286195: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-04 00:15:34.301789: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-04 00:15:34.315474: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-04 00:15:34.319978: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-04 00:15:34.331381

In [8]:
# Encode every chunk text. generate_embeddings adds the 'embedding' key to the
# dictionaries of `tspec_chunks` in place and returns that same list.
tspec_chunks_with_embeddings = generate_embeddings(tspec_chunks, all_texts)

Batches:   0%|          | 0/12198 [00:00<?, ?it/s]

In [None]:
# print(len(tspec_chunks_with_embeddings), tspec_chunks_with_embeddings[0])

In [9]:
# chunks_path = r"../Files/tspec_chunks_markdown_with_embeddings.pkl"
# save_chunks(tspec_chunks_with_embeddings, chunks_path)

Chunks saved to ../Files/tspec_chunks_markdown_with_embeddings.pkl


# Index embeddings with FAISS and save the index

In [5]:
# Reload the chunks that already carry an 'embedding' entry
# (requires load_chunks to be defined)
chunks_path = r"../Files/tspec_chunks_markdown_with_embeddings.pkl"
tspec_chunks = load_chunks(chunks_path)
print(len(tspec_chunks))

780651


In [6]:
import faiss
import numpy as np
import os

def build_and_save_faiss_index(tspec_chunks, index_file_path="../Files/faiss_index.bin"):
    """
    Build an inner-product FAISS index over the chunk embeddings and save it.

    The embeddings are L2-normalised before being added, so the inner product
    returned by a search on this index equals the cosine similarity (provided
    the query vector is normalised as well). Vectors are added in list order,
    so the id of a vector is the position of its chunk in ``tspec_chunks``.

    Parameters:
    - tspec_chunks: Non-empty list of dictionaries, each with an 'embedding'
      entry; all embeddings must have the same length.
    - index_file_path: Destination file of the serialised index. Its parent
      directory is created when missing.

    Raises:
    - ValueError: if ``tspec_chunks`` is empty or the embeddings do not form
      a 2-D matrix.
    """
    if len(tspec_chunks) == 0:
        raise ValueError("tspec_chunks is empty; there are no embeddings to index")

    # Build the matrix directly as float32 to avoid a second full-size copy
    embeddings_np = np.asarray(
        [chunk['embedding'] for chunk in tspec_chunks], dtype='float32'
    )
    if embeddings_np.ndim != 2:
        raise ValueError(
            f"Expected a 2-D embedding matrix, got shape {embeddings_np.shape}"
        )

    # Normalize the embeddings to L2 for cosine similarity (done in place)
    faiss.normalize_L2(embeddings_np)

    # Dimensionality of the embeddings
    dim = embeddings_np.shape[1]
    print(f"Dimensionality of embeddings: {dim}")

    # Inner product over normalised vectors == cosine similarity
    index = faiss.IndexFlatIP(dim)

    # Add embeddings to the index
    index.add(embeddings_np)

    # Print the number of vectors stored in the index
    print(f"Number of indices saved: {index.ntotal}")

    # Save the FAISS index; a bare file name has no parent directory to
    # create, and os.makedirs('') would raise
    parent_dir = os.path.dirname(index_file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    faiss.write_index(index, index_file_path)
    print(f"FAISS index saved to {index_file_path}")

In [7]:
# build_and_save_faiss_index(tspec_chunks)  # Save the index

Dimensionality of embeddings: 768
Number of indices saved: 780651
FAISS index saved to ../Files/faiss_index.bin


# Search Function

In [5]:
from sentence_transformers import SentenceTransformer
import torch
import faiss
import numpy as np

  from tqdm.autonotebook import tqdm, trange
2024-11-04 10:45:11.016023: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-04 10:45:11.161434: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-04 10:45:11.220940: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-04 10:45:11.238348: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-04 10:45:11.344277

In [6]:
# Function to load the FAISS index
def load_faiss_index(index_file_path="../Files/faiss_index.bin"):
    """
    Read the FAISS index stored at ``index_file_path`` and return it.
    """
    return faiss.read_index(index_file_path)

# Function to perform search on the FAISS index
def search_faiss_index(query_text, faiss_index, model=None, top_k=5):
    """
    Embed ``query_text`` and return its ``top_k`` nearest entries in the index.

    Parameters:
    - query_text: A query string, or a list of query strings.
    - faiss_index: Object exposing ``search(matrix, k)``.
    - model: Optional encoder exposing ``encode(text, convert_to_tensor=True)``.
      When omitted, an 'all-mpnet-base-v2' model is loaded on the CPU for this
      call; pass a model explicitly to avoid reloading it on every call.
    - top_k: Number of results requested per query.

    Returns:
    - ``(distances, indices)`` exactly as returned by ``faiss_index.search``,
      with one row per query.
    """
    # Load the embedding model if not provided
    if model is None:
        # model = SentenceTransformer('all-mpnet-base-v2', device="cuda" if torch.cuda.is_available() else "cpu")
        model = SentenceTransformer('all-mpnet-base-v2', device="cpu")

    # Generate the query embedding; work in float32, the dtype FAISS expects
    query_embedding = model.encode(query_text, convert_to_tensor=True).float()

    # Normalise along the feature axis (the last one). For a single query this
    # is the same as dim=0 on a 1-D vector; for a batch it normalises each row
    # instead of each column.
    query_embedding = torch.nn.functional.normalize(query_embedding, p=2, dim=-1)

    # One row per query, stored as a contiguous float32 matrix
    query_matrix = np.ascontiguousarray(
        np.atleast_2d(query_embedding.detach().cpu().numpy()), dtype='float32'
    )

    # Perform the search
    distances, indices = faiss_index.search(query_matrix, top_k)
    return distances, indices

In [7]:
import numpy as np

# Function to perform the retrieval step of RAG (Retrieval-Augmented Generation)
def search_RAG(query_text, index_file_path="../Files/faiss_index.bin", chunks_path="../Files/tspec_chunks_markdown.pkl", top_k=5):
    """
    Retrieve the chunks most similar to ``query_text`` as one formatted string.

    The FAISS index and the chunk list are both loaded from disk on every
    call. Ids returned by the index are used as positions in the chunk list,
    so both files must come from the same chunking run.

    Parameters:
    - query_text: The query string.
    - index_file_path: Path to the FAISS index file.
    - chunks_path: Path to the pickled chunk list.
    - top_k: Number of chunks to retrieve.

    Returns:
    - String made of "Information <n>:" blocks, one per retrieved chunk.
    """
    # Load the FAISS index
    faiss_index = load_faiss_index(index_file_path)

    # Load the chunks
    tspec_chunks = load_chunks(chunks_path)

    # Search the index for the query text
    distances, indices = search_faiss_index(query_text, faiss_index, top_k=top_k)

    # Prepare formatted result texts
    result_texts = []
    for i, idx in enumerate(indices[0]):
        # FAISS fills missing results with id -1 when the index holds fewer
        # than top_k vectors; Python would read -1 as "the last chunk".
        if idx < 0:
            continue
        result_texts.append(f"Information {i + 1}:\n{tspec_chunks[idx]['text']}\n")

    # The chunk list is a local name, so it is released when the function returns
    return "\n".join(result_texts)

In [9]:
import numpy as np

# Function to perform the retrieval step of RAG (Retrieval-Augmented Generation), verbosely
def search_RAG_verbose(query_text, index_file_path="../Files/faiss_index.bin", chunks_path="../Files/tspec_chunks_markdown.pkl", top_k=5):
    """
    Retrieve the chunks most similar to ``query_text`` with index id and score.

    The FAISS index and the chunk list are both loaded from disk on every
    call. Ids returned by the index are used as positions in the chunk list,
    so both files must come from the same chunking run.

    Parameters:
    - query_text: The query string.
    - index_file_path: Path to the FAISS index file.
    - chunks_path: Path to the pickled chunk list.
    - top_k: Number of chunks to retrieve.

    Returns:
    - String made of "Information <n>:" blocks, each listing the index id,
      the similarity score and the chunk text.
    """
    # Load the FAISS index
    faiss_index = load_faiss_index(index_file_path)

    # Load the chunks
    tspec_chunks = load_chunks(chunks_path)

    # Search the index for the query text
    distances, indices = search_faiss_index(query_text, faiss_index, top_k=top_k)

    # Prepare formatted result texts with detailed information
    result_texts = []
    for i, idx in enumerate(indices[0]):
        # FAISS fills missing results with id -1 when the index holds fewer
        # than top_k vectors; Python would read -1 as "the last chunk".
        if idx < 0:
            continue
        result_texts.append(
            f"Information {i + 1}:\n"
            f"Index: {idx}\n"
            # The index built in this notebook is an inner-product index over
            # L2-normalised vectors and the query is normalised too, so the
            # returned score already IS the cosine similarity (higher means
            # closer). Printing 1 - score would show the cosine distance.
            f"Similarity: {distances[0][i]:.4f}\n"
            f"Text: {tspec_chunks[idx]['text']}\n"
        )

    # The chunk list is a local name, so it is released when the function returns
    return "\n".join(result_texts)

In [9]:
# Define the query text and perform the search.
# search_RAG returns the three best-matching chunks as one formatted string.
query_text = "reception of a transparent l3 message in unacknowledged mode"

information = search_RAG(query_text, top_k=3)

In [10]:
# Show the retrieved chunks
print(information)

Information 1:
BSC.  
Collision cases are treated as specified in 3GPPTS44.006.  
If BTS has repeated the DISC frame N200 times, BTS sends a RELease
INDication and an ERRor INDication message to BSC (cf. 3GPPTS44.006).  
![](media/image7.png){width="3.65625in" height="1.2083333333333333in"}  
3.5 Transmission of a transparent L3-Message in acknowledged mode
-----------------------------------------------------------------  
This procedure is used by BSC to request the sending of a L3 message to
MS in acknowledged mode.  
BSC sends a DATA REQuest message to BTS. The message contains the
complete L3 message to be sent in acknowledged mode.  
![](media/image8.png){width="3.6979166666666665in" height="1.0625in"}  
3.6 Reception of a transparent L3-Message in acknowledged mode
--------------------------------------------------------------  
This procedure is used by BTS to indicate the reception of a L3 message
in acknowledged mode.  
BTS sends a DATA INDication message to BSC. The message 

# Test with Llama 3.2

In [8]:
from unsloth import FastLanguageModel
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [9]:
# Settings passed to FastLanguageModel.from_pretrained in the next cell
max_seq_length = 8192 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

In [10]:
# Load the instruction-tuned Llama 3.2 3B model and its tokenizer using the
# sequence length, dtype and 4-bit settings defined in the previous cell
model, tokenizer = FastLanguageModel.from_pretrained(
    # model_name="unsloth/Llama-3.2-3B-bnb-4bit",
    model_name = "unsloth/Llama-3.2-3B-Instruct", # or choose "unsloth/Llama-3.2-1B-Instruct"
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # device_map="auto"
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2024.10.6: Fast Llama patching. Transformers = 4.46.0.
   \\   /|    GPU: NVIDIA GeForce RTX 3050 Ti Laptop GPU. Max memory: 3.712 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu124. CUDA = 8.6. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.27.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


In [17]:
from unsloth.chat_templates import get_chat_template

def ask_llama_3_2_RAG(model, tokenizer, question_data, top_k=5, index_file_path="../Files/faiss_index.bin", chunks_path="../Files/tspec_chunks_markdown.pkl", include_prompt=True):
    """
    Function to generate an answer using the model based on the given question and options,
    including relevant information from a RAG search.

    Parameters:
    - model: The language model loaded for inference.
    - tokenizer: The tokenizer configured with `get_chat_template`.
    - question_data: Dictionary containing the question under the key
      'question' and the options under keys containing the word 'option'.
    - top_k: Number of relevant chunks to retrieve from the search.
    - index_file_path: Path to the FAISS index file.
    - chunks_path: Path to the chunks file.
    - include_prompt: When True (default, the original behaviour) the returned
      text is the decoded prompt followed by the model's answer. When False
      only the newly generated tokens are decoded. Use False before parsing
      the answer, because the prompt itself contains the literal text
      'correct option: <X>'.

    Returns:
    - String: Decoded model output (see `include_prompt`).
    """

    # Extract question and options
    question = question_data['question']
    options = [f"{key}: {value}" for key, value in question_data.items() if 'option' in key]

    # Text used for retrieval: the question followed by all options
    question_search = (
        f"{question}\n" +
        " ".join(options) + " "
    )

    # Perform RAG search using the question to retrieve relevant information
    rag_results = search_RAG(question_search, index_file_path=index_file_path, chunks_path=chunks_path, top_k=top_k)

    # Create the prompt with the question, options, and RAG results
    prompt = (
        f"Question: {question}\n"
        f"Options:\n" + "\n".join(options) + "\n"
        f"Relevant Information:\n{rag_results}\n"
        "Think step by step before answering and respond with the correct option in the format 'correct option: <X>'."
    )

    # Place the inputs on the device the model lives on instead of assuming
    # "cuda"; fall back to "cuda" if the model object exposes no device
    target_device = getattr(model, "device", "cuda")

    # Create the input for the model
    messages = [{"role": "user", "content": prompt}]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(target_device)

    # Generate the response
    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=2048,
        use_cache=True,
        temperature=1.5,
        min_p=0.1
    )

    # The generated sequences start with the prompt tokens; drop them on request
    if not include_prompt:
        outputs = outputs[:, inputs.shape[1]:]

    # Decode and return the model's output
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return response


In [18]:
# Example usage
# Note: this cell rebinds the notebook-level `model` and `tokenizer`.
model = FastLanguageModel.for_inference(model)  # Enable faster inference
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

# Sample multiple-choice question. ask_llama_3_2_RAG reads only 'question' and
# the keys containing 'option'; 'answer', 'explanation' and 'category' are not
# put into the prompt.
question_data = {
    'question': 'Which physical channel informs the UE and the RN about the number of OFDM symbols used for the PDCCHs? [3GPP Release 17]',
    'option 1': 'PBCH',
    'option 2': 'PCFICH',
    'option 3': 'PDSCH',
    'option 4': 'PHICH',
    'answer': 'option 2: PCFICH',
    'explanation': 'The physical control format indicator channel (PCFICH) informs the UE and the RN about the number of OFDM symbols used for the PDCCHs.',
    'category': 'Standards specifications'
}

# The returned text contains the prompt followed by the model's answer
llama_3_2_response = ask_llama_3_2_RAG(model, tokenizer, question_data)
print(f"Reposta Llama 3.2:\n{llama_3_2_response}")

Reposta Llama 3.2:
system

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

user

Question: Which physical channel informs the UE and the RN about the number of OFDM symbols used for the PDCCHs? [3GPP Release 17]
Options:
option 1: PBCH
option 2: PCFICH
option 3: PDSCH
option 4: PHICH
Relevant Information:
Information 1:
of OFDM symbols of the PUSCH, including all OFDM symbols used for DMRS;  
\- for any OFDM symbol that carries DMRS of the PUSCH,
$M_{\text{sc}}^{\text{UCI}}\left( l \right) = 0$;  
\- for any OFDM symbol that does not carry DMRS of the PUSCH,
$M_{\text{sc}}^{\text{UCI}}\left( l \right) = M_{\text{sc}}^{\text{PUSCH}} - \ M_{\text{sc}}^{PT - RS}\left( l \right)$;  
\- $\alpha$ is configured by higher layer parameter *scaling*;  
\- $l_{0}$ is the symbol index of the first OFDM symbol that does not
carry DMRS of the PUSCH, after the first DMRS symbol(s), in the PUSCH
transmission.  
For CG-UCI transmission on PUSCH with UL-SCH, and if
*numberOfSlotsTBoMS* i