In [2]:
## https://github.com/OPENAIRINTERFACE/openairinterface5g

import subprocess
import os

def clone_repo(git_url, repo_name="codebase"):
    """Clone ``git_url`` into ``repo_name``, replacing any existing copy.

    Args:
        git_url: Location of the Git repository passed to ``git clone``.
        repo_name: Destination path. Anything already at this path is
            deleted before cloning.

    Returns:
        ``repo_name``, so the call can be assigned directly to a path variable.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a
            non-zero status.
    """
    import shutil  # local import: only this function needs it

    if os.path.exists(repo_name):
        # Remove with the standard library instead of shelling out to
        # ``rm -rf``, which is unavailable on some platforms.
        if os.path.isdir(repo_name) and not os.path.islink(repo_name):
            shutil.rmtree(repo_name)
        else:
            os.remove(repo_name)
    # check=True makes a failed clone raise here instead of silently
    # returning a path that does not contain the repository.
    subprocess.run(["git", "clone", git_url, repo_name], check=True)
    return repo_name

# Fetch the OpenAirInterface 5G sources; the returned directory name is the repo path used later.
repo_path = clone_repo(
    git_url="https://github.com/OPENAIRINTERFACE/openairinterface5g",
    repo_name="openairinterface5g",
)


In [3]:
!pip install langchain langchain-community pgvector psycopg2-binary transformers sentence-transformers




In [4]:
import os
import subprocess
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.pgvector import PGVector
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document

# 2. Load + Chunk Code
def load_and_chunk_code(
    repo_path,
    extensions=(".c", ".h", ".cpp", ".py", ".java"),
    chunk_size=1000,
    chunk_overlap=200,
):
    """Walk ``repo_path`` and split matching source files into ``Document`` chunks.

    Args:
        repo_path: Root directory to walk recursively.
        extensions: File-name suffix (or iterable of suffixes) selecting which
            files are read. Defaults to the C/C++/Python/Java set.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to ``RecursiveCharacterTextSplitter``.

    Returns:
        A list of ``Document`` objects, one per chunk, each carrying the path
        of the originating file in ``metadata["source"]``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    # str.endswith accepts a str or a tuple of str, so normalise other iterables.
    suffixes = (extensions,) if isinstance(extensions, str) else tuple(extensions)

    chunks = []
    for root, _, files in os.walk(repo_path):
        for file in files:
            if not file.endswith(suffixes):
                continue
            file_path = os.path.join(root, file)
            # Explicit UTF-8 keeps decoding independent of the platform's default
            # encoding; undecodable bytes are dropped by errors="ignore".
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()
            # Split after the file is closed so the handle is not held during chunking.
            chunks.extend(
                Document(page_content=chunk, metadata={"source": file_path})
                for chunk in splitter.split_text(content)
            )
    return chunks

# 3. Store in AWS RDS
def store_in_pgvector(
    docs,
    aws_connection_string,
    collection_name="enterprise_code",
    model_name="sentence-transformers/all-MiniLM-L6-v2",
):
    """Embed ``docs`` and write them to a pgvector collection.

    Args:
        docs: Documents to embed and store, passed to ``PGVector.from_documents``.
        aws_connection_string: Connection string handed to ``PGVector``.
        collection_name: Name of the target collection.
        model_name: Model name given to ``HuggingFaceEmbeddings``.

    Returns:
        The object returned by ``PGVector.from_documents``, so callers can
        reuse the store instead of reconnecting.
    """
    embedding_model = HuggingFaceEmbeddings(model_name=model_name)
    return PGVector.from_documents(
        documents=docs,
        embedding=embedding_model,
        connection_string=aws_connection_string,
        collection_name=collection_name,
    )


In [None]:
# The AWS RDS connection string is read from the environment so the credential
# never lives in the notebook. Set AWS_DB_URI before starting the kernel;
# .strip() guards against stray whitespace around the value.
AWS_DB_URI = os.environ["AWS_DB_URI"].strip()
chunks = load_and_chunk_code(repo_path)
# Ingestion is left disabled; uncomment to embed the chunks and write them to the database.
#store_in_pgvector(chunks, AWS_DB_URI)

In [29]:
from langchain.chains import RetrievalQA
from langchain_community.vectorstores.pgvector import PGVector
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# Connect to the AWS-hosted pgvector collection using the same embedding model
# name and collection name that store_in_pgvector uses.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = PGVector(
    embedding_function=embedding_model,
    connection_string=AWS_DB_URI,
    collection_name="enterprise_code",
)

# The store already holds the embedding function (embedding_function above),
# so the retriever is configured with its search settings only.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Example query: fetch the 5 closest chunks and join them into one context string.
query = "Summarize resource scheduling in LTE and implement dynamic spectrum sharing"
# invoke() is the current retriever entry point; get_relevant_documents() is deprecated.
docs = retriever.invoke(query)
context = "\n\n".join(doc.page_content for doc in docs)


  vectorstore = PGVector(


In [None]:
from transformers import AutoModelForSeq2SeqLM
model_id = "Salesforce/codegen-350M-mono"  # Replace with CodeLlama, DeepSeek etc. if needed

# Read the Hugging Face token from the environment rather than writing a literal
# into the notebook. If the variable is unset or empty, None is passed and
# transformers falls back to its default credential handling.
hf_token = os.environ.get("HUGGINGFACE_TOKEN") or None

# Use the same `token=` keyword for both loaders (`use_auth_token=` is deprecated).
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
codegen = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)

# Only the first 2048 characters of the retrieved context are placed in the prompt.
prompt = f"""
Here is some L1 layer code:

{context[:2048]}

Develop modular functions for dynamic spectrum sharing feature in LTE with 70:30 split.
"""

result = codegen(prompt)
print(result[0]['generated_text'])


Some weights of the model checkpoint at Salesforce/codegen-350M-mono were not used when initializing CodeGenForCausalLM: ['transformer.h.0.attn.causal_mask', 'transformer.h.1.attn.causal_mask', 'transformer.h.10.attn.causal_mask', 'transformer.h.11.attn.causal_mask', 'transformer.h.12.attn.causal_mask', 'transformer.h.13.attn.causal_mask', 'transformer.h.14.attn.causal_mask', 'transformer.h.15.attn.causal_mask', 'transformer.h.16.attn.causal_mask', 'transformer.h.17.attn.causal_mask', 'transformer.h.18.attn.causal_mask', 'transformer.h.19.attn.causal_mask', 'transformer.h.2.attn.causal_mask', 'transformer.h.3.attn.causal_mask', 'transformer.h.4.attn.causal_mask', 'transformer.h.5.attn.causal_mask', 'transformer.h.6.attn.causal_mask', 'transformer.h.7.attn.causal_mask', 'transformer.h.8.attn.causal_mask', 'transformer.h.9.attn.causal_mask']
- This IS expected if you are initializing CodeGenForCausalLM from the checkpoint of a model trained on another task or with another architecture (e


Here is some L1 layer code:

// This function assigns pre-available RBS to each UE in specified sub-bands before scheduling is done
void
dlsch_scheduler_pre_processor(module_id_t Mod_id,
                              int CC_id,
                              frame_t frameP,
                              sub_frame_t subframeP) {
  eNB_MAC_INST *mac = RC.mac[Mod_id];
  UE_info_t *UE_info = &mac->UE_info;
  const int N_RBG = to_rbg(mac->common_channels[CC_id].mib->message.dl_Bandwidth);
  const int RBGsize = get_min_rb_unit(Mod_id, CC_id);

  store_dlsch_buffer(Mod_id, CC_id, frameP, subframeP);

  UE_list_t UE_to_sched;
  for (int i = 0; i < MAX_MOBILES_PER_ENB; ++i)
    UE_to_sched.next[i] = -1;
  int *cur = &UE_to_sched.head;

  for (int UE_id = UE_info->list.head; UE_id >= 0; UE_id = UE_info->list.next[UE_id]) {
    UE_sched_ctrl_t *ue_sched_ctrl = &UE_info->UE_sched_ctrl[UE_id];
    const UE_TEMPLATE *ue_template = &UE_info->UE_template[CC_id][UE_id];

/// (only in-band mode), indica