In [1]:
from git import Repo
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_text_splitters import Language

In [2]:
# Clone
repo_path = "/home/rupesh/aqrtr/ai/langchain/proj-101-go/go_repo-01"


In [5]:
repo = Repo.clone_from("https://github.com/rupeshtr78/nvidia-server", to_path=repo_path)

In [3]:
# Load
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".go"],
    exclude=["Dockerfile", "vendor", "docker-compose.yml", "Makefile", "README.md"],
    parser=LanguageParser(language=Language.GO, parser_threshold=500),
)
documents = loader.load()
len(documents)

10

In [None]:
for doc in documents:
    print(doc.metadata)
    print("\n\n\n")

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

go_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.GO, chunk_size=2000, chunk_overlap=200
)
texts = go_splitter.split_documents(documents)
len(texts)

18

In [6]:
texts[0]

Document(page_content='package gpumetrics\n\nimport (\n\t"context"\n\t"fmt"\n\t"log"\n\t"sync"\n\n\t"github.com/NVIDIA/go-nvml/pkg/nvml"\n)\n\n// FetchGpuInfo fetches the metrics for all GPU devices\nfunc FetchAllGpuInfo(ctx context.Context, gpu GpuDeviceManager, count int) (GpuMap, error) {\n\n\tif count == 0 {\n\t\treturn nil, fmt.Errorf("no GPU devices found")\n\t}\n\n\tgpuMap := make(GpuMap)\n\n\t// gpuChan := make(chan GpuMap, count)\n\t// defer close(gpuChan)\n\n\terrChan := make(chan error, 1) // only need to store one error\n\tdefer close(errChan)\n\n\twg := new(sync.WaitGroup)\n\twg.Add(count)\n\n\tfor i := 0; i < count; i++ {\n\t\tgo func(i int) {\n\t\t\tdefer wg.Done()\n\n\t\t\t// context done means there has been a cancellation signal\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\terrChan <- ctx.Err()\n\t\t\t\treturn\n\t\t\tdefault:\n\t\t\t}\n\n\t\t\tdevice, ret := gpu.DeviceGetHandleByIndex(i)\n\t\t\tif ret != nvml.SUCCESS {\n\t\t\t\terrChan <- fmt.Errorf("failed to ge

In [7]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

db = Chroma.from_documents(texts, OpenAIEmbeddings(disallowed_special=()))
retriever = db.as_retriever(
    search_type="mmr",  # Also test "similarity"
    search_kwargs={"k": 8},
)

In [8]:
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

llm = ChatOpenAI(model="gpt-4")
memory = ConversationSummaryMemory(llm=llm, max_memory_length=5, max_memory_turns=5, return_messages=True, memory_key="chat_history")
qaChain = ConversationalRetrievalChain.from_llm(llm=llm, memory=memory, retriever=retriever)

In [10]:
# Ask a question
question = "Can you suggest some improvements for function FetchAllGpuInfo?"  # Replace with your actual question

# Include 'chat_history' key if required by your function
formatted_question = {'question': question, 'chat_history': ''}

# Invoke the conversation chain with the properly formatted question
result = qaChain.invoke(formatted_question)

# Fetch and print the answer
answer = result['answer']
print(answer)



Number of requested results 20 is greater than number of elements in index 18, updating n_results = 18


1. Error Handling: The `FetchAllGpuInfo` function could use more comprehensive error handling. Right now, it only returns the first error it encounters, but it could be more informative if it collected all errors and returned them together.

2. Parallelism Control: The function spawns a goroutine for each GPU device, which could potentially lead to issues if the count of GPU devices is large. It could be improved by using a worker pool pattern to limit the number of concurrent goroutines.

3. Context Usage: The function could make better use of the context passed in. It could pass the context to the `FetchDeviceMetrics` function to allow for cancellation of that function as well.

4. Logging: The function could use structured logging to make the logs easier to parse and analyze. Additionally, it could include more information in the logs, such as the specific GPU device that an operation is being performed on.

5. Code Duplication: The function has some duplicated code that could be re

In [None]:
from langchain_community.chat_models.ollama import ChatOllama
llmOllama = ChatOllama(model="codellama")

In [12]:
response_message = llmOllama.invoke(
    "what does function FetchAllGpuInfo do? "
)

print(response_message.content)
print(response_message.response_metadata)

Without specific context, it can be assumed that the function FetchAllGpuInfo is used to retrieve information about all available GPU (Graphics Processing Unit) on a system. This might include details like the GPU model, memory size, driver version, clock speed, temperature, usage percentage, and other technical specifications. The actual details fetched can vary depending on the programming language and libraries used.
{'token_usage': {'completion_tokens': 77, 'prompt_tokens': 18, 'total_tokens': 95}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}


In [11]:
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.chat_models.ollama import ChatOllama
llmOllama = ChatOllama(model="codellama")
memoryOllama = ConversationSummaryMemory(llm=llm, max_memory_length=5, max_memory_turns=5, return_messages=True, memory_key="chat_history")
ollamaChain = ConversationalRetrievalChain.from_llm(llm=llmOllama, memory=memoryOllama, retriever=retriever)

In [13]:
# Ask a question
question = "Can you write tests for function FetchAllGpuInfo?"  # Replace with your actual question

# Include 'chat_history' key if required by your function
formatted_question = {'question': question, 'chat_history': ''}

# Invoke the conversation chain with the properly formatted question
result = ollamaChain.invoke(formatted_question)

# Fetch and print the answer
answer = result['answer']
print(answer)

Number of requested results 20 is greater than number of elements in index 18, updating n_results = 18



Here are some test case suggestions for the `FetchAllGpuInfo` function:

1. Test that the function returns an error when the context is canceled.
2. Test that the function returns an error when the device manager encounters an error while fetching metrics.
3. Test that the function returns a slice of GPU information objects with the expected number of elements, given different numbers of GPUs.
4. Test that the function returns a slice of GPU information objects with the correct values for each metric. For example, test that the temperature is non-zero and within an acceptable range, the power usage is within an acceptable range, etc.
5. Test that the function correctly handles the case where the number of GPUs changes while it's running.
6. Test that the function returns an error when it encounters an unsupported GPU model or version.
7. Test that the function returns an error when it fails to marshal the GPU information objects into JSON.
8. Test that the function correctly handles t

In [None]:
# Ask a question
question = "Can you write read me for the nvidia-server repo?"  # Replace with your actual question

# Include 'chat_history' key if required by your function
formatted_question = {'question': question, 'chat_history': ''}

# Invoke the conversation chain with the properly formatted question
result = ollamaChain.invoke(formatted_question)

# Fetch and print the answer
answer = result['answer']
print(answer)