In [2]:
# Show the GPUs visible on this host and their current memory/utilisation.
!nvidia-smi

Tue Aug 27 16:07:54 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.30.02              Driver Version: 530.30.02    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA RTX A6000                On | 00000000:AF:00.0 Off |                  Off |
| 42%   59C    P8                8W / 300W|  14172MiB / 49140MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA RTX A6000                On | 00000000:D8:00.0 Off |  

In [3]:
import pandas as pd
from llama_index.llms.ollama import Ollama

from prompts import GRADE_QA_PROMPT,QUESTION_FORMAT


def remove_duplicate_answer(answer: str) -> str:
    """Normalise a comma-separated answer string into a canonical form.

    The answer is split on commas, each selection is stripped of surrounding
    whitespace, duplicates and empty selections are dropped, and the remaining
    selections are sorted and re-joined with ``", "``.  This makes two answers
    that pick the same options compare equal regardless of order, repetition
    or spacing (e.g. ``"B,A, B\n"`` and ``"A, B"``).

    Args:
        answer: Comma-separated selections, e.g. ``"B, A, B"``.

    Returns:
        The unique selections in sorted order joined by ``", "``; an empty
        string when ``answer`` contains no selections.
    """
    # Split on the bare comma and strip each piece: splitting on ", " would
    # leave "A,B" as one token and keep stray whitespace/newlines attached.
    selections = {part.strip() for part in answer.split(",")}
    # Empty pieces come from "", trailing commas or doubled commas.
    selections.discard("")
    return ", ".join(sorted(selections))

In [4]:
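### Run the LLM through Ollama for better inference speed ###
# One-time setup, to be run from a shell before executing the cells below:
# docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
# docker exec -it ollama ollama pull llama3
# NOTE(review): as written, the `docker run` line does not request GPU access;
# Ollama's Docker instructions add `--gpus=all` for NVIDIA GPUs - confirm which was used.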

In [5]:
# Client for the llama3 model served by the local Ollama instance.
llm = Ollama(
    base_url="http://localhost:11434",
    model="llama3",
    request_timeout=60.0,
    temperature=0.1,
)

# One row per multiple-choice question.
dataset = pd.read_csv("../data/qa_dataset/dataiku_multiple_choice_qa.csv")

# For every row: fill QUESTION_FORMAT from the row's columns, then wrap the
# rendered question in GRADE_QA_PROMPT to obtain the final prompt.
questions, prompts = [], []
for _, row in dataset.iterrows():
    question = QUESTION_FORMAT.format(**row.to_dict())
    questions.append(question)
    prompts.append(GRADE_QA_PROMPT.format(query_str=question))

# Exactly one prompt per dataset row.
assert len(prompts) == dataset.shape[0]

In [6]:
# Work on a copy so `dataset` itself is left unmodified.
results = dataset.copy()

# Query the model once per prompt, in dataset order, keeping the completion text.
foundation_llama3 = [llm.complete(prompt).text for prompt in prompts]
results["answer_foundation_llama3"] = foundation_llama3

# Put the reference and the model answers through the same normalisation
# (deduplicated, sorted selections) so they can be compared as plain strings.
for column in ("answer", "answer_foundation_llama3"):
    results[column] = results[column].apply(remove_duplicate_answer)

In [9]:
# How often the model produced each (normalised) answer combination.
llama3_answers = results["answer_foundation_llama3"]
llama3_answers.value_counts()

answer_foundation_llama3
D             509
A             399
B             322
C             247
A, B          219
A, C          124
B, C           75
A, D           31
A, B, C        21
B, D            9
C, D            8
A, B, D         5
A, B, C, D      5
B, C, D         2
A, C, D         2
Name: count, dtype: int64

In [10]:
# Exact-match accuracy in percent: a row counts as correct only when the
# model's answer string equals the reference answer string.
is_exact_match = results["answer"].eq(results["answer_foundation_llama3"])
foundation_llama3_scores = is_exact_match.sum() / len(results) * 100
foundation_llama3_scores

50.606673407482305

In [11]:
# Persist the per-question results (dataset columns plus the model's answers), without the index.
results_csv_path = "../data/results/foundation_llama3_scores.csv"
results.to_csv(results_csv_path, index=False)