In [1]:
!pip install transformers
!pip install datasets
!pip install evaluate
!pip install rouge_score
!pip install bert-score

Collecting datasets
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.1-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (1

In [26]:
import os
import pandas as pd
import numpy as np
import huggingface_hub
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,)
import torch
from textwrap import dedent
from peft import (
    LoraConfig,
    PeftModel,
    PeftConfig,
    TaskType,
    get_peft_model,
)
from google.colab import drive
import bert_score
import evaluate
from tqdm import tqdm

In [4]:
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
TEST_CSV = 
FINETUNED_MODEL = 
BASE_MODEL = 'meta-llama/Llama-3.2-1B-Instruct'

In [None]:
# WILL NEED PERMISSION FROM META ON HF
# AND API KEY TO LOGIN
huggingface_hub.login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [19]:
test_df = pd.read_csv(TEST_CSV)
test_df.head()

Unnamed: 0,question,answer,context,source,subdomain
0,What are Cyndi Harvey's main responsibilities ...,Cyndi Harvey specializes in Tuition & Fees and...,Cyndi Harvey Student Records Specialist: Tuiti...,https://registrar.ku.edu/people/cyndi-harvey,registrar.ku.edu
1,Where is Kristi Kamm's office located?,"Her office is located in LEEP2, room 1410.","LEEP2, room 1410",https://engr.ku.edu/people/kristi-kamm,engr.ku.edu
2,In what year did Gary Minden receive his Ph.D....,He received his Ph.D. in 1982.,"Education — Ph.D. in Electrical Engineering, T...",https://eecs.ku.edu/people/gary-minden,eecs.ku.edu
3,When must readmitted students arrive for the S...,"Readmitted students must arrive by May 28th, 2...","Summer 2025 Semester Arrive by May 28th, 2025,...",https://iss.ku.edu/readmitted_students,iss.ku.edu
4,Which professor is also the Chair of the Depar...,Chris Fischer is the Chair of the Department o...,Monday: 02/17/2025 Chris Fischer Professor and...,http://bioengr.ku.edu/colloquium,bioengr.ku.edu


In [20]:
qa_test = Dataset.from_pandas(test_df)

## Test Prompts


In [21]:
def test_with_context_prompt(row):
    prompt = dedent(
        f"""
    {row["question"]}

    Information:

    ```
    {row["context"]}
    ```
    """
    )
    messages = [
        {
            "role": "system",
            "content": "Use only the information to answer the question",
        },
        {"role": "user", "content": prompt}
    ]
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [22]:
def test_without_context_prompt(row):
    prompt = dedent(
        f"""
    {row["question"]}
    """
    )
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant",
        },
        {"role": "user", "content": prompt}
    ]
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

## Base Model Output

In [23]:
tokenizer = AutoTokenizer.from_pretrained(FINETUNED_MODEL)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
)

In [24]:
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,
    return_full_text=False,
)

Device set to use cuda:0


In [27]:
base_model_output_with_context = []
base_model_output_without_context = []
for row in tqdm(qa_test):
  prompt_with_context = test_with_context_prompt(row)
  outputs_with_context = pipe(prompt_with_context)
  base_model_output_with_context.append(outputs_with_context[0]["generated_text"])

  prompt_without_context = test_without_context_prompt(row)
  outputs_without_context = pipe(prompt_without_context)
  base_model_output_without_context.append(outputs_without_context[0]["generated_text"])

  1%|          | 5/876 [00:09<26:50,  1.85s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 876/876 [44:53<00:00,  3.08s/it]


In [28]:
test_df['base_model_output_with_context'] = base_model_output_with_context
test_df['base_model_output_without_context'] = base_model_output_without_context

## Finetuned Model Output


In [29]:
model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8)
model = PeftModel.from_pretrained(model, FINETUNED_MODEL)
model = model.merge_and_unload()

In [30]:
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,
    return_full_text=False,
)

Device set to use cuda:0


In [31]:
finetuned_model_output_with_context = []
finetuned_model_output_without_context = []
for row in tqdm(qa_test):
  prompt_with_context = test_with_context_prompt(row)
  outputs_with_context = pipe(prompt_with_context)
  finetuned_model_output_with_context.append(outputs_with_context[0]["generated_text"])

  prompt_without_context = test_without_context_prompt(row)
  outputs_without_context = pipe(prompt_without_context)
  finetuned_model_output_without_context.append(outputs_without_context[0]["generated_text"])

100%|██████████| 876/876 [19:54<00:00,  1.36s/it]


In [33]:
test_df['finetuned_model_output_with_context'] = finetuned_model_output_with_context
test_df['finetuned_model_output_without_context'] = finetuned_model_output_without_context

## Bleu Score

In [34]:
bleu = evaluate.load("bleu")

bleu_base_context = bleu.compute(
    predictions=test_df['base_model_output_with_context'].tolist(),
    references=[[ref] for ref in test_df['answer'].tolist()]
)
bleu_base_without_context = bleu.compute(
    predictions=test_df['base_model_output_without_context'].tolist(),
    references=[[ref] for ref in test_df['answer'].tolist()]
)
bleu_finetuned_context = bleu.compute(
    predictions=test_df['base_model_output_with_context'].tolist(),
    references=[[ref] for ref in test_df['answer'].tolist()]
)
bleu_finetuned_without_context = bleu.compute(
    predictions=test_df['finetuned_model_output_with_context'].tolist(),
    references=[[ref] for ref in test_df['answer'].tolist()]
)

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

In [35]:
def print_bleu(name, bleu_obj):
    print(f"{name}")
    print(f"  BLEU Score     : {bleu_obj['bleu']:.4f}")
    print(f"  Precisions     : {[f'{p:.4f}' for p in bleu_obj['precisions']]}")
    print(f"  Length Ratio   : {bleu_obj['length_ratio']:.4f}")
    print(f"  Brevity Penalty: {bleu_obj['brevity_penalty']:.4f}")
    print("")

print_bleu("Base Model (with context)", bleu_base_context)
print_bleu("Base Model (without context)", bleu_base_without_context)
print_bleu("Fine-tuned Model (with context)", bleu_finetuned_context)
print_bleu("Fine-tuned Model (without context)", bleu_finetuned_without_context)

Base Model (with context)
  BLEU Score     : 0.1711
  Precisions     : ['0.3119', '0.1887', '0.1381', '0.1055']
  Length Ratio   : 1.9367
  Brevity Penalty: 1.0000

Base Model (without context)
  BLEU Score     : 0.0186
  Precisions     : ['0.0870', '0.0235', '0.0105', '0.0056']
  Length Ratio   : 5.0614
  Brevity Penalty: 1.0000

Fine-tuned Model (with context)
  BLEU Score     : 0.1711
  Precisions     : ['0.3119', '0.1887', '0.1381', '0.1055']
  Length Ratio   : 1.9367
  Brevity Penalty: 1.0000

Fine-tuned Model (without context)
  BLEU Score     : 0.5097
  Precisions     : ['0.6976', '0.5665', '0.4970', '0.4448']
  Length Ratio   : 0.9393
  Brevity Penalty: 0.9375



## Rouge Score

In [36]:
rouge = evaluate.load("rouge")

rouge_base_context = rouge.compute(
    predictions=test_df['base_model_output_with_context'].tolist(),
    references=[[ref] for ref in test_df['answer'].tolist()]
)
rouge_base_without_context = rouge.compute(
    predictions=test_df['base_model_output_without_context'].tolist(),
    references=[[ref] for ref in test_df['answer'].tolist()]
)
rouge_finetuned_context = rouge.compute(
    predictions=test_df['finetuned_model_output_with_context'].tolist(),
    references=[[ref] for ref in test_df['answer'].tolist()]
)
rouge_finetuned_without_context = rouge.compute(
    predictions=test_df['finetuned_model_output_without_context'].tolist(),
    references=[[ref] for ref in test_df['answer'].tolist()]
)

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

In [37]:
def print_rouge(name, rouge_obj):
    print(f"{name}")
    print(f"  ROUGE-1   : {rouge_obj['rouge1']:.4f}")
    print(f"  ROUGE-2   : {rouge_obj['rouge2']:.4f}")
    print(f"  ROUGE-L   : {rouge_obj['rougeL']:.4f}")
    print(f"  ROUGE-Lsum: {rouge_obj['rougeLsum']:.4f}")
    print("")

print_rouge("Base Model (with context)", rouge_base_context)
print_rouge("Base Model (without context)", rouge_base_without_context)
print_rouge("Fine-tuned Model (with context)", rouge_finetuned_context)
print_rouge("Fine-tuned Model (without context)", rouge_finetuned_without_context)


Base Model (with context)
  ROUGE-1   : 0.4949
  ROUGE-2   : 0.3333
  ROUGE-L   : 0.4450
  ROUGE-Lsum: 0.4496

Base Model (without context)
  ROUGE-1   : 0.1581
  ROUGE-2   : 0.0581
  ROUGE-L   : 0.1299
  ROUGE-Lsum: 0.1361

Fine-tuned Model (with context)
  ROUGE-1   : 0.6622
  ROUGE-2   : 0.5482
  ROUGE-L   : 0.6408
  ROUGE-Lsum: 0.6412

Fine-tuned Model (without context)
  ROUGE-1   : 0.2841
  ROUGE-2   : 0.1199
  ROUGE-L   : 0.2404
  ROUGE-Lsum: 0.2426



## Bert Score

In [38]:
P_base_with_context, R_base_with_context, F1_base_with_context = bert_score.score(test_df['base_model_output_with_context'].tolist(),
                            test_df['answer'].tolist(),
                            lang="en",
                            verbose=True)
P_base_without_context, R_base_without_context, F1_base_without_context = bert_score.score(test_df['base_model_output_without_context'].tolist(),
                            test_df['answer'].tolist(),
                            lang="en",
                            verbose=True)
P_finetuned_with_context, R_finetuned_with_context, F1_finetuned_with_context = bert_score.score(test_df['finetuned_model_output_with_context'].tolist(),
                            test_df['answer'].tolist(),
                            lang="en",
                            verbose=True)
P_finetuned_without_context, R_finetuned_without_context, F1_finetuned_without_context = bert_score.score(test_df['finetuned_model_output_without_context'].tolist(),
                            test_df['answer'].tolist(),
                            lang="en",
                            verbose=True)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/27 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/14 [00:00<?, ?it/s]

done in 11.91 seconds, 73.54 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/27 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/14 [00:00<?, ?it/s]

done in 17.81 seconds, 49.18 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/26 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/14 [00:00<?, ?it/s]

done in 7.70 seconds, 113.81 sentences/sec


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/27 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/14 [00:00<?, ?it/s]

done in 10.72 seconds, 81.75 sentences/sec


In [39]:
print("Base Model (with context)")
print(f"  Precision:  {P_base_with_context.mean().item():.4f}")
print(f"  Recall:     {R_base_with_context.mean().item():.4f}")
print(f"  BERTScore F1: {F1_base_with_context.mean().item():.4f}\n")

print("Base Model (without context)")
print(f"  Precision:  {P_base_without_context.mean().item():.4f}")
print(f"  Recall:     {R_base_without_context.mean().item():.4f}")
print(f"  BERTScore F1: {F1_base_without_context.mean().item():.4f}\n")

print("Fine-tuned Model (with context)")
print(f"  Precision:  {P_finetuned_with_context.mean().item():.4f}")
print(f"  Recall:     {R_finetuned_with_context.mean().item():.4f}")
print(f"  BERTScore F1: {F1_finetuned_with_context.mean().item():.4f}\n")

print("Fine-tuned Model (without context)")
print(f"  Precision:  {P_finetuned_without_context.mean().item():.4f}")
print(f"  Recall:     {R_finetuned_without_context.mean().item():.4f}")
print(f"  BERTScore F1: {F1_finetuned_without_context.mean().item():.4f}")

Base Model (with context)
  Precision:  0.8908
  Recall:     0.9202
  BERTScore F1: 0.9049

Base Model (without context)
  Precision:  0.8256
  Recall:     0.8664
  BERTScore F1: 0.8453

Fine-tuned Model (with context)
  Precision:  0.9438
  Recall:     0.9368
  BERTScore F1: 0.9400

Fine-tuned Model (without context)
  Precision:  0.8724
  Recall:     0.8859
  BERTScore F1: 0.8789


In [None]:
test_df.to_csv('evaluation.csv', index = False)

## More Model Inferences

In [None]:
tokenizer = AutoTokenizer.from_pretrained(FINETUNED_MODEL)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
)

model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=8)
model = PeftModel.from_pretrained(model, FINETUNED_MODEL)
model = model.merge_and_unload()

In [None]:
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,
    return_full_text=False,
)

Device set to use cuda:0


In [None]:
%%time
# 125 50
row = qa_test[100]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

When is the CR/No CR deadline for courses that last less than five days?

Information:

```
The CR/No CR deadline of courses that last less than five days is the day before the first day of the class.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     The CR/No CR deadline for these courses is the day before the first day of the class.
Finetuned model answer: The CR/No CR deadline for courses that last less than five days is the day before the first day of the class.

CPU times: user 712 ms, sys: 5.56 ms, total: 718 ms
Wall time: 2.01 s


In [None]:

%%time
row = qa_test[123]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What topics are covered in the book regarding research methodologies?

Information:

```
It delves into the intricacies of HRI methodologies, statistical measurements, and research design, providing clear explanations and real-world examples for deeper understanding.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     The book delves into the intricacies of HRI methodologies, statistical measurements, and research design.
Finetuned model answer: The book delves into the intricacies of HRI methodologies, statistical measurements, and research design, providing clear explanations and real-world examples for deeper understanding.

CPU times: user 1.07 s, sys: 0 ns, total: 1.07 s
Wall time: 1.56 s


In [None]:
row = qa_test[23]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

In which city and state is Sarah Zey's office located?

Information:

```
Lawrence, KS 66045
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Sarah Zey's office is located in Lawrence, KS.
Finetuned model answer: Lawrence, KS



In [None]:
row = qa_test[98]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What is the purpose of the Bioengineering Opportunity Fund at the University of Kansas?

Information:

```
Bioengineering Opportunity Fund: Unrestricted funds that allow the Director to meet urgent needs and take advantage of opportunities as they arise.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     The Bioengineering Opportunity Fund provides unrestricted funds that allow the Director to meet urgent needs and take advantage of opportunities as they arise.
Finetuned model answer: The Bioengineering Opportunity Fund is unrestricted funds that allow the Director to meet urgent needs and take advantage of opportunities as they arise.



In [None]:
row = qa_test[756]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What are the five critical areas of holistic financial wellness emphasized by Jayhawk Finances?

Information:

```
Jayhawk Finances provides coaching sessions, workshops and information about the five critical areas of holistic financial wellness: Earning Spending Borrowing Protecting Saving and Investing Finances.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     The five critical areas of holistic financial wellness are Earning, Spending, Borrowing, Protecting, Saving, and Investing Finances.
Finetuned model answer: The five critical areas of holistic financial wellness emphasized by Jayhawk Finances are Earning, Borrowing, Protecting, Saving, and Investing.



In [None]:
row = qa_test[90]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

How do Graduate Scholarships differ from Graduate Fellowships at the University of Kansas?

Information:

```
Graduate Scholarships: Scholarships can be added onto fellowships, graduate teaching assistantships or graduate research assistantships, but are only offered for one year.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Graduate Scholarships can be added onto fellowships or assistantships but are only offered for one year, whereas fellowships can provide support for one or more years.
Finetuned model answer: Graduate Scholarships can be added onto fellowships, graduate teaching assistantships or graduate research assistantships but are only offered for one year.



In [None]:
%%time
# 125 50
row = qa_test[765]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What specific area of cybersecurity is Koyel Pramanick focusing on in her PhD research?

Information:

```
Koyel is currently a PhD candidate in Computer Science. She is working in the field of cybersecurity, more specifically in software security.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Koyel is focusing on software security.
Finetuned model answer: She is focusing on software security.

CPU times: user 276 ms, sys: 0 ns, total: 276 ms
Wall time: 368 ms


In [None]:
%%time
# 125 50
row = qa_test[403]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What must you do with your I-94 record after finding it?

Information:

```
Once you find your I-94 record, please upload it to iHawk on the 'ISS Check-in' tab.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     You must upload it to iHawk on the 'ISS Check-in' tab after finding your I-94 record.
Finetuned model answer: You must upload it to iHawk on the 'ISS Check-in' tab.

CPU times: user 574 ms, sys: 1.06 ms, total: 575 ms
Wall time: 615 ms


In [None]:
%%time
# 125 50
row = qa_test[450]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What happens if a student does not check the box for the Bioengineering Certificate when applying for their degree?

Information:

```
When you apply for your degree, make sure to CHECK the BOX for the Bioengineering Certificate! Otherwise, we assume you don't want it.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     It is assumed that the student does not want the Bioengineering Certificate.
Finetuned model answer: If a student does not check the box for the Bioengineering Certificate, they assume they do not want it.

CPU times: user 655 ms, sys: 1.57 ms, total: 657 ms
Wall time: 1.29 s


In [None]:
%%time
# 125 50
row = qa_test[340]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What storage capacity is provided in both laptops and desktops at the School of Engineering?

Information:

```
...Storage: 1TB SSD... Storage: 1TB SSD
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Both laptops and desktops have a storage capacity of 1TB SSD.
Finetuned model answer: Both laptops and desktops are equipped with a 1TB SSD.

CPU times: user 357 ms, sys: 0 ns, total: 357 ms
Wall time: 357 ms


In [None]:
%%time
# 125 50
row = qa_test[203]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What is the email address for contacting Tim Miller?

Information:

```
Contact Info tim.j.miller@ku.edu
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Tim Miller's email address is tim.j.miller@ku.edu.
Finetuned model answer: The email address for Tim Miller is tim.j.miller@ku.edu.

CPU times: user 530 ms, sys: 0 ns, total: 530 ms
Wall time: 694 ms


In [None]:
%%time
# 125 50
row = qa_test[506]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

Who should be contacted for questions regarding the Distinguished Engineering Service Award nomination process?

Information:

```
Contact Amy Wierman , 785-864-2930 , in the Engineering Deans Office with questions.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Amy Wierman should be contacted for questions regarding the nomination process.
Finetuned model answer: Amy Wierman, in the Engineering Deans Office.

CPU times: user 430 ms, sys: 780 µs, total: 431 ms
Wall time: 440 ms


In [None]:
%%time
# 125 50
row = qa_test[120]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What role does Candan Tamerler hold within the administration related to research?

Information:

```
Candan Tamerler, Ph.D. -Track Director, Biomaterials & Tissue Engineering -Wesley G. Cramer Professor, Mechanical Engineering -Associate Vice Chancellor for Research...
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Candan Tamerler, Ph.D. is the Associate Vice Chancellor for Research.
Finetuned model answer: He is the Track Director of Biomaterials & Tissue Engineering and the Wesley G. Cramer Professor of Mechanical Engineering.

CPU times: user 605 ms, sys: 0 ns, total: 605 ms
Wall time: 605 ms


In [None]:
%%time
# 125 50
row = qa_test[795]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What role do faculty members play in the education of students in the chemical and petroleum engineering department?

Information:

```
Chemical Engineering World renowned faculty, state-of-the-art facilities, innovative teaching and research prepare students to solve problems and positively impact the world.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Faculty members are expert educators who teach using innovative methods to prepare students for real-world challenges.
Finetuned model answer: Faculty members prepare students to solve problems and positively impact the world.

CPU times: user 343 ms, sys: 3.81 ms, total: 347 ms
Wall time: 346 ms


In [None]:
%%time
# 125 50
row = qa_test[55]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What must students do to ensure accuracy before traveling with their visa?

Information:

```
Receive and Review Your Entry Visa: Check the visa in your passport to make sure that all the information on it is correct.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Check the visa in your passport to make sure that all the information on it is correct.
Finetuned model answer: Students must receive and review their entry visa in their passport to ensure that all information is correct.

CPU times: user 632 ms, sys: 0 ns, total: 632 ms
Wall time: 670 ms


In [None]:
%%time
# 125 50
row = qa_test[98]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What is the purpose of the Bioengineering Opportunity Fund at the University of Kansas?

Information:

```
Bioengineering Opportunity Fund: Unrestricted funds that allow the Director to meet urgent needs and take advantage of opportunities as they arise.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     The Bioengineering Opportunity Fund provides unrestricted funds that allow the Director to meet urgent needs and take advantage of opportunities as they arise.
Finetuned model answer: The Bioengineering Opportunity Fund allows the Director to meet urgent needs and take advantage of opportunities as they arise.

CPU times: user 517 ms, sys: 2.64 ms, total: 519 ms
Wall time: 522 ms


In [None]:
%%time
# 125 50
row = qa_test[12]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

What is the process for submitting a large format printing request?

Information:

```
Request deadlines are 2:00pm Mondays and Wednesdays. (Poster is ready at 4pm Mon if submitted after 2pm Wed. Poster is ready at 4pm Wed if submitted before 2pm Wed.)
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Requests for printing must be submitted by 2:00 p.m. on Mondays and Wednesdays, with completion times depending on the submission timing.
Finetuned model answer: The process for submitting a large format printing request is as follows: the poster is ready at 4pm Mon if submitted after 2pm Wed., and at 4pm Wed if submitted before 2pm Wed.

CPU times: user 1.47 s, sys: 2.54 ms, total: 1.47 s
Wall time: 2.27 s


In [None]:
%%time
# 125 50
row = qa_test[609]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

Which engineering discipline has students coming from different states?

Information:

```
Siana Gonzalez Aerospace Engineering, 2028 Hometown: Hialeah, FL Liam Mckinley Aerospace Engineering, 2028 Hometown: Olathe, KS
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Aerospace Engineering has students from Florida and Kansas.
Finetuned model answer: Aerospace Engineering.

CPU times: user 170 ms, sys: 2.06 ms, total: 173 ms
Wall time: 176 ms


In [None]:
%%time
# 125 50
row = qa_test[21]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

Who will be speaking on March 10, 2025, and what is their title?

Information:

```
Monday: 03/10/2025 Iredia David Iyamu Assistant Professor Department of Medicinal Chemistry University of Kansas
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Iredia David Iyamu will be the speaker on March 10, 2025, and he is an Assistant Professor in the Department of Medicinal Chemistry.
Finetuned model answer: Iredia David Iyamu, Assistant Professor, Department of Medicinal Chemistry, University of Kansas.

CPU times: user 513 ms, sys: 0 ns, total: 513 ms
Wall time: 512 ms


In [None]:
%%time
# 125 50
row = qa_test[803]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

How many credit hours of EECS classes must a Ph.D. student complete at a minimum?

Information:

```
Minimum of 30 credit hours of course work must be EECS classes numbered 700 or higher (or equivalent transferred classes), excluding Directed Graduate Reading (EECS 801), Graduate Problems (EECS 891), Master's Thesis (EECS 899), and Post-Master's Research (EECS 998).
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     A minimum of 30 credit hours of course work must be EECS classes numbered 700 or higher.
Finetuned model answer: 30 credit hours of EECS classes numbered 700 or higher.

CPU times: user 351 ms, sys: 690 µs, total: 352 ms
Wall time: 352 ms


In [None]:
%%time
# 125 50
row = qa_test[456]
prompt = test_prompt(row)
print(prompt)
outputs = pipe(prompt)
response = f"""
Actual Answer:     {row["answer"]}
Finetuned model answer: {outputs[0]["generated_text"]}
"""
print(response)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 03 May 2025

Use only the information to answer the question<|eot_id|><|start_header_id|>user<|end_header_id|>

Which schools and centers contribute affiliate faculty to the bioengineering program at KU?

Information:

```
Our world-class affiliate faculty are found in the School of Medicine, School of Pharmacy, School of Business, College of Liberal Arts & Sciences, as well as in the many bioscience research centers and labs.
```<|eot_id|><|start_header_id|>assistant<|end_header_id|>



Actual Answer:     Affiliate faculty come from the School of Medicine, School of Pharmacy, School of Business, College of Liberal Arts & Sciences, and various bioscience research centers and labs.
Finetuned model answer: The affiliate faculty are found in the School of Medicine, School of Pharmacy, School of Business, College of Liberal Arts & Sciences, as well as in the many bioscience resear