In [1]:
import psutil
 
def check_if_running(process_name):
    """Report whether any running process has `process_name` in its name.

    Args:
        process_name: Substring to look for in the names of running processes.

    Returns:
        True as soon as one process name contains `process_name`, else False.
    """
    for proc in psutil.process_iter(["name"]):
        name = proc.info["name"]
        # psutil stores None when a process name could not be retrieved;
        # a substring test against None would raise TypeError, so skip those.
        if name is not None and process_name in name:
            return True
    return False
 
ollama_running = check_if_running("ollama")

# Stop early: every later cell talks to the local Ollama server.
if not ollama_running:
    raise RuntimeError("Ollama not running. Launch ollama before proceeding.")
# Report the result computed above instead of scanning the process table again.
print("Ollama running:", ollama_running)

Ollama running: True


In [2]:
import json

# Load the fine-tuned model responses. Decode the JSON file as UTF-8 explicitly
# so the result does not depend on the platform's default text encoding.
file_path = "instruction-data-with-response.json"
with open(file_path, "r", encoding="utf-8") as file:
    test_data = json.load(file)
 
# # Apply alpaca prompt style
# def format_input(entry):
#     instruction_text = (
#         f"Below is an instruction that describes a task. "
#         f"Write a response that appropriately completes the request."
#         f"\n\n### Instruction:\n{entry['instruction']}"
#     )
#     input_text = f"\n\n### Input:\n{entry['input']}" if entry["input"] else ""
#     return instruction_text + input_text

# Apply phi-3 prompt style
def format_input(entry):
    """Format a dataset entry as a phi-3 style user prompt.

    Args:
        entry: Mapping with an "instruction" key and an optional "input" key.

    Returns:
        "<|user|>\\n" followed by the instruction, plus a newline and the
        input text when the entry has a non-empty "input".

    Raises:
        KeyError: If the entry has no "instruction" key.
    """
    instruction_text = f"<|user|>\n{entry['instruction']}"
    # A missing "input" key is treated the same as an empty input.
    extra_input = entry.get("input")
    input_text = f"\n{extra_input}" if extra_input else ""
    return instruction_text + input_text

# # Apply smollm prompt style
# def format_input(entry):
#     input_text = (
#         "<|im_start|>user\n"
#         f"{entry['instruction']} \n{entry['input']}"
#         "<|im_end|>"
#         )
#     return input_text

In [3]:
# Ollama with REST API

import urllib.request

# Default model name sent to the Ollama server by query_model.
MODEL = "llama3"

def query_model(
    prompt, 
    model=MODEL, 
    url="http://localhost:11434/api/chat"
):
    """Send `prompt` as a single user message to a chat endpoint and return the reply.

    The response body is read line by line; each non-empty line is parsed as a
    JSON object and the `message.content` pieces are concatenated.

    Args:
        prompt: Text sent as the content of the user message.
        model: Model name placed in the request payload.
        url: Address of the chat endpoint.

    Returns:
        The concatenated reply text.

    Raises:
        RuntimeError: If a response line is a JSON object with an "error" key.
    """
    # Create the data payload as a dictionary
    data = {
        "model": model,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "options": {     # Settings below are required for deterministic responses
            "seed": 123,
            "temperature": 0,
            "num_ctx": 2048
        }
    }

    # Convert the dictionary to a JSON formatted string and encode it to bytes
    payload = json.dumps(data).encode("utf-8")

    # Create a request object, setting the method to POST and adding necessary headers
    request = urllib.request.Request(
        url, 
        data=payload, 
        method="POST"
    )
    request.add_header("Content-Type", "application/json")

    # Send the request and collect the reply pieces; join once at the end
    chunks = []
    with urllib.request.urlopen(request) as response:
        for raw_line in response:
            line = raw_line.decode("utf-8").strip()
            if not line:
                # Blank separator lines carry no JSON and would break json.loads
                continue
            response_json = json.loads(line)
            if "error" in response_json:
                # Surface the server's message instead of a KeyError on "message"
                raise RuntimeError(
                    f"Ollama returned an error: {response_json['error']}"
                )
            chunks.append(response_json["message"]["content"])

    return "".join(chunks)


# Quick sanity check that the endpoint answers before scoring anything.
result = query_model(prompt="What do Llamas eat?", model=MODEL)
print(result)

Llamas are herbivores, which means they primarily eat plants and plant-based foods. Their diet consists of:

1. **Grasses**: Various types of grasses, including tall grasses, short grasses, and grassy weeds.
2. **Leaves**: Leaves from trees and shrubs, such as willow, alder, and oak.
3. **Hay**: High-quality hay, like timothy or alfalfa hay, is a staple in many llama diets.
4. **Fruits**: Fresh fruits, like apples, berries, and melons, are also consumed by llamas.
5. **Grains**: Some llamas may be fed grains, such as oats or corn, but this should not make up more than 10-20% of their diet.
6. **Browse**: Browse refers to the leaves, twigs, and branches of shrubs and trees. Llamas love to browse on plants like willow, alder, and cedar.

In general, a llama's diet should consist of:

* 50-60% hay
* 20-30% grasses and other forages (like browse)
* 10-20% grains (if fed at all)
* 5-10% fruits and treats

It's essential to provide llamas with high-quality food, fresh water, and a balanced d

In [4]:
# Evaluating first three responses with llama3

for entry in test_data[:3]:
    # Pull out the pieces once; they are used in the prompt and in the printout.
    formatted_input = format_input(entry)
    reference = entry["output"]
    candidate = entry["model_response"]
    prompt = (
        f"Given the input `{formatted_input}` "
        f"and correct output `{reference}`, "
        f"score the model response `{candidate}`"
        " on a scale from 0 to 100, where 100 is the best score. "
    )
    print(f"\nDataset response:\n>> {reference}")
    print(f"\nModel response:\n>> {candidate}")
    print("\nScore:")
    # The judge model is queried only after the two responses have been shown.
    print(">>", query_model(prompt))
    print("\n-------------------------")


Dataset response:
>> The car is as fast as lightning.

Model response:
>> The car is as fast as a cheetah.

Score:
>> I would score the model response 80 out of 100.

Here's why:

* The model correctly identifies that the input sentence is asking for a simile (a comparison using "as" or "like") and generates one.
* The model chooses a relevant and well-known example of speed, which is a cheetah.
* However, the model response could be improved by making it more vivid and engaging. For example, saying "The car is as fast as a cheetah on steroids" would add more humor and emphasis to the comparison.

Overall, the model response is good but not perfect. It demonstrates an understanding of the task and generates a reasonable simile, but could benefit from further refinement and creativity.

-------------------------

Dataset response:
>> The type of cloud typically associated with thunderstorms is cumulonimbus.

Model response:
>> The type of cloud that is typically associated with thunder

In [5]:
# Scoring all responses

from tqdm import tqdm

def generate_model_scores(json_data, json_key, model=MODEL):
    """Ask the judge model for an integer score for every entry.

    Args:
        json_data: Iterable of entries; each needs an "output" field, the
            `json_key` field, and whatever `format_input` reads.
        json_key: Key of the entry field holding the response to be scored.
        model: Model name forwarded to `query_model`.

    Returns:
        List of integer scores. A reply that `int()` cannot parse is printed
        and skipped, so the list can be shorter than `json_data`.
    """
    collected = []
    for record in tqdm(json_data, desc="Scoring entries"):
        judge_prompt = (
            f"Given the input `{format_input(record)}` "
            f"and correct output `{record['output']}`, "
            f"score the model response `{record[json_key]}`"
            " on a scale from 0 to 100, where 100 is the best score. "
            "Respond with the integer number only. "
            "Do not add any text before or after the integer score. "
        )
        raw_score = query_model(judge_prompt, model)
        try:
            parsed = int(raw_score)
        except ValueError:
            # Report the unparseable reply and move on to the next entry.
            print(f"Could not convert score: {raw_score}")
        else:
            collected.append(parsed)

    return collected


scores = generate_model_scores(test_data, "model_response")
print(f"Number of scores: {len(scores)} of {len(test_data)}")
# Unparseable replies are skipped, so the list can be empty; avoid dividing by zero.
if scores:
    print(f"Average score: {sum(scores)/len(scores):.2f}\n")
else:
    print("Average score: n/a (no scores could be parsed)\n")

Scoring entries: 100%|██████████| 110/110 [00:48<00:00,  2.25it/s]

Number of scores: 110 of 110
Average score: 75.38






### 2 epochs

SmolLM-360M-alpaca-prompt-style `score: 73.44`<br>
SmolLM-360M-smollm-prompt-style `score: 73.14`<br>
SmolLM-360M-phi3-prompt-style `score: 75.09`<br>
SmolLM-360M-instruct-alpaca-prompt-style `score: 74.90`<br>
SmolLM-360M-instruct-smollm-prompt-style `score: 71.75`<br>
SmolLM-360M-instruct-phi3-prompt-style `score: 75.15`<br>
SmolLM-360M-instruct-lora-phi3-prompt-style `score: 74.64`<br>

SmolLM-135M-instruct `score: 63.84`<br>
SmolLM-135M-instruct-lora `score: `<br>
SmolLM-135M `score: 64.30`<br>
SmolLM-135M-lora `score: `<br>