In [None]:
import json
import re
import time

import ollama

In [None]:
# Ollama model names to compare.  Each entry is pulled and then prompted in
# turn by compare_models(); add or remove names as desired.
models = [
    "llama3.1",
    "mistral",
    "gemma3",
    "qwen2.5",
]

In [None]:
# The single prompt sent to every model.  Keep exactly one `prompt = ...` line
# active; comment it out and uncomment one of the alternatives below to try a
# different kind of prompt.
prompt = "Tell me a short story about a lost cat finding its way home"
# prompt = "Explain the concept of large language models (LLMs) like I'm five."
# prompt = "Suggest five albums to listen to after listening to 'Nightmare Logic' by the band Power Trip"
# prompt = "Write a haiku about a rainy day."
# prompt = "Explain quantum physics in the style of a pirate."
# prompt = "Generate a Python function that calculates an interesting number sequence."
# prompt = "Describe the feeling of being relaxed in 50 words or less."
# prompt = "If you were a sentient cloud, what would your day be like?"

In [None]:
def run_ollama_prompt(model, prompt):
    """
    Send one prompt to an Ollama model and time the call.

    Args:
        model: Name of the Ollama model to query (e.g. "mistral").
        prompt: Prompt text passed to ``ollama.generate``.

    Returns:
        A ``(response_text, generation_time, error)`` tuple.  On success,
        ``response_text`` is the generated text with leading/trailing
        whitespace stripped, ``generation_time`` is the elapsed time of the
        ``ollama.generate`` call in seconds, and ``error`` is ``None``.  On any
        failure the first two items are ``None`` and ``error`` is a readable
        message, which is also printed.
    """
    try:
        # perf_counter() is monotonic, so the measured duration cannot be skewed
        # (or even go negative) if the system clock is adjusted during the call,
        # which is possible with wall-clock time.time().
        start_time = time.perf_counter()
        response = ollama.generate(model=model, prompt=prompt)
        generation_time = time.perf_counter() - start_time
        response_text = response['response'].strip()
        return response_text, generation_time, None
    except Exception as e:
        # Deliberately broad: one failing model is reported to the caller
        # instead of aborting the whole comparison run.
        error_message = f"Error with model {model}: {e}"
        print(error_message)
        return None, None, error_message

In [None]:
def pull_model_if_needed(model_name):
    """
    Ask Ollama to pull ``model_name`` and report the outcome.

    No local existence check is done here: the pull is always issued, relying
    on Ollama to cope with a model that is already downloaded (per the
    original author's note).

    Returns:
        ``True`` if the pull call finished without raising; otherwise the
        error message string describing the failure (also printed).
    """
    try:
        ollama.pull(model=model_name)
        print(f"Model '{model_name}' pulled successfully/already existed.")
    except Exception as exc:
        outcome = f"Error pulling model '{model_name}': {exc}"
        print(outcome)
    else:
        outcome = True
    return outcome

In [None]:
def analyze_response(response):
    """
    Compute a word count and a rough complexity score for a model response.

    Complexity is the average number of words per sentence.  Sentence detection
    is intentionally simple (a dedicated NLP library would do better): a
    sentence end is a run of ``.``, ``!`` or ``?`` that finishes a token, so
    "..." and "?!" count once and the dot in "3.14" is ignored.  Text that
    contains words but no sentence terminator (e.g. a haiku) is treated as a
    single sentence rather than reported as complexity 0.

    Args:
        response: Response text, or ``None`` when the model call failed.

    Returns:
        ``(word_count, complexity)``.  ``(0, 0)`` for ``None``, empty or
        whitespace-only input.
    """
    if not response:
        return 0, 0

    word_count = len(response.split())
    if word_count == 0:
        return 0, 0

    # One or more terminators, optionally followed by closing quotes/brackets,
    # then whitespace or end of text.
    sentence_end = r"[.!?]+(?=[\"')\]]*(?:\s|$))"
    sentence_count = max(len(re.findall(sentence_end, response)), 1)

    complexity = word_count / sentence_count
    return word_count, complexity


In [None]:
def compare_models(prompt, models):
    """
    Run one prompt against several Ollama models and print a comparison.

    Every model is pulled first; a model whose pull fails is reported and
    skipped.  Each remaining model is then prompted, its response analysed,
    and a summary of all results is printed.

    Args:
        prompt: Prompt text sent to every model.
        models: Iterable of Ollama model names.  It is only read, never
            modified, so the caller's list is unchanged after the call.

    Returns:
        Dict mapping model name to a dict with the keys ``prompt``,
        ``response``, ``word_count``, ``complexity``, ``generation_time`` and
        ``error``.  When the model call failed, ``response``, ``word_count``,
        ``complexity`` and ``generation_time`` are ``None`` and ``error``
        holds the message.  Models that could not be pulled are not included.
    """
    # Pull models before running the prompt.  Usable models are collected into
    # a new list: removing entries from `models` while iterating over it would
    # skip the element after each removal (so a model whose pull failed could
    # still be prompted) and would also alter the caller's list.
    runnable_models = []
    for model in models:
        pull_result = pull_model_if_needed(model)
        if pull_result is True:
            runnable_models.append(model)
        else:
            print(f"Skipping model {model} due to error: {pull_result}")

    results = {}
    print(f"\nRunning prompt: {prompt}")
    for model in runnable_models:
        print(f"\nRunning prompt with model: {model}")
        response, generation_time, error = run_ollama_prompt(model, prompt)
        if response is not None:
            word_count, complexity = analyze_response(response)
        else:
            # Failed call: there is no text to analyse.
            word_count = complexity = None
        results[model] = {
            "prompt": prompt,
            "response": response,
            "word_count": word_count,
            "complexity": complexity,
            "generation_time": generation_time,
            "error": error,
        }

    print("\nComparison of Model Outputs:")
    for model, data in results.items():
        print(f"\nModel: {model}")
        print(f"\n  Prompt: {data['prompt']}")
        if data["error"]:
            print(f"    Error: {data['error']}")
        else:
            print(f"    Response: {data['response']}")
            print(f"    Word Count: {data['word_count']}")
            print(f"    Complexity (avg words per sentence): {data['complexity']:.2f}")
            print(f"    Generation Time: {data['generation_time']:.2f} seconds")

    return results

In [None]:
print("Model pull, prompt and response comparison in progress...\n")
results = compare_models(prompt, models)  # one prompt, every configured model

# `results` can be analysed further at this point.  Below it is saved twice:
# as raw JSON and as a readable Markdown report.

# One timestamp is shared by both filenames so the two files of a run pair up.
timestamp = time.strftime("%Y%m%d-%H%M%S")
filename_json = f"model_comparison_results_{timestamp}.json"
filename_md = f"model_comparison_results_{timestamp}.md"

with open(filename_json, "w") as f_json:
    json.dump(results, f_json, indent=4)  # indented for readability

with open(filename_md, "w", encoding="utf-8") as f_md:
    f_md.write("# Model Comparison Results\n\n")
    for model, data in results.items():
        # Assemble one model's section, then write it in a single call.
        section = [
            f"## Model: {model}\n\n",
            f"### Prompt: {data['prompt']}\n\n",
        ]
        if data["error"]:
            section.append(f"**Error**: {data['error']}\n\n")
        else:
            section += [
                f"**Response**: {data['response']}\n\n",
                f"**Word Count**: {data['word_count']}\n\n",
                f"**Complexity (avg words per sentence)**: {data['complexity']:.2f}\n\n",
                f"**Generation Time**: {data['generation_time']:.2f} seconds\n\n",
            ]
        f_md.writelines(section)
print(f"\nResults saved to {filename_json} and {filename_md}")