In [None]:
import ollama

In [None]:
def pull_model_if_needed(model_name):
    """Ask Ollama to pull ``model_name`` and report whether the call succeeded.

    The pull is always attempted; this function performs no separate local
    existence check beforehand. The outcome is printed either way.

    Args:
        model_name: Model identifier forwarded to ``ollama.pull``.

    Returns:
        True if the pull call (and the success message) completed without
        raising, False if any exception was caught.
    """
    succeeded = True
    try:
        ollama.pull(model=model_name)
        print(f"Model '{model_name}' pulled successfully/already existed.")
    except Exception as err:
        # Best-effort helper: report the failure and let the caller decide
        # whether to skip this model.
        print(f"Error pulling model '{model_name}': {err}")
        succeeded = False
    return succeeded

In [None]:
def run_prompt(model_name, prompt):
    """Send ``prompt`` to ``model_name`` as a single user message.

    Args:
        model_name: Model identifier forwarded to ``ollama.chat``.
        prompt: Text used as the content of the only (user) message.

    Returns:
        The reply's message content with leading/trailing whitespace
        stripped, or None if any exception was caught (the error is printed).
    """
    user_turn = {'role': 'user', 'content': prompt}
    try:
        reply = ollama.chat(model=model_name, messages=[user_turn])
        reply_text = reply['message']['content']
        return reply_text.strip()
    except Exception as err:
        # Swallow the failure so one bad call does not abort a whole run.
        print(f"Error running prompt on model '{model_name}': {err}")
        return None

In [None]:
# Model identifiers to evaluate, in the order they will be run.
models_to_test = ["mistral", "llama3.1", "granite3.3"]  # Adjust based on your setup

# Each puzzle pairs a question with a reference answer. The reference is only
# printed next to the model output for manual comparison; nothing is scored.
logic_puzzles = [
    dict(
        question="If John is taller than Mary, and Mary is taller than Sue, who is the tallest?",
        expected_answer="John is the tallest.",
    ),
    dict(
        question="There are 5 apples and you take away 3. How many apples do you have?",
        expected_answer="You have 3 apples.",
    ),
    dict(
        question="A farmer has 17 sheep, and all but nine die. How many sheep are left?",
        expected_answer="9 sheep are left.",
    ),
    dict(
        question="If all bloops are floops, and some floops are gloops, are all bloops gloops?",
        expected_answer="No, not necessarily.",
    ),
    dict(
        question=(
            "A train leaves New York at 8 am traveling at 60 mph. "
            "Another train leaves Chicago at 9 am traveling at 70 mph. "
            "Assuming they are on the same track heading towards each other "
            "and the distance between New York and Chicago is 800 miles, "
            "at what time will they meet?"
        ),
        expected_answer="The two trains will meet at approximately 2:42 PM.",
    ),
]

print("--- Testing Reasoning and Logic in Ollama Models ---")

for model_name in models_to_test:
    print(f"\n--- Testing model: {model_name} ---")
    model_ready = pull_model_if_needed(model_name)
    if not model_ready:
        # Skip every puzzle for a model that could not be pulled.
        continue

    for puzzle in logic_puzzles:
        puzzle_text, reference = puzzle["question"], puzzle["expected_answer"]

        # Baseline: the question is sent verbatim.
        plain_reply = run_prompt(model_name, puzzle_text)
        print(f"\nQuestion: {puzzle_text}\nExpected Answer: {reference}")
        print(f"\nAnswer (Standard Prompt):\n {plain_reply}")

        # Variant: same question prefixed with a step-by-step cue.
        stepwise_reply = run_prompt(model_name, f"Let's think step-by-step. {puzzle_text}")
        print(f"\nAnswer (Step-by-Step Prompt):\n {stepwise_reply}")

print("\n--- Observation ---")
for note in (
    "Observe which models successfully solve the logic puzzles.",
    "Note if the 'Let's think step-by-step' prompt helps the model by showing its reasoning process (or maybe not).",
    "Identify cases where the models fail and try to understand the nature of the failure (e.g., misunderstanding the logic, calculation errors).",
):
    print(note)