# Self-Consistency Prompting

This notebook demonstrates self-consistency prompting by generating multiple reasoning paths and using majority voting to find the most reliable answer.

Each step is run separately with its inputs and outputs printed, so the process is easy to follow.

In [None]:
# Customize model parameters to elicit different responses.
# Use different reasoning templates to add diversity across responses.

In [7]:
import random
from collections import Counter
import re
import json

# Helper function to pretty print data
def print_io(method_name, inputs, outputs):
    """Print a method's inputs and outputs as an indented JSON report.

    Args:
        method_name: Label shown on the ``METHOD:`` line.
        inputs: Data printed after ``INPUTS:`` (serialized with ``json.dumps``).
        outputs: Data printed after ``OUTPUTS:`` (serialized with ``json.dumps``).

    Returns:
        None. The report is written to standard output.

    Values that ``json`` cannot serialize natively (e.g. sets or custom
    objects) are rendered with ``str()`` instead of raising ``TypeError``,
    so this debugging helper does not abort the cell that calls it.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"METHOD: {method_name}")
    print(rule)
    # default=str is the fallback for values json cannot encode on its own.
    print(f"INPUTS: {json.dumps(inputs, indent=2, default=str)}")
    print(f"\nOUTPUTS: {json.dumps(outputs, indent=2, default=str)}")
    print(f"{rule}\n")

In [None]:
class SelfConsistencyPrompter:
    """Demonstrate self-consistency prompting with simulated reasoning paths.

    Several reasoning paths are generated for one question, a numeric answer
    is extracted from each, and the most frequent answer wins a majority vote.
    The "LLM" here is simulated with canned responses; no model is called.
    """

    # Answer patterns, tried in priority order. They are compiled
    # case-insensitively so "Answer: 6", "answer: 6" and "ANSWER: 6" all match.
    _ANSWER_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'Answer:\s*(\d+)',
            r'answer is\s*(\d+)',
            r'(\d+)\s*apples?$',
            r'(\d+)$',
        )
    )

    def __init__(self, verbose=True):
        """
        Create a prompter.
        INPUT: verbose (bool) - stored on the instance; no method in this
               class reads it at present.
        """
        self.verbose = verbose
        self.reasoning_templates = [ # invoke reasoning through different templates to create diversity in responses
            "Let's think step by step: {question}",
            "I'll solve this systematically: {question}",
            "Breaking it down: {question}",
            "Mathematical approach: {question}",
            "Logical reasoning: {question}"
        ]

    def simulate_llm_reasoning(self, question, method):
        """
        Simulate different LLM reasoning paths
        INPUT: question (str), method (str)
        OUTPUT: reasoning text (str)

        The response is canned: `question` is accepted for interface
        compatibility but is not used, and every response refers to the
        apples example. An unrecognised `method` falls back to the "error"
        response.
        """
        outputs = {
            "sequential": "Sarah starts with 5 apples. Gives 2 to Mark: 5-2=3. Buys 4 more: 3+4=7. Eats 1: 7-1=6. Answer: 6",
            "net_change": "Net change: -2 (gave) +4 (bought) -1 (ate) = +1. Starting with 5: 5+1=6. Answer: 6",
            "grouping": "Give and eat: -2-1=-3. Buy: +4. Net: +1. 5+1=6. Answer: 6",
            "final_calculation": "Start:5, after giving:3, after buying:7, after eating:6. Answer: 6",
            "error": "Sarah has 5, gives 2: 5-2=3, buys 4: 3+4=7, Answer: 7"  # Forgot eating
        }

        return outputs.get(method, outputs["error"])

    def extract_answer(self, reasoning_text):
        """
        Extract final numerical answer from reasoning text
        INPUT: reasoning_text (str)
        OUTPUT: answer (int or None)

        Patterns are tried in priority order and the last match of the first
        pattern that matches is returned. Matching is case-insensitive, so an
        explicit "Answer: N" marker takes precedence over a bare trailing
        number. Empty or None input yields None.
        """
        if not reasoning_text:
            return None

        # Trailing whitespace would otherwise defeat the `$`-anchored patterns.
        text = reasoning_text.strip()

        for pattern in self._ANSWER_PATTERNS:
            matches = pattern.findall(text)
            if matches:
                return int(matches[-1])

        return None

    def generate_paths(self, question, num_paths=5):
        """
        Generate multiple reasoning paths
        INPUT: question (str), num_paths (int)
        OUTPUT: paths (list of str)

        Methods are cycled in a fixed order, so the first five paths cover
        every simulated method once (the last one being the faulty path).
        """
        paths = []
        methods = ["sequential", "net_change", "grouping", "final_calculation", "error"]

        for i in range(num_paths):
            # Wrap the question in a randomly chosen template. The simulated
            # backend ignores the prompt text, so the output is unaffected.
            template = random.choice(self.reasoning_templates)
            prompt = template.format(question=question)
            method = methods[i % len(methods)]
            paths.append(self.simulate_llm_reasoning(prompt, method))

        return paths

    def get_consensus(self, question, num_paths=5):
        """
        Main self-consistency method - generates multiple paths and uses majority voting
        INPUT: question (str), num_paths (int)
        OUTPUT: majority_answer (int or None), paths (list), confidence (float), answer_counts (Counter)

        confidence is the share of valid (non-None) answers that equal the
        majority answer. When no answer can be extracted the result is
        (None, paths, 0.0, empty Counter).
        """
        paths = self.generate_paths(question, num_paths)

        # Paths with no extractable answer do not take part in the vote.
        extracted = (self.extract_answer(path) for path in paths)
        valid_answers = [answer for answer in extracted if answer is not None]

        if not valid_answers:
            return None, paths, 0.0, Counter()

        # Majority voting
        answer_counts = Counter(valid_answers)
        majority_answer, count = answer_counts.most_common(1)[0]
        confidence = count / len(valid_answers)

        return majority_answer, paths, confidence, answer_counts

In [9]:
# STEP 1: Create the prompter and define the word problem used by every later step
print("", "=" * 80, "STEP 1: INITIALIZE PROMPTER", "=" * 80, sep="\n")

question = "Sarah has 5 apples. She gives 2 to Mark and buys 4 more. Then she eats 1. How many apples does she have now?"
prompter = SelfConsistencyPrompter(verbose=False)

print(f"Question: {question}", end="\n\n")


STEP 1: INITIALIZE PROMPTER
Question: Sarah has 5 apples. She gives 2 to Mark and buys 4 more. Then she eats 1. How many apples does she have now?



In [None]:
# STEP 2: Produce several independent reasoning paths for the same question
print("", "=" * 80, "STEP 2: GENERATE MULTIPLE REASONING PATHS", "=" * 80, sep="\n")

# One simulated call per reasoning path
paths = prompter.generate_paths(question, num_paths=5)

for i, path in enumerate(paths, start=1):
    print(f"\nPath {i}:\n  {path}")

# In practice, prefer a standardized response format (e.g. a dict or JSON) that
# can be parsed directly, rather than free-form strings.


STEP 2: GENERATE MULTIPLE REASONING PATHS

Path 1:
  Sarah starts with 5 apples. Gives 2 to Mark: 5-2=3. Buys 4 more: 3+4=7. Eats 1: 7-1=6. Answer: 6

Path 2:
  Net change: -2 (gave) +4 (bought) -1 (ate) = +1. Starting with 5: 5+1=6. Answer: 6

Path 3:
  Give and eat: -2-1=-3. Buy: +4. Net: +1. 5+1=6. Answer: 6

Path 4:
  Start:5, after giving:3, after buying:7, after eating:6. Answer: 6

Path 5:
  Sarah has 5, gives 2: 5-2=3, buys 4: 3+4=7, Answer: 7


In [11]:
# STEP 3: Pull the final numeric answer out of each reasoning path
print("", "=" * 80, "STEP 3: EXTRACT NUMERICAL ANSWERS FROM EACH PATH", "=" * 80, sep="\n")

extracted_answers = []
for i, path in enumerate(paths, start=1):
    answer = prompter.extract_answer(path)
    extracted_answers.append(answer)
    print(
        f"\nPath {i}:",
        f"  Reasoning: {path}",
        f"  Extracted Answer: {answer}",
        sep="\n",
    )


STEP 3: EXTRACT NUMERICAL ANSWERS FROM EACH PATH

Path 1:
  Reasoning: Sarah starts with 5 apples. Gives 2 to Mark: 5-2=3. Buys 4 more: 3+4=7. Eats 1: 7-1=6. Answer: 6
  Extracted Answer: 6

Path 2:
  Reasoning: Net change: -2 (gave) +4 (bought) -1 (ate) = +1. Starting with 5: 5+1=6. Answer: 6
  Extracted Answer: 6

Path 3:
  Reasoning: Give and eat: -2-1=-3. Buy: +4. Net: +1. 5+1=6. Answer: 6
  Extracted Answer: 6

Path 4:
  Reasoning: Start:5, after giving:3, after buying:7, after eating:6. Answer: 6
  Extracted Answer: 6

Path 5:
  Reasoning: Sarah has 5, gives 2: 5-2=3, buys 4: 3+4=7, Answer: 7
  Extracted Answer: 7


In [12]:
# STEP 4: Drop paths with no extractable answer, then tally the remaining answers
print("", "=" * 80, "STEP 4: FILTER VALID ANSWERS & COUNT OCCURRENCES", "=" * 80, sep="\n")

valid_answers = [value for value in extracted_answers if value is not None]
answer_counts = Counter(valid_answers)

print(
    f"\nTotal paths: {len(paths)}",
    f"Valid answers extracted: {len(valid_answers)}",
    f"\nAnswer Distribution:",
    sep="\n",
)
# Keys are unique, so iterating the sorted keys gives the same order as sorting the items.
for answer in sorted(answer_counts):
    count = answer_counts[answer]
    print(f"  Answer {answer}: appears {count} time(s)")


STEP 4: FILTER VALID ANSWERS & COUNT OCCURRENCES

Total paths: 5
Valid answers extracted: 5

Answer Distribution:
  Answer 6: appears 4 time(s)
  Answer 7: appears 1 time(s)


In [13]:
# STEP 5: The most frequent answer wins; confidence is its share of the valid answers
print("", "=" * 80, "STEP 5: APPLY MAJORITY VOTING FOR CONSENSUS", "=" * 80, sep="\n")

majority_answer, count = answer_counts.most_common(1)[0]
confidence = count / len(valid_answers)

print(
    f"\nMajority Answer: {majority_answer}",
    f"Votes: {count} out of {len(valid_answers)}",
    f"Confidence Score: {confidence:.2%}",
    sep="\n",
)


STEP 5: APPLY MAJORITY VOTING FOR CONSENSUS

Majority Answer: 6
Votes: 4 out of 5
Confidence Score: 80.00%


In [14]:
# STEP 6: Summarize the consensus answer alongside the original question
print("", "=" * 80, "STEP 6: FINAL CONSENSUS RESULT", "=" * 80, sep="\n")

print(
    f"\n{'Question:':<20} {question}",
    f"{'Final Answer:':<20} {majority_answer} apples",
    f"{'Confidence:':<20} {confidence:.2%}",
    f"{'Reasoning:':<20} {majority_answer} is the most consistent answer across {count}/{len(valid_answers)} reasoning paths",
    sep="\n",
)
print("", "=" * 80, sep="\n")


STEP 6: FINAL CONSENSUS RESULT

Question:            Sarah has 5 apples. She gives 2 to Mark and buys 4 more. Then she eats 1. How many apples does she have now?
Final Answer:        6 apples
Confidence:          80.00%
Reasoning:           6 is the most consistent answer across 4/5 reasoning paths

