# Self-consistency

Let's use a smaller model to demonstrate the power of self-consistency:

In [27]:
from langchain_google_vertexai import ChatVertexAI

# A deliberately small model: the rest of this section relies on sampling it
# several times rather than on a single high-quality generation.
SMALL_MODEL_NAME = "gemini-1.5-flash-001"
llm_small = ChatVertexAI(model_name=SMALL_MODEL_NAME)

In [None]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Chain-of-thought prompt pulled by its hub handle.
math_cot_prompt = hub.pull("arietem/math_cot")

# prompt -> small model -> plain string (StrOutputParser unwraps the message).
cot_chain = math_cot_prompt | llm_small | StrOutputParser()

# Sanity check on a simple linear equation; print() keeps the multi-line
# reasoning readable instead of showing an escaped repr.
linear_question = "Solve equation 2*x+5=15"
linear_answer = cot_chain.invoke(linear_question)
print(linear_answer)



Answer: Let's think step by step
1. Subtract 5 from both sides of the equation: 2x+5-5=15-5
2. Simplify: 2x=10
3. Divide both sides by 2: 2x/2 = 10/2
4. Simplify: x=5

<answer>5</answer>


In [29]:
from operator import itemgetter
from langchain_core.prompts import PromptTemplate


# Second-stage prompt: given the question and the verbose chain-of-thought
# output, ask the model to return only the concise final answer.
parse_prompt_template = (
    "Given the initial question and a full answer, extract the concise answer. "
    "Do not assume anything and only use a provided full answer.\n\n"
    "QUESTION:\n{question}\n"
    "FULL ANSWER:\n{full_answer}\n\n"
    "CONCISE ANSWER:\n"
)
parse_prompt = PromptTemplate.from_template(parse_prompt_template)

# Build both template variables from the incoming {"question": ...} dict:
# "full_answer" runs the chain-of-thought chain on the question, while
# "question" is passed through unchanged.
parse_inputs = {
    "full_answer": itemgetter("question") | cot_chain,
    "question": itemgetter("question"),
}

# reasoning -> extraction prompt -> small model -> plain string
final_chain = parse_inputs | parse_prompt | llm_small | StrOutputParser()

quadratic_question = "Solve equation 2*x**2-96*x+1152"
print(final_chain.invoke({"question": quadratic_question}))

24 



Let's run the generation multiple times and pick the most frequent answer from the resulting distribution:

In [30]:
from collections import Counter

# Self-consistency settings: how many independent answers to draw and how
# much randomness to inject into the reasoning step.
N_SAMPLES = 20
SAMPLING_TEMPERATURE = 2.0
sc_question = "Solve equation 2*x**2-96*x+1152"

# NOTE: `final_chain.invoke(inputs, temperature=2.0)` does not change the
# model's temperature. A RunnableSequence forwards extra invoke kwargs only to
# its first step (here the input-mapping dict), which ignores them, so every
# sample would be drawn at the default temperature. Bind the temperature to
# the model itself so the reasoning step really produces diverse paths.
sampling_cot_chain = (
    math_cot_prompt
    | llm_small.bind(temperature=SAMPLING_TEMPERATURE)
    | StrOutputParser()
)

# Same shape as `final_chain`, but with the high-temperature reasoning step.
# The extraction step keeps the model's default settings so that identical
# reasoning results are more likely to collapse to identical short answers.
sampling_chain = (
    {
        "full_answer": itemgetter("question") | sampling_cot_chain,
        "question": itemgetter("question"),
    }
    | parse_prompt
    | llm_small
    | StrOutputParser()
)

# Draw N_SAMPLES independent answers; strip whitespace so that answers which
# differ only by padding are counted as the same vote.
generations = [
    sampling_chain.invoke({"question": sc_question}).strip()
    for _ in range(N_SAMPLES)
]

# Majority vote: the most common extracted answer wins.
print(Counter(generations).most_common(1)[0][0])

x = 24
