# Chain of Thought Self-Consistency

CoT를 여러 번 수행한 뒤, 가장 많은 점수 또는 표를 받은 답을 최종 정답으로 선택하는 방법

In [1]:
from langchain_openai import ChatOpenAI
from typing import Union
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [2]:
# Self-consistency votes over several sampled answers, so the temperature is
# raised to make the individual samples differ from one another.
model = ChatOpenAI(
    temperature=0.8,
    model="gpt-3.5-turbo",
)

In [3]:
# Schema for one chain-of-thought sample: the model's reasoning plus its final
# numeric answer. It is handed to JsonOutputParser to build the format
# instructions that are placed in the prompt.
#
# Documented with `#` comments on purpose: a class docstring would likely be
# emitted as a "description" entry in the JSON schema and so change the prompt.
#
# NOTE(review): BaseModel/Field come from langchain_core.pydantic_v1, which the
# warning printed under the import cell reports as deprecated; it suggests
# importing from `pydantic` instead. Confirm the installed langchain-core
# version before switching.
class CoT(BaseModel):
    # Free-text reasoning the model produced before answering.
    thought: str = Field(description="Step-by-Step Thought Process")
    # Final numeric result; the generated schema accepts an integer or a number.
    answer: Union[int, float] = Field(description="Answer")

In [4]:
# Build a JSON output parser from the CoT schema.
parser = JsonOutputParser(pydantic_object=CoT)
# Text that tells the model which JSON shape to reply with; it is inserted
# into the prompt template.
format_instructions = parser.get_format_instructions()
# Bare expression so the notebook displays the generated instructions.
format_instructions

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"thought": {"title": "Thought", "description": "Step-by-Step Thought Process", "type": "string"}, "answer": {"title": "Answer", "description": "Answer", "anyOf": [{"type": "integer"}, {"type": "number"}]}}, "required": ["thought", "answer"]}\n```'

In [5]:
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

# One human message: the problem statement followed by the parser's JSON
# format instructions. The instructions are pre-filled as a partial variable,
# so callers only have to supply "input".
human_prompt_template = HumanMessagePromptTemplate.from_template(
    "problem: {input}\n{format_instructions}",
    partial_variables={"format_instructions": format_instructions},
)

prompt = ChatPromptTemplate.from_messages([human_prompt_template])

In [6]:
# Pipeline: fill in the prompt, call the chat model, then parse the reply as JSON.
cot_chain = prompt | model | parser

In [7]:
query = "(123 + 49) * 20 + 293 + 1 ="

In [8]:
# Single run of the chain; yields a dict with 'thought' and 'answer' keys.
result = cot_chain.invoke({"input": query})

In [9]:
result

{'thought': '(123 + 49) * 20 + 293 + 1', 'answer': 3061}

In [10]:
result['answer']

3061

In [11]:
n_sample = 10

In [12]:
[{"input": query}] * n_sample

[{'input': '(123 + 49) * 20 + 293 + 1 ='},
 {'input': '(123 + 49) * 20 + 293 + 1 ='},
 {'input': '(123 + 49) * 20 + 293 + 1 ='},
 {'input': '(123 + 49) * 20 + 293 + 1 ='},
 {'input': '(123 + 49) * 20 + 293 + 1 ='},
 {'input': '(123 + 49) * 20 + 293 + 1 ='},
 {'input': '(123 + 49) * 20 + 293 + 1 ='},
 {'input': '(123 + 49) * 20 + 293 + 1 ='},
 {'input': '(123 + 49) * 20 + 293 + 1 ='},
 {'input': '(123 + 49) * 20 + 293 + 1 ='}]

In [13]:
# Submit the same problem n_sample times in one batch call; with the raised
# temperature each run can follow a different reasoning path and reach a
# different answer.
answers = cot_chain.batch([{"input": query}] * n_sample)

In [14]:
len(answers)

10

In [15]:
answers[:3]

[{'thought': 'First, perform the addition operation within the parentheses: 123 + 49 = 172. Then, multiply the result by 20: 172 * 20 = 3440. Finally, add 293 and 1 to get the final answer: 3440 + 293 + 1 = 3734.',
  'answer': 3734},
 {'thought': '(123 + 49) * 20 + 293 + 1', 'answer': 3011},
 {'thought': '(123 + 49) * 20 + 293 + 1', 'answer': 3151}]

In [16]:
import pandas as pd

In [17]:
# One row per sample, with 'thought' and 'answer' columns taken from the parsed dicts.
df = pd.DataFrame(answers)

In [18]:
df

Unnamed: 0,thought,answer
0,"First, perform the addition operation within t...",3734
1,(123 + 49) * 20 + 293 + 1,3011
2,(123 + 49) * 20 + 293 + 1,3151
3,"First, calculate the sum of 123 + 49 which equ...",3440
4,"First, perform the calculation inside the pare...",3734
5,(123 + 49) * 20 + 293 + 1,3224
6,(123 + 49) * 20 + 293 + 1,3074
7,(123 + 49) * 20 + 293 + 1,2663
8,(123 + 49) * 20 + 293 + 1,2754
9,(123 + 49) * 20 + 293 + 1,2881


In [19]:
# Majority vote: the most frequent answer across the samples wins.
# Series.mode() returns a Series and can hold more than one value when several
# answers tie, so pick a single element (e.g. .iloc[0]) if exactly one winner
# is required.
df['answer'].mode()

0    3734
Name: answer, dtype: int64