In [None]:
!pip install -U dspy

In [4]:
import dspy
import os


In [6]:
# Connection settings for the model endpoint.
# Each value can be overridden through an environment variable so that real
# credentials and hosts do not have to be written into the notebook; the
# fallbacks are the values this notebook originally hard-coded.
model_name = os.environ.get("DSPY_MODEL_NAME", "Qwen/Qwen3-Next-80B-A3B-Instruct")
api_base = os.environ.get("DSPY_API_BASE", "http://211.47.56.81:7972/v1")
api_key = os.environ.get("DSPY_API_KEY", "token-abc123")

# Build the language-model client and register it as the default LM for
# every DSPy module created later in the notebook.
model = dspy.LM(
    model=f"openai/{model_name}",
    api_base=api_base,
    api_key=api_key,
    temperature=0.6,
)
dspy.settings.configure(lm=model)

In [12]:
# Signature for solving a math instruction: one input (`instruction`) and two
# outputs (`reasoning_steps`, `final_answer`).
#
# This cell used to define MathQASignature twice; the first definition was
# shadowed immediately by the second, so only the effective definition (the one
# whose `reasoning_steps` description spells out the step format) is kept.
#
# The docstring and the `desc` strings are runtime text that DSPy places in the
# prompt, so they are intentionally left in Korean and unchanged.
class MathQASignature(dspy.Signature):
    """주어진 수학 지시문에 대해 풀이 과정을 설명하고 최종 답을 응답"""
    # Input: the math instruction to solve.
    instruction = dspy.InputField(desc="수학 문제에 대한 지시문")
    # Output: worked steps, requested in the "N단계: <expression>" line format.
    reasoning_steps = dspy.OutputField(desc="풀이 과정. '1단계: 1 + 1 = 2\n'와 같은 형식으로 작성")
    # Output: the final answer.
    final_answer = dspy.OutputField(desc="정답")
    



In [13]:
class GenerateAnswer(dspy.Module):
    """Answer a math instruction with worked steps and a final answer."""

    def __init__(self):
        super().__init__()
        # Chain-of-thought predictor built on MathQASignature.
        self.CoT = dspy.ChainOfThought(MathQASignature)

    def forward(self, instruction):
        """Run the predictor and return only the two signature outputs.

        Args:
            instruction: Value forwarded as the signature's `instruction` input.

        Returns:
            A `dspy.Prediction` carrying `reasoning_steps` and `final_answer`
            copied from the predictor's output.
        """
        cot_output = self.CoT(instruction=instruction)
        kept_fields = {
            "reasoning_steps": cot_output.reasoning_steps,
            "final_answer": cot_output.final_answer,
        }
        return dspy.Prediction(**kept_fields)


In [14]:
# Run the uncompiled program once on a sample expression and show both outputs.
predictor = GenerateAnswer()
prediction = predictor(instruction="(3-5)*3+1=?")

print(f"Reasoning Steps: {prediction.reasoning_steps}")
print(f"Final Answer: {prediction.final_answer}")

Reasoning Steps: 1단계: 3 - 5 = -2
2단계: -2 * 3 = -6
3단계: -6 + 1 = -5
Final Answer: -5


In [15]:
# Training examples as (instruction, worked steps, final answer) triples.
# Every example marks only `instruction` as an input field.
trainset = [
    dspy.Example(
        instruction=question,
        reasoning_steps=steps,
        final_answer=answer,
    ).with_inputs("instruction")
    for question, steps, answer in [
        (
            "7과 8을 곱한 뒤, 5를 더하세요.",
            "1단계: 7 × 8 = 56\n2단계: 56 + 5 = 61",
            "61",
        ),
        (
            "15에서 9를 뺀 다음, 4를 곱하세요.",
            "1단계: 15 - 9 = 6\n2단계: 6 × 4 = 24",
            "24",
        ),
        (
            "6을 세 번 더한 값에 2를 곱하세요.",
            "1단계: 6 + 6 + 6 = 18\n2단계: 18 × 2 = 36",
            "36",
        ),
        (
            "12를 3으로 나눈 뒤, 거기에 7을 더하세요.",
            "1단계: 12 ÷ 3 = 4\n2단계: 4 + 7 = 11",
            "11",
        ),
        (
            "5와 9를 더한 다음, 그 결과에 2를 빼세요.",
            "1단계: 5 + 9 = 14\n2단계: 14 - 2 = 12",
            "12",
        ),
    ]
]

# Held-out evaluation examples as (instruction, worked steps, final answer)
# triples. Every example marks only `instruction` as an input field.
devset = [
    dspy.Example(
        instruction=question,
        reasoning_steps=steps,
        final_answer=answer,
    ).with_inputs("instruction")
    for question, steps, answer in [
        (
            "9와 4를 더한 뒤, 6을 곱하세요.",
            "1단계: 9 + 4 = 13\n2단계: 13 × 6 = 78",
            "78",
        ),
        (
            "20을 5로 나눈 다음, 3을 더하세요.",
            "1단계: 20 ÷ 5 = 4\n2단계: 4 + 3 = 7",
            "7",
        ),
        (
            "10에서 3을 뺀 후, 그 결과를 2로 나누세요.",
            "1단계: 10 - 3 = 7\n2단계: 7 ÷ 2 = 3.5",
            "3.5",
        ),
    ]
]

In [16]:
from dspy.teleprompt import BootstrapFewShot

# Signature for the LM judge used by `metric`: it takes a text plus a question
# about that text and produces a yes/no style answer.
# The docstring and the `desc` strings are runtime text that DSPy places in the
# prompt, so they are intentionally left in Korean and unchanged.
class Assess(dspy.Signature):
    """모델의 응답을 평가"""
    # Input: the text being judged.
    assessed_text = dspy.InputField(desc="평가 대상")
    # Input: the question the judge must answer about `assessed_text`.
    assessment_question = dspy.InputField(desc="평가 질문")
    # Output: the verdict; the description asks for "예" (yes) or "아니오" (no).
    assessment_answer = dspy.OutputField(desc="예/아니오")

def metric(example, pred, trace=None):
    """Judge a prediction on answer correctness and step formatting.

    An LM judge (`dspy.Predict(Assess)`) is asked two yes/no questions:
    whether the predicted final answer matches the reference answer, and
    whether the predicted reasoning steps follow the expected format.
    The prediction passes only when both verdicts are exactly "예".

    Args:
        example: Reference example; its `final_answer` is the expected answer.
        pred: Prediction to score; `final_answer` and `reasoning_steps` are read.
        trace: When not None (as passed during optimization), a strict boolean
            pass/fail is returned instead of a numeric score.

    Returns:
        `True`/`False` when `trace` is not None; otherwise `1.0` if both
        criteria pass and `0.0` if either fails.
    """
    answer = example.final_answer

    # Questions posed to the judge (runtime prompt text, kept verbatim).
    correct = f"최종 답변이 '{answer}'와 일치하는가? (예/아니오)"
    formatting = "풀이 과정이 형식을 준수하는가? (예/아니오)"

    # One judge instance serves both questions.
    judge = dspy.Predict(Assess)
    # Correctness must be judged on the predicted final answer; judging the
    # reasoning text instead would leave `pred.final_answer` unchecked.
    correct_result = judge(assessed_text=pred.final_answer, assessment_question=correct)
    formatting_result = judge(assessed_text=pred.reasoning_steps, assessment_question=formatting)

    # A missing verdict counts as a failure rather than raising on `.strip()`.
    correct_bool, formatting_bool = [
        (m.assessment_answer or "").strip() == '예'
        for m in (correct_result, formatting_result)
    ]
    passed = correct_bool and formatting_bool

    if trace is not None:
        return passed  # optimization: all criteria must hold

    return 1.0 if passed else 0.0  # evaluation: 0 or 1

# Optimize a fresh GenerateAnswer program on the training set, with `metric`
# as the check applied to bootstrapped demonstrations.
teleprompter = BootstrapFewShot(metric=metric)
compiled_program = teleprompter.compile(
    student=GenerateAnswer(),
    trainset=trainset,
)

 80%|████████  | 4/5 [00:05<00:01,  1.27s/it]

Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.





In [17]:
from dspy.evaluate import Evaluate

# Score the compiled program on the dev set with the same metric, showing a
# progress bar and a per-example results table.
evaluation = Evaluate(
    devset=devset,
    metric=metric,
    display_table=True,
    display_progress=True,
)
eval_result = evaluation(compiled_program)

print(f"Evaluation result: {eval_result}")

Average Metric: 3.00 / 3 (100.0%): 100%|██████████| 3/3 [00:02<00:00,  1.46it/s]

2025/11/30 05:22:56 INFO dspy.evaluate.evaluate: Average Metric: 3.0 / 3 (100.0%)





Unnamed: 0,instruction,example_reasoning_steps,example_final_answer,pred_reasoning_steps,pred_final_answer,metric
0,"9와 4를 더한 뒤, 6을 곱하세요.",1단계: 9 + 4 = 13\n2단계: 13 × 6 = 78,78.0,1단계: 9 + 4 = 13\n2단계: 13 × 6 = 78,78.0,✔️ [1.000]
1,"20을 5로 나눈 다음, 3을 더하세요.",1단계: 20 ÷ 5 = 4\n2단계: 4 + 3 = 7,7.0,1단계: 20 ÷ 5 = 4\n2단계: 4 + 3 = 7,7.0,✔️ [1.000]
2,"10에서 3을 뺀 후, 그 결과를 2로 나누세요.",1단계: 10 - 3 = 7\n2단계: 7 ÷ 2 = 3.5,3.5,1단계: 10 - 3 = 7\n2단계: 7 ÷ 2 = 3.5,3.5,✔️ [1.000]


Evaluation result: EvaluationResult(score=100.0, results=<list of 3 results>)


In [None]:
# Ask the compiled program the same sample question used before compilation.
instruction = "(3-5)*3+1=?"
prediction = compiled_program(instruction=instruction)

# One line each for the question, the reasoning and the answer.
print(
    f"Question: {instruction}",
    f"Reasoning: {prediction.reasoning_steps}",
    f"Answer: {prediction.final_answer}",
    sep="\n",
)
