In [None]:
from dotenv import load_dotenv
import os
from datasets import load_dataset
import json
from openai import OpenAI

# Load variables from a .env file (if python-dotenv finds one) into the process
# environment, then read the DeepSeek key; the value is None when it is unset.
load_dotenv()
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")

In [None]:
# DeepSeek serves an OpenAI-compatible API, so the OpenAI SDK is simply pointed
# at the DeepSeek endpoint. Docs: https://api-docs.deepseek.com/

# Fail fast when the key is missing. With api_key=None the OpenAI SDK falls back
# to the OPENAI_API_KEY environment variable, which would then be sent to the
# DeepSeek endpoint instead of producing a clear configuration error.
if not DEEPSEEK_API_KEY:
    raise RuntimeError(
        "DEEPSEEK_API_KEY is not set; define it in the environment or in a .env file."
    )

client = OpenAI(
    api_key=DEEPSEEK_API_KEY,
    base_url="https://api.deepseek.com",
)


In [None]:
# Load the "main" configuration of the GSM8K dataset from the Hugging Face Hub.
ds = load_dataset(path="openai/gsm8k", name="main")

In [None]:
# Sampling knobs for the subset that is sent to the model.
SUBSET_SEED = 0
SUBSET_SIZE = 5

# NOTE: despite its name, nb_samples holds the training split itself, not a count.
nb_samples = ds['train']
print(f"Number of training samples: {len(nb_samples)}")

# Shuffle with a fixed seed so the same questions are picked on every run, and
# clamp the requested size so select() cannot index past the end of the split.
subset_samples = nb_samples.shuffle(seed=SUBSET_SEED).select(
    range(min(SUBSET_SIZE, len(nb_samples)))
)
print(f"Number of subset samples: {len(subset_samples)}")
print(f"Example sample: {subset_samples[0]}")

In [None]:
# System instruction: asks for step-by-step reasoning and a final answer written
# as "#### <answer>". Spelled out piece by piece so the trailing spaces and the
# line break produced by the "\n" escape are visible in the source.
SYSTEM_PROMPT = (
    "\n"
    "You are a mathematician. You will be given a question, and you will provide a step-by-step reasoning process to arrive at the final answer. \n"
    "Before providing the final answer, make sure to think through the problem carefully and show all your work.\n"
    "Your final answer should be \n"
    "#### <answer>\n"
)

# User-turn template; the {question} placeholder is filled in with str.format.
USER_PROMPT = "\nQuestion: {question}\n"

In [None]:
SAVE_JSON_PATH = "cold_start_results.jsonl"


def _load_saved_user_prompts(path):
    """Return the user-prompt strings already recorded in the JSONL file at ``path``.

    Each line is expected to be a JSON object with a ``"messages"`` list. A
    missing file yields an empty set; blank or unparsable lines are ignored so
    that a partially written file does not block a re-run.
    """
    saved = set()
    if not os.path.exists(path):
        return saved
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(record, dict):
                continue
            for msg in record.get("messages") or []:
                if isinstance(msg, dict) and msg.get("role") == "user":
                    saved.add(msg.get("content"))
    return saved


# Questions already present in the output file are skipped, so re-running this
# cell neither appends duplicate records nor repeats API calls for them.
already_saved = _load_saved_user_prompts(SAVE_JSON_PATH)

for sample in subset_samples:
    question = sample['question']
    user_prompt = USER_PROMPT.format(question=question)
    if user_prompt in already_saved:
        print(f"Skipping already saved question: {question}")
        continue

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt},
    ]
    response = client.chat.completions.create(
        model="deepseek-reasoner",
        messages=messages
    )
    message = response.choices[0].message
    # reasoning_content is an extra field on the response message; read it
    # defensively so an absent or empty value is handled instead of raising.
    reasoning_content = getattr(message, "reasoning_content", None)
    actual_answer = message.content
    if not reasoning_content or not actual_answer:
        # Without both the reasoning trace and the answer the record cannot be
        # assembled, so the sample is skipped rather than crashing the run.
        print(f"Skipping question with incomplete response: {question}")
        continue

    # Stored assistant turn: reasoning wrapped in <think> tags, then the answer.
    combined_answer = "<think>" + reasoning_content + "</think>\n" + actual_answer
    messages.append({"role": "assistant", "content": combined_answer})
    print(f"Question: {question}")
    print(f"Answer: {combined_answer}")
    print("\n" + "="*50 + "\n")


    ## Save the messages to a JSONL file
    message_dict = {
        "messages": messages
    }
    # Append right after each call so finished samples survive a later failure.
    with open(SAVE_JSON_PATH, "a", encoding="utf-8") as f:
        f.write(json.dumps(message_dict) + "\n")
    already_saved.add(user_prompt)
