# Setup

## Package Installation

In [3]:
#!pip install --upgrade pip
#!pip install transformers==4.37.0
#!pip install torch torchvision torchaudio
#!pip install tqdm
!pip install datasets

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.12.0-py3-none-any.w

In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM

import os
import json
import logging
from tqdm import tqdm
from datasets import load_dataset

# Set up logging.
# force=True removes handlers that the hosting environment may already have
# attached to the root logger. Without it basicConfig() is a silent no-op in
# that situation, so the INFO level and the format below would never apply.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)
logger = logging.getLogger(__name__)

# Create directories for outputs
os.makedirs("results", exist_ok=True)

## Data Setup

In [5]:
# TODO: Set up a better prompt template to encourage step-by-step reasoning
# Prompt shared by teacher generation and student training. It has a single
# `{problem}` placeholder and ends with the "Step-by-step solution:" marker,
# after which the solution text is expected to follow.
PROMPT_TEMPLATE = (
    "Generate a detailed step-by-step solution for this coding problem.\n"
    "Break down your thought process into clear steps, explaining your reasoning at each stage.\n"
    "\n"
    "Problem:\n"
    "{problem}\n"
    "\n"
    "Step-by-step solution:"
)

In [6]:
class CodingCoTDataset(Dataset):
    """Dataset of prompt + chain-of-thought pairs encoded for causal-LM training.

    Args:
        examples: Indexable collection of mappings, each providing the keys
            ``"problem"`` and ``"solution_cot"`` (both strings).
        tokenizer: Callable tokenizer that returns ``"input_ids"`` and
            ``"attention_mask"`` tensors when called with ``return_tensors="pt"``.
        max_length: Length every encoded sequence is padded / truncated to.
        prompt_template: Optional format string with a ``{problem}`` placeholder.
            Defaults to the module-level ``PROMPT_TEMPLATE``.

    Each item is a dict with ``"input_ids"``, ``"attention_mask"`` and
    ``"labels"``. ``labels`` is a copy of ``input_ids`` in which the prompt
    positions and the padding positions are replaced by -100 so that only
    solution tokens contribute to the loss.
    """

    def __init__(self, examples, tokenizer, max_length=512, prompt_template=None):
        self.examples = examples
        self.tokenizer = tokenizer
        self.max_length = max_length
        # Fall back to the notebook-wide template only when none is supplied.
        self.prompt_template = PROMPT_TEMPLATE if prompt_template is None else prompt_template

    def __len__(self):
        """Return the number of examples."""
        return len(self.examples)

    def __getitem__(self, idx):
        """Encode example ``idx`` and build its loss-masked labels."""
        example = self.examples[idx]
        prompt_text = self.prompt_template.format(problem=example["problem"])
        prompt_with_solution = prompt_text + example["solution_cot"]

        encoded = self.tokenizer(
            prompt_with_solution,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        input_ids = encoded["input_ids"][0]
        attention_mask = encoded["attention_mask"][0]

        # Number of tokens the prompt alone encodes to. Masking the first
        # `prompt_length` positions assumes the prompt sits at the start of the
        # sequence, i.e. the tokenizer pads on the right.
        prompt_length = len(self.tokenizer(prompt_text, return_tensors="pt")["input_ids"][0])

        labels = input_ids.clone()
        labels[:prompt_length] = -100  # Don't compute loss for prompt tokens
        # Padding positions carry real token ids in input_ids; without this
        # mask the loss would also be computed on the padding.
        labels[attention_mask == 0] = -100

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }

In [7]:
# Load MBPP dataset
def load_mbpp_dataset():
    """Load the MBPP train split and return it as a list of plain dicts.

    Returns:
        list[dict]: One entry per train item with the keys ``"problem"``
        (from the ``text`` field), ``"test_case"`` (from ``test_list``) and
        ``"solution"`` (from ``code``).
    """
    print("Loading MBPP dataset...")
    train_split = load_dataset("mbpp")["train"]

    # Re-key each record with the field names used by the rest of the notebook.
    problems = [
        {
            "problem": row["text"],
            "test_case": row["test_list"],
            "solution": row["code"],
        }
        for row in train_split
    ]

    print(f"Loaded {len(problems)} problems from MBPP dataset")
    return problems

In [8]:
# Generate CoT dataset from teacher model
def generate_cot_dataset(problem_dataset, teacher_model, teacher_tokenizer, num_examples=50):
    """Sample chain-of-thought solutions from the teacher model and cache them.

    Args:
        problem_dataset: Sliceable sequence of dicts with a ``"problem"`` key.
        teacher_model: Model exposing ``generate`` and ``device``.
        teacher_tokenizer: Tokenizer matching ``teacher_model``.
        num_examples: Number of leading problems to process.

    Returns:
        list[dict]: ``{"problem", "solution_cot"}`` records for every problem
        that produced a non-empty solution. Problems whose generation raises
        are logged and skipped.

    Side effects:
        Writes the records to ``cot_dataset.json`` when at least one solution
        was generated. Nothing is written when the list is empty, so a fully
        failed run does not leave an empty cache file behind.
    """
    examples = []
    logger.info(f"Generating chain-of-thought solutions for {num_examples} problems...")

    # Take a subset of problems for efficiency
    problems_subset = problem_dataset[:num_examples]

    # Process problems with progress bar
    for i, problem in enumerate(tqdm(problems_subset, desc="Generating CoT solutions")):
        # Prompt for chain-of-thought reasoning
        prompt = PROMPT_TEMPLATE.format(problem=problem["problem"])

        # Generate the reasoning from the teacher model
        inputs = teacher_tokenizer(prompt, return_tensors="pt").to(teacher_model.device)
        prompt_token_count = inputs["input_ids"].shape[1]

        try:
            with torch.no_grad():
                output = teacher_model.generate(
                    **inputs,
                    max_length=1024,
                    temperature=0.7,
                    do_sample=True,
                    top_p=0.9,
                    num_return_sequences=1
                )

            # The returned sequence starts with the prompt tokens; decode only
            # what the model appended. This avoids searching the decoded text
            # for the marker, which mis-slices when the marker is not found.
            generated_tokens = output[0][prompt_token_count:]
            solution_cot = teacher_tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

            if not solution_cot:
                # An empty target gives the student nothing to learn from.
                logger.warning(f"Teacher returned an empty solution for problem {i}; skipping")
                continue

            examples.append({"problem": problem["problem"], "solution_cot": solution_cot})

            # Save a few examples for inspection
            if i < 2:
                print(f"\nExample {i+1}:")
                print(f"Problem: {problem['problem'][:150]}...")
                print(f"Solution (first 150 chars): {solution_cot[:150]}...")

            # Log progress details periodically
            if (i + 1) % 10 == 0:
                logger.info(f"Generated {i + 1}/{len(problems_subset)} solutions")

        except Exception as e:
            # Best effort: one failing problem must not abort the whole run.
            logger.error(f"Error generating solution for problem {i}: {e}")
            continue

    logger.info(f"Successfully generated {len(examples)} solutions")

    if not examples:
        # Do not persist an empty dataset: a later run that finds the file
        # would load it instead of generating again.
        logger.error("No solutions were generated; cot_dataset.json was not written")
        return examples

    # Save the dataset
    with open("cot_dataset.json", "w", encoding="utf-8") as f:
        json.dump(examples, f, indent=2)

    logger.info("Dataset saved to cot_dataset.json")
    return examples

# Planning Agent

## Models

In [9]:
# Load models
def load_models():
    """Load the teacher and student checkpoints with their tokenizers.

    The teacher ("Qwen/Qwen2.5-7B-Instruct") is loaded with
    ``torch_dtype=torch.float16``; the student ("Qwen/Qwen2.5-0.5B") is loaded
    without an explicit dtype. Both use ``device_map="auto"``.

    Returns:
        tuple: ``(teacher_model, teacher_tokenizer, student_model, student_tokenizer)``.
    """
    teacher_id = "Qwen/Qwen2.5-7B-Instruct"
    student_id = "Qwen/Qwen2.5-0.5B"

    # Teacher: tokenizer first, then the half-precision weights.
    logger.info(f"Loading teacher model: {teacher_id}")
    teacher_tok = AutoTokenizer.from_pretrained(teacher_id)
    teacher_kwargs = {"device_map": "auto", "torch_dtype": torch.float16}
    teacher = AutoModelForCausalLM.from_pretrained(teacher_id, **teacher_kwargs)
    logger.info("Teacher model loaded successfully")

    # Student: same order, no dtype override.
    logger.info(f"Loading student model: {student_id}")
    student_tok = AutoTokenizer.from_pretrained(student_id)
    student_kwargs = {"device_map": "auto"}
    student = AutoModelForCausalLM.from_pretrained(student_id, **student_kwargs)
    logger.info("Student model loaded successfully")

    return teacher, teacher_tok, student, student_tok

# Main

In [10]:
print("Loading models...")
teacher_model, teacher_tokenizer, student_model, student_tokenizer = load_models()

print("Loading dataset...")
mbpp_problems = load_mbpp_dataset()

print("Generating dataset from teacher model outputs...")
# Generate the dataset or load existing one
examples = []
if os.path.exists("cot_dataset.json"):
    logger.info("Loading existing dataset from cot_dataset.json")
    with open("cot_dataset.json", "r") as f:
        examples = json.load(f)

# An empty cached list (e.g. left by an earlier run in which every generation
# failed) is treated like a missing cache, so generation is attempted again.
if not examples:
    logger.info("Generating new dataset using teacher model")
    examples = generate_cot_dataset(mbpp_problems, teacher_model, teacher_tokenizer)

Loading models...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/27.8k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.56G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/7.23k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/681 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

Loading dataset...
Loading MBPP dataset...


README.md:   0%|          | 0.00/9.06k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/87.2k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

prompt-00000-of-00001.parquet:   0%|          | 0.00/7.88k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/374 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/500 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/90 [00:00<?, ? examples/s]

Generating prompt split:   0%|          | 0/10 [00:00<?, ? examples/s]

Loaded 374 problems from MBPP dataset
Generating dataset from teacher model outputs...


Generating CoT solutions:   0%|          | 0/50 [00:00<?, ?it/s]ERROR:__main__:Error generating solution for problem 0: CUDA out of memory. Tried to allocate 1.02 GiB. GPU 0 has a total capacity of 14.74 GiB of which 378.12 MiB is free. Process 5733 has 14.37 GiB memory in use. Of the allocated memory 13.72 GiB is allocated by PyTorch, and 537.40 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
Generating CoT solutions:   2%|▏         | 1/50 [00:01<01:26,  1.76s/it]ERROR:__main__:Error generating solution for problem 1: CUDA out of memory. Tried to allocate 1.02 GiB. GPU 0 has a total capacity of 14.74 GiB of which 378.12 MiB is free. Process 5733 has 14.37 GiB memory in use. Of the allocated memory 13.72 GiB is allocated by PyTorch, and 537.65 MiB is 

In [13]:
# Print each loaded MBPP problem together with its test cases and reference code.
for i, example in enumerate(mbpp_problems):
  print(f"Problem number: {i}")
  print(f"Problem: {example['problem']}")
  print("Test cases:")
  print(example['test_case'])
  print("Code Solution:")
  # load_mbpp_dataset() stores the reference code under "solution"; the records
  # have no "canonical_solution" key, so that lookup raised KeyError.
  print(example['solution'])

Problem number: 0
Problem: Write a function to find the longest chain which can be formed from the given set of pairs.
Test cases:
['assert max_chain_length([Pair(5, 24), Pair(15, 25),Pair(27, 40), Pair(50, 60)], 4) == 3', 'assert max_chain_length([Pair(1, 2), Pair(3, 4),Pair(5, 6), Pair(7, 8)], 4) == 4', 'assert max_chain_length([Pair(19, 10), Pair(11, 12),Pair(13, 14), Pair(15, 16), Pair(31, 54)], 5) == 5']
Code Solution:
class Pair(object): 
	def __init__(self, a, b): 
		self.a = a 
		self.b = b 
def max_chain_length(arr, n): 
	max = 0
	mcl = [1 for i in range(n)] 
	for i in range(1, n): 
		for j in range(0, i): 
			if (arr[i].a > arr[j].b and
				mcl[i] < mcl[j] + 1): 
				mcl[i] = mcl[j] + 1
	for i in range(n): 
		if (max < mcl[i]): 
			max = mcl[i] 
	return max
Problem number: 1
Problem: Write a python function to find the first repeated character in a given string.
Test cases:
['assert first_repeated_char("abcabc") == "a"', 'assert first_repeated_char("abc") == "N