# Python is All You Need: Rephrasing Question and Instruction Tasks as Programming Problems

> Rephrasing Alpaca instruction/response pairs as executable Python scripts with a code LLM

- order: 2

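First, start a vLLM server with your model of choice using one of the commands below (the cells in this notebook expect `codellama/CodeLlama-70b-Instruct-hf` served on port 8000):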
```sh
CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.api_server --model codellama/CodeLlama-34b-Instruct-hf --port 8000
CUDA_VISIBLE_DEVICES=0,3,6,7 python -m vllm.entrypoints.openai.api_server --model codellama/CodeLlama-70b-Instruct-hf --port 8000 --tensor-parallel-size 4
```
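For reference, an SGLang server can be launched like this (it is not used by the cells below, which talk to the vLLM server on port 8000):
```sh
CUDA_VISIBLE_DEVICES=0 python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.5-7b --tokenizer-path llava-hf/llava-1.5-7b-hf --port 30000
```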

In [None]:
#| hide
from datasets import load_dataset
from ersatz.interactions import RephraseQAorInstructResponse
from ersatz.symbolic_representers import *
import dspy

# Client for the model server expected at http://localhost:8000 (the port used
# in the launch commands at the top of the notebook).
# NOTE(review): max_tokens presumably caps the length of each completion — confirm
# against the dspy client; long scripts may be cut off at 256 tokens.
lm = dspy.HFClientVLLM(
    url="http://localhost",
    port=8000,
    model="codellama/CodeLlama-70b-Instruct-hf",
    max_tokens=256,
)
# Register the client as the language model in dspy's global settings.
dspy.settings.configure(lm=lm)

# Source instruction/response pairs that will be rephrased as Python scripts.
ds = load_dataset("tatsu-lab/alpaca")

In [None]:
import ast
import re

def extract_markdown_code(markdown_string):
    """
    Extracts the contents of fenced code blocks from a Markdown string.

    A block opens with three backticks, optionally followed by a language tag
    (for example ```python or ```py), and closes with three backticks on a
    later line. Both plain and language-tagged fences are supported.

    Args:
        markdown_string: The Markdown string containing code blocks. ``None``
            or an empty string is treated as containing no code.

    Returns:
        A list with the body of each fenced block, in document order, without
        the fence lines themselves. Empty when no fenced block is found.
    """
    if not markdown_string:
        return []

    # Match the full three-backtick fence on both sides. Matching a single
    # backtick lets the tail of one closing fence pair up with the head of the
    # next opening fence, which returns the prose *between* two code blocks as
    # if it were code.
    fence_pattern = r'```[ \t]*[\w+#.-]*[ \t]*\r?\n(.*?)\r?\n[ \t]*```'

    # DOTALL lets the lazy group span multiple lines of code.
    return re.findall(fence_pattern, markdown_string, flags=re.DOTALL)

def is_parsable(code):
    """Checks if the given code string is valid Python syntax.

    Args:
        code: Source text to parse.

    Returns:
        True when ``ast.parse`` accepts the text, False when the text cannot
        be parsed.
    """
    try:
        ast.parse(code)
        return True
    except (SyntaxError, ValueError, RecursionError, MemoryError):
        # SyntaxError: ordinary invalid code (also covers IndentationError).
        # ValueError: source containing null bytes on older Python versions.
        # RecursionError / MemoryError: pathologically nested input that
        # exhausts the parser. Any of these means "not parsable" rather than
        # a reason to abort a whole batch of examples.
        return False

def is_runnable(code):
    """Checks if the given code string is runnable without errors.

    The code is executed in a fresh, throwaway namespace.

    WARNING: this executes the code in the current process with full
    privileges and no timeout. Do not call it on untrusted (for example
    model-generated) code outside a sandbox.

    Args:
        code: Python source text to execute.

    Returns:
        True when execution finishes without raising, or exits via
        ``SystemExit`` with a success status (``None`` or ``0``).
        False otherwise; the error is printed.
    """
    # Use ONE dict as both globals and locals. With a bare exec(code) inside a
    # function, top-level names defined by the snippet land in a separate
    # locals mapping, so functions defined in the snippet cannot see each
    # other (NameError) and valid scripts are reported as not runnable.
    # "__name__" is set so `if __name__ == "__main__":` guards still run.
    namespace = {"__name__": "__main__"}
    try:
        exec(code, namespace)
        return True
    except SystemExit as e:
        # sys.exit()/exit() raise SystemExit, which is not an Exception
        # subclass; left uncaught it would terminate the calling process.
        if e.code in (None, 0):
            return True
        print(f"Error running code block: {e}")
        return False
    except Exception as e:
        print(f"Error running code block: {e}")
        return False

In [None]:
# Style instruction handed to the rephraser: asks for a runnable Python script
# that ends with a print statement and is wrapped in a ``` fence.
REPHRASE_LIKE = (
    "Rephrase the response to be an executable python script that solves the instruction.\n"
    "Make sure you end the code with a print statement and surround the code with ```."
)
code_rephraser = dspy.Predict(RephraseQAorInstructResponse)


def rephrase_alpaca(example):
    """Rephrase one Alpaca record as a Python script and report on the result.

    Args:
        example: Mapping with "instruction", "input" and "output" string fields.

    Returns:
        A dict with three keys:
          - "code": the first fenced code block of the model response, or the
            whole raw response when no fenced block was found.
          - "parsable": result of ``is_parsable`` on the extracted code; always
            False when no fenced block was found.
          - "runnable": always None; the execution check is deliberately left
            disabled here.
    """
    # Instruction and (possibly empty) input are joined by a single newline.
    question = example["instruction"] + "\n" + example["input"]
    pred = code_rephraser(
        original_q_or_instruct=question,
        original_a_or_response=example["output"],
        rephrase_like=REPHRASE_LIKE,
    )

    response = pred["rephrased_a_or_response"]
    blocks = extract_markdown_code(response)

    # No fenced block: keep the raw response so the failure can be inspected.
    if not blocks:
        return {"code": response, "parsable": False, "runnable": None}

    first_block = blocks[0]
    return {
        "code": first_block,
        "parsable": is_parsable(first_block),
        "runnable": None,  # is_runnable(first_block) intentionally not called
    }

# Rephrase every example. Each row triggers one request to the model server,
# so the work is fanned out over 90 worker processes.
dataset = ds.map(rephrase_alpaca, num_proc=90)

Map (num_proc=90):   0%|          | 0/52002 [00:00<?, ? examples/s]

  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response = send_hfvllm_request_v00(
  response =

In [None]:
# Upload the rephrased dataset to the Hub; the repository is created as private.
dataset.push_to_hub(
    "ncoop57/code_rephrased_alpaca",
    private=True,
)

Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/53 [00:00<?, ?ba/s]

Deleting unused files from dataset repository:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading metadata:   0%|          | 0.00/653 [00:00<?, ?B/s]

In [None]:
# Spot-check one mapped record (second-to-last row of the train split).
# Kept as the cell's final bare expression so the notebook displays it.
dataset["train"][-2]

{'instruction': 'Generate a list of five things one should keep in mind when considering a career change.',
 'input': '',
 'output': '1. Research potential opportunities and carefully consider options.\n2. Ensure the new career is truly a good fit.\n3. Understand the financial and emotional implications of a career change.\n4. Spend time developing necessary skills and expertise.\n5. Take into account any personal obligations or familial constraints that may affect your choices.',
 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nGenerate a list of five things one should keep in mind when considering a career change.\n\n### Response:\n1. Research potential opportunities and carefully consider options.\n2. Ensure the new career is truly a good fit.\n3. Understand the financial and emotional implications of a career change.\n4. Spend time developing necessary skills and expertise.\n5. Take into account

In [None]:
# Walk through a single example end to end: prompt, original answer, and the
# rephrased script together with its parse/run checks.
idx = 0
example = ds["train"][idx]  # fetch the row once instead of re-indexing per field
prompt = example["instruction"] + "\n" + example["input"]

pred = code_rephraser(
    original_q_or_instruct=prompt,
    original_a_or_response=example["output"],
    rephrase_like=REPHRASE_LIKE,
)
response = pred["rephrased_a_or_response"]

print(prompt)
print("="*40)
print(example["output"])
print("="*40)

blocks = extract_markdown_code(response)
if blocks:
    code = blocks[0]
    print(code)
    print(is_parsable(code))
    # NOTE: this executes model-generated code in the notebook process.
    print(is_runnable(code))
else:
    # The model does not always wrap its answer in a fence; show the raw
    # response instead of failing with an IndexError on blocks[0].
    print("No fenced code block found in the model response:")
    print(response)