In [1]:
"""
Build /mnt/ebs/code/forked-context-aware-decoding/eval/apps_tmp/random_2_-1.jsonl with apps data.
Where the with-context prompt would be containing random training examples
with their ground-truth code.
"""

'\nBuild /mnt/ebs/code/forked-context-aware-decoding/eval/apps_tmp/random_2_-1.jsonl with apps data.\nWhere the with-context prompt would be containing random training examples\nwith their ground-truth code.\n'

In [2]:
import pandas as pd
from tqdm import tqdm

In [3]:
def load_data(dataset, data_type, data_dir="/mnt/ebs/code/hallucination-mitigation/data"):
    """Load both JSON-lines shards of one dataset split into a single DataFrame.

    Reads ``{data_dir}/{dataset}/{data_type}_samples_1.jsonl`` and
    ``..._samples_2.jsonl`` and concatenates them in that order.

    Args:
        dataset: Name of the dataset sub-directory (e.g. ``"apps"``).
        data_type: Split prefix used in the file names (e.g. ``"train"``).
        data_dir: Root directory holding the per-dataset folders. Defaults to
            the location the notebook was originally written against, so
            existing two-argument calls behave exactly as before.

    Returns:
        A DataFrame with the rows of shard 1 followed by the rows of shard 2,
        re-indexed from 0.
    """
    shards = [
        pd.read_json(f"{data_dir}/{dataset}/{data_type}_samples_{part}.jsonl", lines=True)
        for part in (1, 2)
    ]
    # ignore_index=True gives a fresh 0..n-1 index, so positional (.iloc) and
    # label lookups agree on the combined frame.
    return pd.concat(shards, ignore_index=True)

In [4]:
# Load the APPS train and test splits (train first, then test).
train_df, test_df = (load_data("apps", split) for split in ("train", "test"))

In [5]:
def build_dsc_context(problem):
    """Build the zero-shot DeepSeek-Coder instruction prompt for one problem.

    Args:
        problem: Mapping-like object (dict or DataFrame row) providing
            ``"question"`` (the problem statement) and ``"fn_name"`` (the
            required function name; falsy or missing-valued when the problem
            is solved through stdin/stdout).

    Returns:
        The full prompt string, ending with ``"### Response:\\n"``.

    Raises:
        KeyError: If ``problem`` lacks ``"question"`` or ``"fn_name"``.
    """
    prompt = "\nQUESTION:\n"
    prompt += problem["question"]
    fn_name = problem["fn_name"]

    # A missing value read through pandas can surface as NaN, which is truthy;
    # NaN is the only value that differs from itself, so test for it explicitly
    # instead of letting it select the Call-Based branch.
    has_no_fn_name = fn_name is None or fn_name != fn_name or not fn_name

    if has_no_fn_name:
        prompt += "\nPlease write your code using Standard Input, i.e. input() and print()."
    else:
        # The original literal was "\Please ...": an invalid escape sequence
        # that put a literal backslash into the prompt instead of a newline.
        prompt += "\nPlease write your code using Call-Based format."

    system_prompt = "You are an expert code developer with years of experience."
    user_prompt = f'''As an expert code developer with years of experience, please provide the python code based on the question. Ensure the code is enclosed within triple backticks (```) to mark the start and end of the code block.\n{prompt}'''

    prompt = f'''You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.
### Instruction:
{system_prompt}

{user_prompt}
### Response:
'''
    return prompt

In [6]:
def build_dsc_context_with_few_shot(test_problem, train_problems):
    """Build the few-shot DeepSeek-Coder instruction prompt for one problem.

    The prompt lists every row of ``train_problems`` as a worked example
    (question followed by its first solution) and then states the target
    question.

    Args:
        test_problem: Mapping-like object (dict or DataFrame row) providing
            ``"question"`` and ``"fn_name"`` (falsy or missing-valued when the
            problem is solved through stdin/stdout).
        train_problems: DataFrame whose rows provide ``"question"`` and
            ``"solutions"``; ``solutions[0]`` is used as the example answer.

    Returns:
        The full prompt string, ending with ``"### Response:\\n"``.

    Raises:
        KeyError: If a required key/column is absent.
        IndexError: If an example row has an empty ``"solutions"`` sequence.
    """
    prompt = "\nQUESTION:\n"
    prompt += test_problem["question"]
    fn_name = test_problem["fn_name"]

    # A missing value read through pandas can surface as NaN, which is truthy;
    # NaN is the only value that differs from itself, so test for it explicitly
    # instead of letting it select the Call-Based branch.
    has_no_fn_name = fn_name is None or fn_name != fn_name or not fn_name

    if has_no_fn_name:
        prompt += "\nPlease write your code using Standard Input, i.e. input() and print()."
    else:
        # The original literal was "\Please ...": an invalid escape sequence
        # that put a literal backslash into the prompt instead of a newline.
        prompt += "\nPlease write your code using Call-Based format."

    # Add the few-shot examples, numbered from 1 in row order.
    example_parts = ["\n\nEXAMPLES:\n"]
    for i in range(len(train_problems)):
        train_problem = train_problems.iloc[i]
        example_parts.append(f"\nExample {i+1}:\n")
        example_parts.append(train_problem["question"])
        example_parts.append("\n\nAnswer:\n")
        # Only the first listed solution is shown as the example answer.
        example_parts.append(train_problem["solutions"][0])
    example_prompt = "".join(example_parts)

    system_prompt = "You are an expert code developer with years of experience. You have been provided with a few examples to help you answer the question."
    user_prompt = f'''As an expert code developer with years of experience, please provide the python code based on the question. You may consult the following example coding questions and their answers to provide the code. Ensure the code is enclosed within triple backticks (```) to mark the start and end of the code block.\n{example_prompt}\n{prompt}'''

    prompt = f'''You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.
### Instruction:
{system_prompt}

{user_prompt}
### Response:
'''
    return prompt

In [10]:
def build_random_data(num_shots, eval_indices):
    """Write paired with-context / without-context prompt records as JSON lines.

    For each position in ``eval_indices`` the test problem at that position of
    the module-level ``test_df`` is paired with ``num_shots`` rows sampled from
    the module-level ``train_df`` (the position itself is the sampling seed).
    Two records are emitted per problem, with-context first.

    Args:
        num_shots: Number of training rows sampled as few-shot examples.
        eval_indices: Iterable of positional indices into ``test_df``.
    """
    model_name = "deepseek-ai/deepseek-coder-6.7b-base"
    records = []

    for idx in tqdm(eval_indices):
        problem = test_df.iloc[idx]
        shots = train_df.sample(n=num_shots, random_state=idx)
        task_id = problem["task_id"]

        records.extend([
            # With-context record: few-shot prompt, weight 2.
            # No gold answer is stored here: it is not used for evaluation
            # and can always be looked up again through task_id.
            {
                "input_index": task_id,
                "assigned_model": model_name,
                "assigned_process": 0,
                "filter_p": 1,
                "context_string": build_dsc_context_with_few_shot(problem, shots),
                "assigned_weight": 2,
            },
            # Without-context record: zero-shot prompt, weight -1
            # (this record carries no "filter_p" key).
            {
                "input_index": task_id,
                "assigned_model": model_name,
                "assigned_process": 1,
                "context_string": build_dsc_context(problem),
                "assigned_weight": -1,
            },
        ])

    # NOTE(review): "eval300" in the file name is fixed text and does not
    # follow len(eval_indices) -- confirm that is intended.
    destination = f"/mnt/ebs/code/forked-context-aware-decoding/eval/apps/random_2_-1_{num_shots}shots_eval300.jsonl"
    pd.DataFrame(records).to_json(destination, orient="records", lines=True)

In [11]:
# Number of training examples placed in each with-context prompt.
num_shots = 1
# Fixed positional row indices into test_df to evaluate; inside build_random_data
# each index is also used as the random_state for sampling the few-shot examples.
eval_indices = [912, 204, 2253, 2006, 1828, 1143, 839, 4467, 712, 4837, 3456, 260, 244, 767, 1791, 1905, 4139, 4931, 217, 4597, 1628, 4464, 3436, 1805, 3679, 4827, 2278, 53, 1307, 3462, 2787, 2276, 1273, 1763, 2757, 837, 759, 3112, 792, 2940, 2817, 4945, 2166, 355, 3763, 4392, 1022, 3100, 645, 4522, 2401, 2962, 4729, 1575, 569, 375, 1866, 2370, 653, 1907, 827, 3113, 2277, 3714, 2988, 1332, 3032, 2910, 1716, 2187, 584, 4990, 1401, 4375, 2005, 1338, 3786, 3108, 2211, 4562, 1799, 2656, 458, 1876, 262, 2584, 3286, 2193, 542, 1728, 4646, 2577, 1741, 4089, 3241, 3758, 1170, 2169, 2020, 4598, 4415, 2152, 4788, 3509, 4780, 3271, 2965, 1796, 1133, 4174, 4042, 744, 385, 898, 1252, 1310, 3458, 4885, 520, 3152, 3126, 4881, 3834, 4334, 2059, 4532, 94, 938, 4398, 2185, 2786, 913, 2404, 3561, 1295, 3716, 26, 2157, 4100, 1463, 4158, 871, 2444, 4988, 1629, 3063, 1323, 4418, 4344, 4, 4906, 2655, 4002, 159, 916, 2973, 2519, 1961, 474, 1973, 4647, 701, 3981, 566, 4363, 1030, 1051, 3893, 4503, 1352, 2171, 4322, 4969, 3466, 1735, 4417, 1647, 2553, 3268, 3059, 3588, 4239, 3698, 991, 2030, 1840, 524, 2769, 172, 4819, 4537, 1885, 4820, 1804, 58, 581, 482, 1875, 552, 257, 2706, 580, 4211, 1949, 2281, 3976, 1755, 1083, 4677, 4720, 3872, 1990, 3874, 3334, 1559, 772, 794, 3531, 2902, 3469, 3367, 3825, 443, 806, 496, 3298, 2779, 895, 2036, 1569, 1558, 4393, 3675, 1148, 1503, 3789, 2046, 617, 3630, 4508, 802, 414, 4428, 120, 764, 1936, 1362, 3329, 3978, 3943, 1751, 3285, 480, 1348, 3104, 17, 3198, 2172, 3727, 2336, 3465, 4552, 3986, 1268, 1555, 2430, 1783, 479, 4744, 4441, 499, 2569, 468, 410, 4785, 3905, 4119, 4350, 1289, 465, 4160, 656, 1522, 561, 4874, 556, 1926, 3307, 982, 4666, 2016, 4742, 4870, 325, 671, 3434, 4781, 4630, 4282, 2591]
# Writes the JSON-lines output file as a side effect; returns nothing.
build_random_data(num_shots, eval_indices)

100%|██████████| 300/300 [00:00<00:00, 1823.35it/s]


In [12]:
# Scratch check: draw a single training row with a fixed seed, the same kind of
# call build_random_data makes for each test index.
train_problems = train_df.sample(n=1, random_state=1)

In [13]:
# Display the sampled row to inspect its columns.
train_problems

Unnamed: 0,task_id,fn_name,input_sample,question,solutions,difficulty,url
2764,2764,find_abc_sumsqcube,"[20, 8]",We are interested in collecting the triples of...,[from bisect import bisect_right as bisect\n\n...,introductory,https://www.codewars.com/kata/5618716a738b95ce...
