In [1]:
import functools
import json
import os
import random
import time

import requests
import tqdm

In [2]:
def retry_with_exponential_backoff(
    func,
    initial_delay: float = 5,
    exponential_base: float = 2,
    jitter: bool = True,
    max_retries: int = 5,
):
    """Wrap ``func`` so it is re-invoked with exponential backoff until it succeeds.

    A call counts as successful when the value returned by ``func`` has a
    ``"message"`` entry equal to ``"success"``. Any other value, a missing
    ``"message"`` entry, or a return value that cannot be subscripted is
    treated as a failed attempt and retried.

    Args:
        func: Callable to wrap; it is re-invoked with the same arguments on
            every attempt.
        initial_delay: Starting delay in seconds, before any growth is applied.
        exponential_base: Factor the delay is multiplied by after each failure.
        jitter: When true, the growth factor is additionally scaled by a
            random value in ``[1, 2)``.
        max_retries: Number of failed attempts tolerated before giving up.

    Returns:
        A wrapper with the same name and docstring as ``func`` that returns
        the first successful response.

    Raises:
        Exception: ``"Maximum Retry Exceed"`` once more than ``max_retries``
            attempts have failed. Exceptions raised by ``func`` itself are not
            caught here and propagate immediately.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        num_retries = 0
        delay = initial_delay

        while True:
            response = func(*args, **kwargs)

            # A malformed response (no "message", or not subscriptable) is a
            # failed attempt, not a crash of the retry loop.
            try:
                message = response["message"]
            except (KeyError, TypeError, IndexError):
                message = None

            if message == "success":
                return response

            num_retries += 1
            if num_retries > max_retries:
                raise Exception("Maximum Retry Exceed")

            # The delay grows before the sleep, so the first wait is already
            # initial_delay * exponential_base (times the jitter factor).
            delay *= exponential_base * (1 + jitter * random.random())
            time.sleep(delay)

    return wrapper

@retry_with_exponential_backoff
def prompt_codegeex(prompt, temperature, top_p=None, n=5):
    """POST ``prompt`` to the code-generation endpoint and return the decoded JSON.

    The prompt is sent with a ``# language: Python`` header line prepended and
    ``lang`` set to ``"Python"``. Because of the decorator, the request is
    repeated with backoff until the returned JSON has ``"message"`` equal to
    ``"success"``.

    Args:
        prompt: Source text the model should continue.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API; ``None`` is sent
            as JSON ``null``.
        n: Number of completions requested.

    Returns:
        The JSON-decoded response body. Other code in this file reads the
        completions from ``['result']['output']['code']``.

    Raises:
        Exception: ``"Error in Connection..."`` when ``requests`` fails to
            complete the HTTP call; the original error is chained as the cause.
    """
    api_url = "https://wudao.aminer.cn/os/api/api/v2/multilingual_code/generate"
    # api_url = "https://maas.aminer.cn/api/paas/model/v2/open/engines/code-generate-block/codegeex-generate-block"

    headers = {"Content-Type": "application/json"}
    data = {"lang": "Python",
            "prompt": f"# language: Python\n\n{prompt}",
            "temperature": temperature,
            "top_p": top_p,
            "n": n}

    try:
        response = requests.post(api_url, headers=headers, json=data)
    except requests.exceptions.RequestException as err:
        # `requests.exceptions` is a module; only an exception class such as
        # RequestException can be named in an except clause.
        print(err)
        raise Exception("Error in Connection...") from err

    return response.json()

In [3]:
# Load the benchmark records from the HumanEval JSONL file. Later cells slice
# he_data and read each record's 'prompt' entry.
from utils import load_json_dataset
he_data = load_json_dataset("dataset/humaneval/HumanEval.jsonl")

In [4]:
# Run configuration: sampling settings plus the output path derived from them.
model_name = "codegeex-13B"
# NOTE(review): in the visible code max_tokens is only used in the output file
# name; it is not passed to prompt_codegeex -- confirm this is intended.
max_tokens = 350
temperature = 0.8
top_p = 1.0
# Number of completions requested per problem in the first API call.
n = 3
# The file name encodes the settings above; '-' in the model name becomes '_'.
output_file_pth = f"dataset/humaneval/{model_name.replace('-','_')}-{temperature}-{top_p}-{max_tokens}-{n}_response.json"
# Writing is currently disabled; only the target path is reported.
# out_file = open(output_file_pth, "a")
print(f"write to {output_file_pth}")

write to dataset/humaneval/codegeex_13B-0.8-1.0-350-3_response.json


In [6]:
def generate_block(prompt, gen, temperature, top_p=None, max_try_limit=25):
    """Extend ``gen`` by repeatedly asking the model to continue ``prompt + gen``.

    Each round requests a single completion (``n=1``) for the prompt followed
    by everything generated so far and appends the returned text to ``gen``.
    Generation stops when the API returns no completion, returns an empty
    string, or ``max_try_limit`` rounds have been made.

    Args:
        prompt: The original problem prompt.
        gen: Text generated so far; used as the starting point.
        temperature: Sampling temperature forwarded to ``prompt_codegeex``.
        top_p: Nucleus-sampling value forwarded to ``prompt_codegeex``.
        max_try_limit: Maximum number of API calls made for this completion.
            It is a parameter so the function does not depend on a global
            being defined before it is called.

    Returns:
        ``gen`` with every non-empty continuation appended.
    """
    for _ in range(max_try_limit):
        response = prompt_codegeex(prompt + gen, temperature, top_p=top_p, n=1)
        code = response['result']['output']['code']
        # Stop on an empty list or empty string: there is nothing to append.
        if not code or code[0] == "":
            break
        # Every fetched continuation is used, so no API call is wasted when
        # the cap is reached.
        gen = gen + code[0]

    return gen

# Cap on continuation rounds, set once at module level before the loop starts
# instead of being re-assigned on every iteration.
max_try_limit = 25

# Only the first two records are processed here.
for data in tqdm.tqdm(he_data[:2]):
    prompt = f"{data['prompt']}"
    # The first call asks for n completions at once; each is then extended
    # separately by generate_block.
    first_response = prompt_codegeex(prompt, temperature, top_p=top_p, n=n)
    gen_code = []
    for first_gen in first_response['result']['output']['code']:
        gen_code.append(generate_block(prompt, first_gen, temperature, top_p=top_p))

    # The completions are stored on the record itself.
    data['raw_response'] = gen_code
    # out_file.write(json.dumps(data) + "\n")
    print(gen_code)
    # Pause between records before the next batch of API calls.
    time.sleep(5)



  0%|                                                     | 0/2 [00:00<?, ?it/s]

['    if len(numbers) < 3:\n        return False\n    min_distances = [abs(a - b) for a, b in zip(numbers[1:], numbers[:-1])]\n    return all(\n        distance < threshold for distance in min_distances\n    )\n', '    if len(numbers) < 2:\n        return False\n    # TODO: improve to be O(n^2)\n    for i in range(len(numbers) - 1):\n        j = i + 1\n        while j < len(numbers):\n            if abs(numbers[i] - numbers[j]) < threshold:\n                return True\n            j += 1\n    return False\n', '    if len(numbers) < 3:\n        return False\n    numbers.sort()\n    return numbers[1] - numbers[0] <= threshold and numbers[-1] - numbers[-2] <= threshold\n']


 50%|██████████████████████▌                      | 1/2 [00:23<00:23, 23.12s/it]

["    separated_groups = []\n    paren_indexes = []\n    for char in paren_string:\n        if char == '(':\n            paren_indexes.append(len(separated_groups))\n        elif char == ')':\n            if len(paren_indexes) > 0:\n                paren_indexes.pop()\n            else:\n                return []\n        else:\n            if len(paren_indexes) == 0:\n                separated_groups.append(char)\n            elif char!='':\n                separated_groups[paren_indexes[-1]] += char\n    return separated_groups\n", "    separated_groups = []\n    in_curly_brace_group = False\n    in_paren_group = False\n    for ch in paren_string:\n        if in_curly_brace_group:\n            if ch == '}':\n                in_curly_brace_group = False\n            else:\n                separated_groups.append(ch)\n        elif ch == '{':\n            in_curly_brace_group = True\n        elif ch == '(' and not in_paren_group:\n            separated_groups.append(ch)\n            in_

100%|█████████████████████████████████████████████| 2/2 [01:27<00:00, 43.93s/it]


In [9]:
# Show the third completion from the loop's final iteration; gen_code is
# rebuilt for every record, so it holds only the last record's results.
print(gen_code[2])

    output: List[str] = []
    for i, char in enumerate(paren_string):
        if char == '(':
            left_group: List[str] = []
            right_group: List[str] = []
            while paren_string[i]!= ')':
                left_group.append(paren_string[i])
                i+=1
            output.append(''.join(left_group))
            while paren_string[i]!= ')':
                i+=1
            i+=1
            while paren_string[i]!= '(':
                right_group.append(paren_string[i])
                i+=1
            output.append(''.join(right_group))
        elif char =='':
            output.append(char)
        else:
            pass
    return output

