Ref:  
- [LLMs: Get predictions from a language model](https://python.langchain.com/en/latest/getting_started/getting_started.html#llms-get-predictions-from-a-language-model)
- [A simple guide to setting the GPT-3 temperature](https://algowriting.medium.com/gpt-3-temperature-setting-101-41200ff0d0be)
- [HumanEval-X: A new benchmark for Multilingual Program Synthesis](https://github.com/THUDM/CodeGeeX/blob/main/codegeex/benchmark/README.md)

## Import HumanEval-X (C Plus Plus)

In [1]:
import json

In [2]:
HEP_FILE = 'datasets/HumanEval-X/humaneval_cpp.jsonl'  # HumanEval-X C++ problems (JSON Lines)

In [3]:
# Load the benchmark: one JSON object per line; blank lines are skipped.
# The context manager closes the file as soon as parsing is done.
with open(HEP_FILE, 'r', encoding='utf-8') as f:
    HEP = [json.loads(line) for line in f if line.strip()]

In [4]:
# task_id has the form '<LANGUAGE>/<index>' (e.g. 'CPP/0'); keep the part before the first '/'.
LANGUAGE = HEP[0]['task_id'].partition('/')[0]

In [5]:
LANGUAGE

'CPP'

In [6]:
PROMPT = HEP[0]['prompt']

In [7]:
PROMPT

'/*\nCheck if in given vector of numbers, are any two numbers closer to each other than\ngiven threshold.\n>>> has_close_elements({1.0, 2.0, 3.0}, 0.5)\nfalse\n>>> has_close_elements({1.0, 2.8, 3.0, 4.0, 5.0, 2.0}, 0.3)\ntrue\n*/\n#include<stdio.h>\n#include<vector>\n#include<math.h>\nusing namespace std;\nbool has_close_elements(vector<float> numbers, float threshold){\n'

In [8]:
print(PROMPT)

/*
Check if in given vector of numbers, are any two numbers closer to each other than
given threshold.
>>> has_close_elements({1.0, 2.0, 3.0}, 0.5)
false
>>> has_close_elements({1.0, 2.8, 3.0, 4.0, 5.0, 2.0}, 0.3)
true
*/
#include<stdio.h>
#include<vector>
#include<math.h>
using namespace std;
bool has_close_elements(vector<float> numbers, float threshold){



## Ask CodeGeeX to generate code

In [9]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path
# (presumably where codegeex_api.py lives — confirm).
sys.path.append(str(Path('.').resolve().parents[0]))

from codegeex_api import CodeGeeX

#### <font size="7" color="orange">⚠</font> Do <span style="color:red">NOT</span> commit the config file to GitHub: it contains the API key.

In [10]:
# Read the API settings; the context manager closes the file promptly.
with open('codegeex_api_config.json', 'r', encoding='utf-8') as f:
    codegeex_api_config = json.load(f)

In [11]:
codegeex_api_config

{'api_base': 'http://localhost:8080/v1',
 'api_version': '2.1.0.0',
 'api_type': 'codegeex',
 'api_key': 'D4B94CC818A3D8A725CCC8FE68B97'}

**With `api_version` "1.x.x.x" the model call returns a string; with "2.x.x.x" it returns JSON.**

In [12]:
m = CodeGeeX(codegeex_api_config)  # model

In [13]:
m_return = m(PROMPT)

In [14]:
print(m_return)

{'stdout': ['    int size = numbers.size();\n    if(size == 0)\n        return false;\n    for(int i=0; i< size - 1; i++){\n        for(int j=i+1; j<size; j++){\n            if(abs(numbers[i] - numbers[j]) < threshold){\n                return true;\n            }\n        }\n    }\n    return false;\n}\n\nint main(){\n    vector<float> numbers;\n    numbers.push_back(1.0);\n    numbers.push_back(2.0);\n    numbers.push_back(3.0);\n    float threshold = 0.3;\n    printf("%d\\n", has_close_elements(numbers, threshold));\n    return 0;\n}\n<|endoftext|>'], 'stderr': '', 'elapsed_time': 8.755622098688036}


In [15]:
print(json.dumps(m_return, indent=4, sort_keys=True))

{
    "elapsed_time": 8.755622098688036,
    "stderr": "",
    "stdout": [
        "    int size = numbers.size();\n    if(size == 0)\n        return false;\n    for(int i=0; i< size - 1; i++){\n        for(int j=i+1; j<size; j++){\n            if(abs(numbers[i] - numbers[j]) < threshold){\n                return true;\n            }\n        }\n    }\n    return false;\n}\n\nint main(){\n    vector<float> numbers;\n    numbers.push_back(1.0);\n    numbers.push_back(2.0);\n    numbers.push_back(3.0);\n    float threshold = 0.3;\n    printf(\"%d\\n\", has_close_elements(numbers, threshold));\n    return 0;\n}\n<|endoftext|>"
    ]
}


**temperature & top_p**  

Ref:  
- Zheng, Q., Xia, X., Zou, X., Dong, Y., Wang, S., Xue, Y., Wang, Z., Shen, L., Wang, A., Li, Y. and Su, T., 2023. Codegeex: A pre-trained model for code generation with multilingual evaluations on humaneval-x. *arXiv preprint arXiv:2303.17568*.

#### 4.1 Evaluation Settings
Page 11:

*For CodeGeeX in code generation, we use t = 0.2, p = 0.95 for pass@1 and
t = 0.8, p = 0.95 for pass@10 and pass@100 (except for Go and JavaScript, where p = 0.9).*

In [16]:
m_return = m(PROMPT, temperature=0.2, top_p=0.95)

In [17]:
print(json.dumps(m_return, indent=4, sort_keys=True))

{
    "elapsed_time": 9.931467335671186,
    "stderr": "",
    "stdout": [
        "    for(int i=0; i<numbers.size(); i++){\n        for(int j=i+1; j<numbers.size(); j++){\n            if(abs(numbers[i]-numbers[j])<threshold)\n                return true;\n        }\n    }\n    return false;\n}\n\nint main(){\n    vector<float> numbers;\n    numbers.push_back(1.0);\n    numbers.push_back(2.0);\n    numbers.push_back(3.0);\n    numbers.push_back(4.0);\n    numbers.push_back(5.0);\n    numbers.push_back(2.0);\n    float threshold = 0.3;\n    printf(\"%s\\n\", has_close_elements(numbers, threshold)? \"true\" : \"false\");\n    return 0;\n}\n<|endoftext|>"
    ]
}


In [18]:
# verbose=True makes the server print more information to its own stderr.
# However, the client does not receive that information.

m_return = m(PROMPT, verbose=True)

In [19]:
print(json.dumps(m_return, indent=4, sort_keys=True))

{
    "elapsed_time": 12.661359043326229,
    "stderr": "",
    "stdout": [
        "\tbool is_close = false;\n\tfor(int i = 0; i< numbers.size(); ++i){\n\t\tfor(int j = i+1; j< numbers.size(); ++j){\n\t\t\tif(abs(numbers[i]-numbers[j]) < threshold){\n\t\t\t\tis_close = true;\n\t\t\t\tbreak;\n\t\t\t}\n\t\t}\n\t\tif(is_close) break;\n\t}\n\treturn is_close;\n}\nint main() {\n\tvector<float> vec1 = {1.0,2.0,3.0};\n\tvector<float> vec2 = {1.0,2.8,3.0,4.0,5.0,2.0};\n\tvector<float> vec3 = {-1.0,-2.0,-3.0};\n\tprintf(\"%d\\n\", has_close_elements(vec1,0.3));\n\tprintf(\"%d\\n\", has_close_elements(vec2,0.3));\n\tprintf(\"%d\\n\", has_close_elements(vec3,0.3));\n}\n<|endoftext|>"
    ]
}


In [20]:
print(m_return['stdout'][0])

	bool is_close = false;
	for(int i = 0; i< numbers.size(); ++i){
		for(int j = i+1; j< numbers.size(); ++j){
			if(abs(numbers[i]-numbers[j]) < threshold){
				is_close = true;
				break;
			}
		}
		if(is_close) break;
	}
	return is_close;
}
int main() {
	vector<float> vec1 = {1.0,2.0,3.0};
	vector<float> vec2 = {1.0,2.8,3.0,4.0,5.0,2.0};
	vector<float> vec3 = {-1.0,-2.0,-3.0};
	printf("%d\n", has_close_elements(vec1,0.3));
	printf("%d\n", has_close_elements(vec2,0.3));
	printf("%d\n", has_close_elements(vec3,0.3));
}
<|endoftext|>


**Run all 164 tasks and write the results to a JSONL file**

In [21]:
import datetime

 JSON list format. Ref: https://github.com/THUDM/CodeGeeX/blob/main/codegeex/benchmark/README.md#evaluation

In [22]:
# Timestamped output path, e.g. 'results/cpp_t02_p095_20230712_081121_397034.jsonl'.
# strftime always emits the microsecond field and, unlike chained str.replace
# over the whole string, never touches the path prefix.
timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
file_name = f'results/cpp_t02_p095_{timestamp}.jsonl'

In [23]:
file_name

'results/cpp_t02_p095_20230712_081121_397034.jsonl'

In [24]:
import time

In [25]:
st = time.perf_counter()

# Generate one completion per benchmark task and stream the results to a
# JSON Lines file (one {'task_id', 'prompt', 'generation'} object per line).
# The file is written as UTF-8, matching how the input files are read.
with open(file_name, 'w', encoding='utf-8') as f:
    for task in HEP:
        print(task['task_id'])

        # Generate code
        # use t = 0.2; p = 0.95 for pass@1
        m_return = m(task['prompt'], temperature=0.2, top_p=0.95)

        line = {
            'task_id': task['task_id'],
            'prompt': task['prompt'],
            # Strip the end-of-text marker that appears at the end of the model output.
            'generation': m_return['stdout'][0].replace('<|endoftext|>', ''),
        }

        f.write(json.dumps(line) + '\n')

et = time.perf_counter()
elapsed_time = et - st
print(f'{len(HEP)} task(s), elapsed_time: {elapsed_time} S, {elapsed_time/60} minutes, {elapsed_time/60/60} hours.')


CPP/0
CPP/1
CPP/2
CPP/3
CPP/4
CPP/5
CPP/6
CPP/7
CPP/8
CPP/9
CPP/10
CPP/11
CPP/12
CPP/13
CPP/14
CPP/15
CPP/16
CPP/17
CPP/18
CPP/19
CPP/20
CPP/21
CPP/22
CPP/23
CPP/24
CPP/25
CPP/26
CPP/27
CPP/28
CPP/29
CPP/30
CPP/31
CPP/32
CPP/33
CPP/34
CPP/35
CPP/36
CPP/37
CPP/38
CPP/39
CPP/40
CPP/41
CPP/42
CPP/43
CPP/44
CPP/45
CPP/46
CPP/47
CPP/48
CPP/49
CPP/50
CPP/51
CPP/52
CPP/53
CPP/54
CPP/55
CPP/56
CPP/57
CPP/58
CPP/59
CPP/60
CPP/61
CPP/62
CPP/63
CPP/64
CPP/65
CPP/66
CPP/67
CPP/68
CPP/69
CPP/70
CPP/71
CPP/72
CPP/73
CPP/74
CPP/75
CPP/76
CPP/77
CPP/78
CPP/79
CPP/80
CPP/81
CPP/82
CPP/83
CPP/84
CPP/85
CPP/86
CPP/87
CPP/88
CPP/89
CPP/90
CPP/91
CPP/92
CPP/93
CPP/94
CPP/95
CPP/96
CPP/97
CPP/98
CPP/99
CPP/100
CPP/101
CPP/102
CPP/103
CPP/104
CPP/105
CPP/106
CPP/107
CPP/108
CPP/109
CPP/110
CPP/111
CPP/112
CPP/113
CPP/114
CPP/115
CPP/116
CPP/117
CPP/118
CPP/119
CPP/120
CPP/121
CPP/122
CPP/123
CPP/124
CPP/125
CPP/126
CPP/127
CPP/128
CPP/129
CPP/130
CPP/131
CPP/132
CPP/133
CPP/134
CPP/135
CPP/136
CPP/137
CPP/13