Ref:  
- [LLMs: Get predictions from a language model](https://python.langchain.com/en/latest/getting_started/getting_started.html#llms-get-predictions-from-a-language-model)
- [A simple guide to setting the GPT-3 temperature](https://algowriting.medium.com/gpt-3-temperature-setting-101-41200ff0d0be)
- [HumanEval-X: A new benchmark for Multilingual Program Synthesis](https://github.com/THUDM/CodeGeeX/blob/main/codegeex/benchmark/README.md)

## Import HumanEval-X (Java)

In [1]:
import json

In [2]:
HEP_FILE = 'datasets/HumanEval-X/humaneval_java.jsonl'  # Human Evalu Python

In [3]:
HEP = json.loads('[{}]'.format(','.join(list(line.rstrip() for line in open(HEP_FILE, 'r', encoding='utf-8')))))

In [4]:
LANGUAGE = HEP[0]['task_id'].split('/')[0]

In [5]:
LANGUAGE

'Java'

In [6]:
PROMPT = HEP[0]['prompt']

In [7]:
PROMPT

'import java.util.*;\nimport java.lang.*;\n\nclass Solution {\n    /**\n    Check if in given list of numbers, are any two numbers closer to each other than given threshold.\n    >>> hasCloseElements(Arrays.asList(1.0, 2.0, 3.0), 0.5)\n    false\n    >>> hasCloseElements(Arrays.asList(1.0, 2.8, 3.0, 4.0, 5.0, 2.0), 0.3)\n    true\n     */\n    public boolean hasCloseElements(List<Double> numbers, double threshold) {\n'

In [8]:
print(PROMPT)

import java.util.*;
import java.lang.*;

class Solution {
    /**
    Check if in given list of numbers, are any two numbers closer to each other than given threshold.
    >>> hasCloseElements(Arrays.asList(1.0, 2.0, 3.0), 0.5)
    false
    >>> hasCloseElements(Arrays.asList(1.0, 2.8, 3.0, 4.0, 5.0, 2.0), 0.3)
    true
     */
    public boolean hasCloseElements(List<Double> numbers, double threshold) {



## Ask CodeGeeX to generate code

In [9]:
import sys
from pathlib import Path

sys.path.append(str(Path('.').resolve().parents[0]))

from codegeex_api import CodeGeeX

#### <font size="7" color="orange">⚠</font> Do <span style="color:red">NOT</span> submit the config file to GitHub because of security.

In [10]:
codegeex_api_config = json.load(open('codegeex_api_config.json', 'r', encoding='utf-8'))

In [11]:
codegeex_api_config

{'api_base': 'http://localhost:8080/v1',
 'api_version': '2.1.0.0',
 'api_type': 'codegeex',
 'api_key': 'D4B94CC818A3D8A725CCC8FE68B97'}

**For "1.x.x.x", return string; for "2.x.x.x", return json.**

In [12]:
m = CodeGeeX(codegeex_api_config)  # model

In [13]:
m_return = m(PROMPT)

In [14]:
print(m_return)

{'stdout': ['        if (numbers == null || numbers.isEmpty()) return false;\n        if (numbers.size() == 1) return false;\n        // if the list is empty, return false\n        if (numbers.size() == 0) return false;\n        // if the threshold is 0, return false\n        if (threshold == 0) return false;\n        // sort the numbers\n        Collections.sort(numbers);\n        // the smallest number in the list\n        double firstElement = numbers.get(0);\n        // the largest number in the list\n        double lastElement = numbers.get(numbers.size() - 1);\n        // return false if the two smallest numbers in the list are closer than the given threshold\n        if (Math.abs(firstElement - lastElement) < threshold) return false;\n        // if the second smallest number in the list is closer than the given threshold, return true\n        if (Math.abs(firstElement - lastElement) < Math.abs(firstElement - numbers.get(1))) return true;\n        // continue this process until t

In [15]:
print(json.dumps(m_return, indent=4, sort_keys=True))

{
    "elapsed_time": 16.981456866953522,
    "stderr": "",
    "stdout": [
        "        if (numbers == null || numbers.isEmpty()) return false;\n        if (numbers.size() == 1) return false;\n        // if the list is empty, return false\n        if (numbers.size() == 0) return false;\n        // if the threshold is 0, return false\n        if (threshold == 0) return false;\n        // sort the numbers\n        Collections.sort(numbers);\n        // the smallest number in the list\n        double firstElement = numbers.get(0);\n        // the largest number in the list\n        double lastElement = numbers.get(numbers.size() - 1);\n        // return false if the two smallest numbers in the list are closer than the given threshold\n        if (Math.abs(firstElement - lastElement) < threshold) return false;\n        // if the second smallest number in the list is closer than the given threshold, return true\n        if (Math.abs(firstElement - lastElement) < Math.abs(firstElement -

**temperature & top_p**  

Ref:  
- Zheng, Q., Xia, X., Zou, X., Dong, Y., Wang, S., Xue, Y., Wang, Z., Shen, L., Wang, A., Li, Y. and Su, T., 2023. Codegeex: A pre-trained model for code generation with multilingual evaluations on humaneval-x. *arXiv preprint arXiv:2303.17568*.

#### 4.1 Evaluation Settings
Page 11:

*For CodeGeeX in code generation, we use t = 0:2; p = 0:95 for pass@1 and
t = 0:8; p = 0:95 for pass@10 and pass@100 (except for Go and JavaScript, where p = 0:9).*

In [16]:
m_return = m(PROMPT, temperature=0.2, top_p=0.95)

In [17]:
print(json.dumps(m_return, indent=4, sort_keys=True))

{
    "elapsed_time": 4.377203070092946,
    "stderr": "",
    "stdout": [
        "        if (numbers.size() < 2) return false;\n        double min = numbers.get(0);\n        double max = numbers.get(0);\n        for (double number : numbers) {\n            if (number < min) min = number;\n            if (number > max) max = number;\n        }\n        return Math.abs(min - max) < threshold;\n    }\n}<|endoftext|>"
    ]
}


In [19]:
# verbose=True will let more info print out to the sever's stderr.
# However, client will not get these info.

m_return = m(PROMPT, verbose=True)

In [20]:
print(json.dumps(m_return, indent=4, sort_keys=True))

{
    "elapsed_time": 3.5381057471968234,
    "stderr": "",
    "stdout": [
        "        double min = Double.MAX_VALUE;\n        double max = Double.MIN_VALUE;\n        for (double d : numbers) {\n            min = Math.min(min, d);\n            max = Math.max(max, d);\n        }\n        return (min - max) < threshold;\n    }\n}<|endoftext|>"
    ]
}


In [21]:
print(m_return['stdout'][0])

        double min = Double.MAX_VALUE;
        double max = Double.MIN_VALUE;
        for (double d : numbers) {
            min = Math.min(min, d);
            max = Math.max(max, d);
        }
        return (min - max) < threshold;
    }
}<|endoftext|>


**Run 164 and write to jsonl file**

In [22]:
import datetime

 JSON list format. Ref: https://github.com/THUDM/CodeGeeX/blob/main/codegeex/benchmark/README.md#evaluation

In [23]:
file_name = f'results/java_t02_p095_{datetime.datetime.now()}'.replace(':', '').replace('-', '').replace('.', '_').replace(' ', '_') + '.jsonl'

In [24]:
file_name

'results/java_t02_p095_20230712_091936_005485.jsonl'

In [25]:
import time

In [26]:
st = time.perf_counter()

with open(file_name, 'w') as f:
    for i in HEP:
        print(i['task_id'])
        
        # Generate code
        # use t = 0:2; p = 0:95 for pass@1
        m_return = m(i['prompt'], temperature=0.2, top_p=0.95)
        
        line = {
            'task_id': i['task_id'],
            'prompt': i['prompt'],
            'generation': m_return['stdout'][0].replace('<|endoftext|>', '')
        }

        f.write(json.dumps(line))
        f.write('\n')

et = time.perf_counter()
elapsed_time = et - st
print(f'{len(HEP)} taske(s), elapsed_time: {elapsed_time} S, {elapsed_time/60} minutes, {elapsed_time/60/60} hours.')


Java/0
Java/1
Java/2
Java/3
Java/4
Java/5
Java/6
Java/7
Java/8
Java/9
Java/10
Java/11
Java/12
Java/13
Java/14
Java/15
Java/16
Java/17
Java/18
Java/19
Java/20
Java/21
Java/22
Java/23
Java/24
Java/25
Java/26
Java/27
Java/28
Java/29
Java/30
Java/31
Java/32
Java/33
Java/34
Java/35
Java/36
Java/37
Java/38
Java/39
Java/40
Java/41
Java/42
Java/43
Java/44
Java/45
Java/46
Java/47
Java/48
Java/49
Java/50
Java/51
Java/52
Java/53
Java/54
Java/55
Java/56
Java/57
Java/58
Java/59
Java/60
Java/61
Java/62
Java/63
Java/64
Java/65
Java/66
Java/67
Java/68
Java/69
Java/70
Java/71
Java/72
Java/73
Java/74
Java/75
Java/76
Java/77
Java/78
Java/79
Java/80
Java/81
Java/82
Java/83
Java/84
Java/85
Java/86
Java/87
Java/88
Java/89
Java/90
Java/91
Java/92
Java/93
Java/94
Java/95
Java/96
Java/97
Java/98
Java/99
Java/100
Java/101
Java/102
Java/103
Java/104
Java/105
Java/106
Java/107
Java/108
Java/109
Java/110
Java/111
Java/112
Java/113
Java/114
Java/115
Java/116
Java/117
Java/118
Java/119
Java/120
Java/121
Java/122
Jav