In [2]:
import dotenv
import os

from ibm_watsonx_ai.foundation_models.utils.enums import ModelTypes
from ibm_watsonx_ai.foundation_models import Model
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from ibm_watsonx_ai.foundation_models.extensions.langchain import WatsonxLLM

In [3]:
dotenv.load_dotenv()

api_key = os.getenv("API_KEY", None)
project_id = os.getenv("PROJECT_ID", None)

creds = {
    "url"    : "https://us-south.ml.cloud.ibm.com",
    "apikey" : api_key
}

params = {
    GenParams.DECODING_METHOD:"greedy",
    GenParams.MAX_NEW_TOKENS:3000,
    GenParams.MIN_NEW_TOKENS:1,
    # GenParams.TEMPERATURE:0.5,
}

In [4]:
model = Model(ModelTypes.LLAMA_2_70B_CHAT,creds,params,project_id)

In [5]:
javacode = '''import java.util.*;

public class Main {

    public static void main(String[] args) {
        Scanner in = new Scanner(System.in);
        int n = in.nextInt();
        int[] a = new int[n];
        for(int a_i=0; a_i < n; a_i++){
            a[a_i] = in.nextInt();
        }
        int result = solve(n, a);
        System.out.println(result);
    }

    // test on asian name
    public void fillform(string lastname, string firstname) {
        System.out.println(lastname);
        System.out.println(firstname);
    }

    // test on n from 10 to 30
    // test on a on length of 10 to 20 and value from 30 to 100
    public static int solve(int n, int[] a) {
        int max = 0;
        for(int i = 0; i < n; i++){
            for(int j = i+1; j < n; j++){
                if(a[i] > a[j]){
                    int temp = a[i];
                    a[i] = a[j];
                    a[j] = temp;
                }
            }
        }
        for(int i = 0; i < n; i++){
            if(a[i] > max){
                max = a[i];
            }
        }
        return max;
    }
}'''

In [6]:
#generate test case and junit

prompt = f'''[INST]you are a java programmer, help generate junit testcase for following java code in backquoted:
<<SYS>>
java:`{javacode}`
<</SYS>>
[/INST]Testcase:'''
answer = ""
for response in model.generate_text([prompt]):
    answer += response
print(answer)



1. Test empty array
	* Input: n = 0, a = []
	* Expected output: 0
2. Test single element array
	* Input: n = 1, a = [10]
	* Expected output: 10
3. Test array with multiple elements in ascending order
	* Input: n = 3, a = [10, 20, 30]
	* Expected output: 30
4. Test array with multiple elements in descending order
	* Input: n = 3, a = [30, 20, 10]
	* Expected output: 30
5. Test array with duplicate elements
	* Input: n = 3, a = [10, 20, 10]
	* Expected output: 20
6. Test array with negative elements
	* Input: n = 3, a = [-10, -20, -30]
	* Expected output: -30
7. Test array with very large elements
	* Input: n = 3, a = [1000, 2000, 3000]
	* Expected output: 3000
8. Test array with very small elements
	* Input: n = 3, a = [0.1, 0.2, 0.3]
	* Expected output: 0.3
9. Test edge case where n is very large
	* Input: n = 1000, a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
	* Expected output: 10
10. Test edge case where n is very small
	* Input: n = 1, a = [1]
	* Expected output: 1

JUnit Test Case:
```
i

In [7]:
#generate test data, you see it can follow the test requirement mentioned, but fail to generate alot

prompt = f'''[INST]you are a java programmer, help generate 100 test data for following java code in backquoted:
- generate test data for fillformm, ensure the test data be unique. dont duplicate.
- generate test data for solve, ensure the test data be unique. dont duplicate.
- generate in json format.
- dont show notes.
<<SYS>>
java:`{javacode}`
<</SYS>>
[/INST]test data in json:```'''
testdata = ""
for response in model.generate_text([prompt]):
    testdata += response
print(testdata)


[
  {
    "lastname": "lee",
    "firstname": "john"
  },
  {
    "lastname": "kim",
    "firstname": "sarah"
  },
  {
    "lastname": "wang",
    "firstname": "michael"
  },
  {
    "lastname": "zhang",
    "firstname": "emily"
  },
  {
    "lastname": "li",
    "firstname": "kevin"
  },
  {
    "lastname": "chen",
    "firstname": "sophia"
  },
  {
    "lastname": "yang",
    "firstname": "daniel"
  },
  {
    "lastname": "xu",
    "firstname": "jessica"
  },
  {
    "lastname": "zhou",
    "firstname": "samantha"
  },
  {
    "lastname": "liu",
    "firstname": "ethan"
  },
  {
    "lastname": "wu",
    "firstname": "lily"
  },
  {
    "lastname": "zhang",
    "firstname": "jack"
  },
  {
    "lastname": "xu",
    "firstname": "anna"
  },
  {
    "lastname": "yang",
    "firstname": "tina"
  },
  {
    "lastname": "li",
    "firstname": "alex"
  },
  {
    "lastname": "chen",
    "firstname": "sabrina"
  },
  {
    "lastname": "zhang",
    "firstname": "ben"
  },
  {
    "lastname"

In [8]:
import numpy as np
from faker import Faker

def generate_normal_distribution(mean, std_dev, size, max_value):
    numbers = np.random.normal(mean, std_dev, size)
    positive_numbers = np.abs(numbers)
    positive_numbers = np.round(positive_numbers).astype(int)
    positive_numbers = np.clip(positive_numbers, 0, max_value)
    return positive_numbers.tolist()

# Example usage
mean = 500  # Mean of the normal distribution
std_dev = 200  # Standard deviation of the normal distribution
size = 100  # Number of values to generate
max_value = 1000  # Maximum value in the range

numbers_list = generate_normal_distribution(mean, std_dev, size, max_value)
print(numbers_list)

[702, 930, 601, 382, 310, 419, 506, 317, 177, 519, 563, 738, 735, 572, 411, 626, 491, 536, 321, 151, 301, 729, 610, 387, 736, 724, 922, 731, 628, 373, 495, 163, 788, 425, 221, 643, 774, 582, 539, 132, 881, 226, 467, 319, 515, 421, 463, 696, 819, 404, 131, 546, 551, 601, 320, 602, 565, 470, 307, 468, 332, 179, 556, 627, 584, 679, 526, 242, 126, 89, 181, 624, 227, 811, 600, 230, 250, 772, 541, 439, 425, 481, 873, 627, 536, 577, 553, 418, 406, 143, 710, 634, 626, 463, 634, 516, 887, 215, 737, 131]


In [15]:
from faker import Faker
import random

# Create Faker objects for locales that represent some of Singapore's ethnic groups
locales = ['zh_CN', 'hi_IN', 'ta_IN', 'en_GB']  # Chinese, Hindi, Tamil, English
fakers = [Faker(locale) for locale in locales]

# Function to randomly select a locale
# Adjust the weights to reflect the population mix as desired
def weighted_random_faker(fakers):
    return random.choices(fakers, weights=[74, 9, 9, 8], k=1)[0]

# Generate a list of unique names
unique_names = []
desired_unique_names = 100  # Replace with the number of unique names you want

while len(unique_names) < desired_unique_names:
    # Randomly choose one of the Faker objects representing different ethnicities
    faker = weighted_random_faker(fakers)
    name = faker.name()
    
    # Add the generated name to the unique_names list if it's not already in the list
    if name not in unique_names:
        unique_names.append(name)

# Print the list of unique names
for name in unique_names:
    print(name)

陶冬梅
வரதராஐன்
周慧
आदित्य पुष्कर
曹坤
सुलभा नाम
马建国
蒲成
谢建
毛秀云
赵敏
胡秀华
王雷
யாழ்வாணன்
朱秀芳
Naomi Cooper
李阳
韩秀英
姚军
朱颖
张秀珍
潘晶
莫鑫
சந்திர
唐红
高颖
आहूजा, शक्ति
方建
张凤兰
இசையொளி
எழினி ஸ்ரீசிவநாராயணன்
ஸ்கந்தா
吴晶
彭想
தணிகைச்செல்வன் மணிமாலா
刘桂芳
马晶
உதியஞ்சேரல்
许林
Cheryl Marshall-Warner
林玉华
Megan Andrews
廖婷婷
莫建国
विजया मदन
आशा दूबे
尚桂香
ரஞ்சனா
Bradley Harrison
申成
方红霞
李斌
程丽娟
申淑珍
江雪梅
தணிகைமுருகன் பவானி
缪瑜
Miss Marilyn Mason
马丽丽
王军
Dr Owen Armstrong
प्रदीप आहूजा
陈红
童军
Sophie Skinner
黄淑珍
杨鑫
张成
Callum Ward
吴杨
Natasha Quinn
எழில்
高杨
कम्बोज छाबरा
向凤兰
方建平
胡丹丹
徐博
李秀珍
மகிழ்ச்சிக்கு
张帆
徐超
ஏழிசைதேவி பவித்ரா
毛金凤
கதிரொளி
杨莹
姜桂珍
高鑫
洪英
梅利
रेयांश ड़ाल
जगन्नाथ बालकृष्णन
吴利
杨玲
焦秀芳
洪波
王林
John Wood
郑玲
张晶


In [18]:
#generate test data, try to generate more variation

for i in range(10):
    subarrays = numbers_list[i*10:i*10+9]
    subnames = unique_names[i*10:i*10+9]

    prompt = f'''[INST]you are a java programmer, help generate 10 test data to for following java code in backquoted:
    - generate test data for fillform method, make first name and last name be consistent in culture. make variation base on name seed, ensure be unique, dont duplicate.
    - generate test data for solve, ensure the test data be unique. dont duplicate.
    - generate test data in json format.
    - the test data should be base on seeding number, ensure the test data be unique. dont duplicate.
    - dont stop unless you get 10 test data.
    - show test data only.
    <<SYS>>
    java:`{javacode}`
    name seed: {subnames}
    number seed: `{subarrays}`
    <</SYS>>
    [/INST]test data in json:```'''
    testdata = ""

    for response in model.generate_text([prompt]):
        testdata += response
    print(testdata)


[
  {
    "lastname": "陶冬梅",
    "firstname": "陶冬梅",
    "n": 10,
    "a": [702, 930, 601, 382, 310, 419, 506, 317, 177]
  },
  {
    "lastname": "வரதராஐன்",
    "firstname": "வரதராஐன்",
    "n": 15,
    "a": [310, 419, 506, 317, 177, 702, 930, 601, 382]
  },
  {
    "lastname": "周慧",
    "firstname": "周慧",
    "n": 20,
    "a": [506, 317, 177, 702, 930, 601, 382, 310, 419]
  },
  {
    "lastname": "आदित्य पुष्कर",
    "firstname": "आदित्य पुष्कर",
    "n": 25,
    "a": [177, 317, 506, 382, 601, 930, 702, 419, 310]
  },
  {
    "lastname": "曹坤",
    "firstname": "曹坤",
    "n": 30,
    "a": [310, 419, 506, 382, 601, 930, 702, 419, 317, 177]
  },
  {
    "lastname": "सुलभा नाम",
    "firstname": "सुलभा नाम",
    "n": 20,
    "a": [702, 930, 601, 382, 310, 419, 506, 317, 177]
  },
  {
    "lastname": "马建国",
    "firstname": "马建国",
    "n": 25,
    "a": [506, 317, 177, 702, 930, 601, 382, 310, 419]
  },
  {
    "lastname": "蒲成",
    "firstname": "蒲成",
    "n": 30,
    "a": [310, 419, 506,