In [None]:
!pip install pandas openai

Collecting openai
  Downloading openai-1.42.0-py3-none-any.whl.metadata (22 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.42.0-py3-none-any.whl (362 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.9/362.9 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K   [90m━━

In [None]:
# Mount Google Drive at /content/drive/ so the dataset files read and written
# below (all under /content/drive/MyDrive/...) are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import openai
import pandas as pd
from tqdm import tqdm
import time

import os
from google.colab import userdata

In [None]:
# Fetch the key from the Colab secret store and fail fast if it is absent or blank.
openai_api_key = userdata.get('OPENAI_API_KEY')
if not (openai_api_key or "").strip():
    raise ValueError("OpenAI API key is not set. Please set it in Colab secrets.")

# Hand the validated key to the openai module for the requests made later on.
openai.api_key = openai_api_key
print("OpenAI API key set successfully.")

OpenAI API key set successfully.


In [None]:
# Locations of the hand-written seed test cases on the mounted Drive.
keyword_driven_csv = '/content/drive/MyDrive/WORKS/SOFTWARE-DRIVEN-TEST AUTOMATION/dataset & info/SDT UAR Test Cases/keyDriven_testcases.csv'
data_driven_csv = '/content/drive/MyDrive/WORKS/SOFTWARE-DRIVEN-TEST AUTOMATION/dataset & info/SDT UAR Test Cases/Data_driven_testcases.csv'

# Read both seed datasets; they provide the few-shot examples and the column layout.
keyword_driven_df = pd.read_csv(keyword_driven_csv)
data_driven_df = pd.read_csv(data_driven_csv)

In [None]:
def get_examples(df, n=3, random_state=None):
    """Draw up to ``n`` random rows from ``df`` to use as few-shot examples.

    Args:
        df: DataFrame of existing test cases.
        n: Number of examples wanted. Capped at ``len(df)`` so that a small
            seed dataset no longer makes ``DataFrame.sample`` raise.
        random_state: Optional seed forwarded to ``DataFrame.sample`` for
            reproducible draws; ``None`` keeps the draw random.

    Returns:
        A list of dicts, one per sampled row, mapping column name to value.
        Empty when ``df`` has no rows or ``n`` is 0.
    """
    sample_size = min(n, len(df))
    if sample_size == 0:
        # Nothing to draw from (or nothing requested).
        return []
    return df.sample(sample_size, random_state=random_state).to_dict('records')

In [None]:
def create_prompt(examples, test_type):
    """Build the few-shot prompt asking the model for one new test case.

    Args:
        examples: Iterable of dicts (column name -> value); each is rendered
            as a numbered "Example i:" section with one "key: value" line
            per field.
        test_type: Label interpolated into the instructions
            (e.g. 'keyword-driven').

    Returns:
        The prompt text: intro, the example sections, then the closing
        instruction.
    """
    intro = f"Generate a new {test_type} test case following EXACTLY the structure, style, and column names of these examples:\n\n"

    sections = []
    for index, example in enumerate(examples, start=1):
        field_lines = "".join(f"{key}: {value}\n" for key, value in example.items())
        # Each section is followed by a blank line.
        sections.append(f"Example {index}:\n{field_lines}\n")

    closing = f"Now, create a new {test_type} test case following this EXACT structure and style. Ensure all fields are filled correctly and the content is relevant to {test_type} testing. Make sure to maintain consistency with the existing data in terms of keywords, prompt types, and documentation style."
    return intro + "".join(sections) + closing

In [None]:
def generate_test_case(prompt, model="gpt-4", max_tokens=1000):
    """Request one generated test case from the OpenAI chat completions API.

    Args:
        prompt: User message sent to the model (the few-shot prompt).
        model: Chat model name; defaults to the previously hard-coded "gpt-4".
        max_tokens: Upper bound on reply length; defaults to the previously
            hard-coded 1000.

    Returns:
        The text of the first choice's message, or ``None`` if the request
        (or reading the response) raised. Errors are printed rather than
        propagated so that a long generation loop keeps running.
    """
    try:
        response = openai.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a test case generator that precisely follows given examples and instructions. Ensure consistency with existing data and maintain the same column structure."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=max_tokens
        )
        choice = response.choices[0]
        # A reply cut off at the token limit would otherwise be parsed as a
        # complete (but partially filled) test case without any hint.
        if getattr(choice, "finish_reason", None) == "length":
            print(f"Warning: response hit the {max_tokens}-token limit and may be truncated.")
        return choice.message.content
    except Exception as e:
        print(f"Error generating test case: {e}")
        return None

In [None]:
def parse_generated_case(content, df):
    """Parse "Column: value" text from the model into a row dict for ``df``.

    A line opens a new field only when it starts with one of ``df``'s column
    names followed by a colon. Every other line is appended to the field
    currently being read, so multi-line values may themselves contain colons
    (URLs, "Step 1: ...") without being cut short, and "Column:" with the
    value on the following lines is recognised.

    Args:
        content: Raw text returned by the model; ``None`` is treated as empty.
        df: DataFrame whose columns define the accepted field names.

    Returns:
        Dict with one key per column of ``df``. Values are strings stripped
        of surrounding whitespace; fields the text never mentions are ''.
        Lines before the first recognised field are ignored.
    """
    parsed_case = {col: '' for col in df.columns}
    # Try longer names first so a column whose name extends another one
    # (or contains a colon) wins over the shorter prefix.
    columns_by_length = sorted(parsed_case, key=lambda col: len(str(col)), reverse=True)
    current_key = None
    for line in (content or '').split('\n'):
        stripped = line.strip()
        matched = next(
            (col for col in columns_by_length if stripped.startswith(f"{col}:")),
            None,
        )
        if matched is not None:
            current_key = matched
            parsed_case[matched] = stripped[len(f"{matched}:"):].strip()
        elif current_key is not None:
            # Continuation of a multi-line value.
            parsed_case[current_key] += '\n' + stripped
    # Drop the blank lines that separate fields in the model output.
    return {col: text.strip() for col, text in parsed_case.items()}

In [None]:
def generate_new_test_cases(df, test_type, count, delay=1):
    """Generate up to ``count`` new test cases modelled on the rows of ``df``.

    Each iteration samples examples from ``df``, builds a prompt, asks the
    model for a test case and parses the reply into a row dict.

    Args:
        df: Existing test cases; provides the examples and the column names.
        test_type: Label used in the prompt (e.g. 'keyword-driven').
        count: Number of generation attempts.
        delay: Seconds to pause after each attempt to avoid rate limiting;
            defaults to the previously hard-coded 1. Use 0 to disable.

    Returns:
        List of parsed row dicts. It can be shorter than ``count``: attempts
        whose request failed, or whose reply filled none of the columns, are
        skipped (and counted in a printed summary) instead of being added as
        blank rows.
    """
    new_test_cases = []
    skipped = 0
    for _ in tqdm(range(count)):
        examples = get_examples(df)
        prompt = create_prompt(examples, test_type)
        generated_content = generate_test_case(prompt)
        parsed_case = parse_generated_case(generated_content, df) if generated_content else None
        # A reply that matched none of the columns parses to an all-empty row.
        if parsed_case and any(str(value).strip() for value in parsed_case.values()):
            new_test_cases.append(parsed_case)
        else:
            skipped += 1
        if delay:
            time.sleep(delay)  # To avoid rate limiting
    if skipped:
        print(f"Generated {len(new_test_cases)}/{count} {test_type} test cases ({skipped} skipped).")
    return new_test_cases

In [None]:
# Produce 350 synthetic cases per test style. Each call performs one API
# request per case, so this cell is by far the slowest in the notebook.
new_keyword_cases = generate_new_test_cases(
    df=keyword_driven_df,
    test_type='keyword-driven',
    count=350,
)
new_data_cases = generate_new_test_cases(
    df=data_driven_df,
    test_type='data-driven',
    count=350,
)

100%|██████████| 350/350 [1:59:04<00:00, 20.41s/it]
 75%|███████▍  | 261/350 [1:30:42<29:51, 20.13s/it]

In [None]:
# Turn the lists of parsed row dicts into DataFrames (one row per generated case).
new_keyword_df = pd.DataFrame(data=new_keyword_cases)
new_data_df = pd.DataFrame(data=new_data_cases)

In [None]:
# Save only the newly generated test cases as Excel workbooks, one per test style.
for generated_df, xlsx_path in (
    (new_keyword_df, '/content/drive/MyDrive/WORKS/SOFTWARE-DRIVEN-TEST AUTOMATION/dataset & info/SDT UAR Test Cases/test_keyword_generated.xlsx'),
    (new_data_df, '/content/drive/MyDrive/WORKS/SOFTWARE-DRIVEN-TEST AUTOMATION/dataset & info/SDT UAR Test Cases/test_data_generated.xlsx'),
):
    generated_df.to_excel(xlsx_path, index=False)

In [None]:
# Save only the newly generated test cases as CSV files, one per test style.
for generated_df, csv_path in (
    (new_keyword_df, '/content/drive/MyDrive/WORKS/SOFTWARE-DRIVEN-TEST AUTOMATION/dataset & info/SDT UAR Test Cases/test_keyword_generated.csv'),
    (new_data_df, '/content/drive/MyDrive/WORKS/SOFTWARE-DRIVEN-TEST AUTOMATION/dataset & info/SDT UAR Test Cases/test_data_generated.csv'),
):
    generated_df.to_csv(csv_path, index=False)

In [None]:
# Append the generated rows below the original ones, renumbering the index from 0.
combined_keyword_df = pd.concat(
    objs=[keyword_driven_df, new_keyword_df],
    ignore_index=True,
)
combined_data_df = pd.concat(
    objs=[data_driven_df, new_data_df],
    ignore_index=True,
)

In [None]:
# Save the combined (original + generated) datasets as CSV files.
for combined_df, csv_path in (
    (combined_keyword_df, '/content/drive/MyDrive/WORKS/SOFTWARE-DRIVEN-TEST AUTOMATION/dataset & info/SDT UAR Test Cases/combined_keyword_driven.csv'),
    (combined_data_df, '/content/drive/MyDrive/WORKS/SOFTWARE-DRIVEN-TEST AUTOMATION/dataset & info/SDT UAR Test Cases/combined_data_driven.csv'),
):
    combined_df.to_csv(csv_path, index=False)

In [None]:
# Save the combined (original + generated) datasets as Excel workbooks.
for combined_df, xlsx_path in (
    (combined_keyword_df, '/content/drive/MyDrive/WORKS/SOFTWARE-DRIVEN-TEST AUTOMATION/dataset & info/SDT UAR Test Cases/combined_keyword_driven.xlsx'),
    (combined_data_df, '/content/drive/MyDrive/WORKS/SOFTWARE-DRIVEN-TEST AUTOMATION/dataset & info/SDT UAR Test Cases/combined_data_driven.xlsx'),
):
    combined_df.to_excel(xlsx_path, index=False)