Run until the API quota is exhausted

In [None]:
import json
import csv
import os
import time
import google.generativeai as genai

# Configure the Gemini client from the GEMINI_API_KEY environment variable.
# setdefault() keeps a key that is already exported in the environment; a plain
# assignment of "" here would overwrite it on every run. Do not paste a real key
# into this cell -- export GEMINI_API_KEY before starting the notebook instead.
os.environ.setdefault("GEMINI_API_KEY", "")
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Thin wrapper around a single Gemini text-generation request
def call_gemini_api(prompt):
    """Send ``prompt`` to the 'gemini-pro' model and return the reply text.

    Any exception raised while creating the model, generating content, or
    reading the reply's ``text`` is printed and swallowed; an empty string is
    returned in that case so the caller can carry on.
    """
    try:
        return genai.GenerativeModel('gemini-pro').generate_content(prompt).text
    except Exception as err:
        print(f"Error calling Gemini API: {err}")
    # Only reached after a failure: an empty string stands for "no answer".
    return ""

# Build the Java-generation prompt and forward it to Gemini
def generate_code(natural_language_desc):
    """Return Gemini's answer to a Java code-generation request.

    The description is placed on the second line of a two-line prompt; whatever
    ``call_gemini_api`` returns for that prompt is handed back unchanged.
    """
    instruction = "Generate Java code for the following task:"
    request = "\n".join([instruction, f"Description: {natural_language_desc}"])
    return call_gemini_api(request)

# Resolve the dataset path relative to the current working directory
def locate_dataset(dataset_name="concode/test.json"):
    """Return the path of ``dataset_name`` joined onto the working directory.

    Raises:
        FileNotFoundError: if nothing exists at the resulting path.
    """
    working_dir = os.getcwd()
    candidate = os.path.join(working_dir, dataset_name)

    # Guard clause: fail early with a message naming the directory that was searched.
    if not os.path.exists(candidate):
        raise FileNotFoundError(f"Dataset '{dataset_name}' not found in the current directory: {working_dir}")
    return candidate

# Function to read the Concode dataset (one JSON object per line)
def load_concode_dataset(dataset_path):
    """Load a line-delimited JSON file into a list of parsed objects.

    Args:
        dataset_path: Path of a text file holding one JSON document per line.

    Returns:
        list: The parsed value of every line that is valid JSON, in file order.

    Blank lines are skipped silently. A line that is not valid JSON is reported
    on stdout and skipped, so one corrupt record does not abort the load.
    """
    data = []
    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # default text encoding.
    with open(dataset_path, "r", encoding="utf-8") as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # An empty line is not a record; do not report it as a decode error.
                continue
            try:
                data.append(json.loads(stripped))  # Load each line as JSON
            except json.JSONDecodeError:
                print(f"Error decoding JSON on line: {line}")
    return data

# Rewrite Concode's separator tokens in a description as short symbols
def clean_nl(nl):
    """Return ``nl`` with the two Concode separator tokens substituted.

    ``concode_field_sep`` becomes ``|`` and ``concode_elem_sep`` becomes ``->``;
    every other character is left as it is.
    """
    substitutions = (
        ("concode_field_sep", "|"),
        ("concode_elem_sep", "->"),
    )
    cleaned = nl
    for token, symbol in substitutions:
        cleaned = cleaned.replace(token, symbol)
    return cleaned

# Function to save results to CSV
def save_to_csv(results, output_csv="GeminiResponse2025.csv"):
    """Write all result rows to ``output_csv``, replacing any existing file.

    Args:
        results: Sequence of dicts. The keys of the first dict become the
            header row and the column order.
        output_csv: Destination path of the CSV file.

    An empty ``results`` leaves an empty file behind instead of raising
    IndexError on ``results[0]``.
    """
    # UTF-8 is set explicitly so text outside the platform's default code page
    # cannot make the write fail.
    with open(output_csv, "w", newline="", encoding="utf-8") as csvfile:
        if not results:
            return
        writer = csv.DictWriter(csvfile, fieldnames=list(results[0].keys()))
        writer.writeheader()
        writer.writerows(results)

# # Function to save results to JSON
# def save_to_json(results, output_json="Gemini_output.json"):
#     with open(output_json, "w") as jsonfile:
#         json.dump(results, jsonfile, indent=4)

# Function to append a single result to CSV
def append_to_csv(result, output_csv="GeminiResponse2025.csv"):
    """Append one result row to ``output_csv``, creating the file if needed.

    Args:
        result: Dict mapping column name to value; its keys define the columns.
        output_csv: Path of the CSV file to append to.

    The header row is written when the file is missing or has zero length, so a
    pre-created empty file still ends up with a header.
    """
    needs_header = not os.path.exists(output_csv) or os.path.getsize(output_csv) == 0
    # UTF-8 is set explicitly so text outside the platform's default code page
    # cannot make the write fail.
    with open(output_csv, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=list(result.keys()))
        if needs_header:
            writer.writeheader()
        writer.writerow(result)

# # Function to append a single result to JSON
# def append_to_json(result, output_json="Gemini_output.json"):
#     if os.path.exists(output_json):
#         with open(output_json, "r") as jsonfile:
#             data = json.load(jsonfile)
#     else:
#         data = []
#     data.append(result)
#     with open(output_json, "w") as jsonfile:
#         json.dump(data, jsonfile, indent=4)

# Main code: load the dataset, ask Gemini for Java code per example, and append
# every answer to the CSV file as soon as it arrives.
if __name__ == "__main__":
    try:
        # Automatically locate the dataset in the current directory
        dataset_path = locate_dataset()

        # Load dataset
        dataset = load_concode_dataset(dataset_path)

        # Number of prompts to process before pausing
        batch_size = 50  # Adjust this based on API rate limits
        pause_duration = 60  # Pause for 60 seconds after each batch
        total = len(dataset)

        # Iterate through the dataset and generate code for each example
        for i, example in enumerate(dataset):
            try:
                natural_language_desc = clean_nl(example["nl"])  # Clean natural language description
            except (KeyError, TypeError):
                # A record without a usable "nl" entry must not abort the whole run.
                print(f"Error: Missing 'nl' field in example {i + 1}. Skipping this example.")
                continue

            java_code = generate_code(natural_language_desc)

            # Store the result
            result = {
                "Example": i + 1,
                "Generated Java Code": java_code
            }

            # Append the result to the CSV immediately so finished work survives an interruption
            append_to_csv(result)

            print(f"Example {i + 1}:")

            # Pause after each batch to avoid hitting API limits; there is nothing
            # left to throttle after the final example, so skip the sleep there.
            if (i + 1) % batch_size == 0 and (i + 1) < total:
                print(f"Processed {i + 1} prompts. Pausing for {pause_duration} seconds...")
                time.sleep(pause_duration)

        # Only the CSV writer is active (the JSON helpers are commented out).
        print("All prompts processed. Results saved to CSV file.")

    except FileNotFoundError as e:
        print(e)
    except Exception as e:
        print(f"An error occurred: {e}")

New version (Gemini 2.0 Flash, with code-block extraction)

In [None]:
import json
import csv
import os
import time
import google.generativeai as genai
import re

# Configure the Gemini client without storing the API key in the notebook.
# The key is taken from the GOOGLE_API_KEY environment variable; when it is not
# set, the user is prompted for it (input is not echoed).
# NOTE(review): a literal key was previously committed on this line -- treat it
# as compromised and revoke it.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass  # imported here so the cell needs no other edit
    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Gemini API key: ")
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])  # Configure the API key

# Function to call the Gemini API for code generation
def call_gemini_api(prompt):
    """Ask the "gemini-2.0-flash" model to answer ``prompt``.

    Returns the reply's text, or an empty string when creating the model,
    generating content, or reading the text raises; the exception is printed
    rather than propagated.
    """
    answer = ""  # value handed back when anything below fails
    try:
        gemini = genai.GenerativeModel("gemini-2.0-flash")
        answer = gemini.generate_content(prompt).text
    except Exception as api_error:
        print(f"Error calling Gemini API: {api_error}")
    return answer




# Function to extract code block from the response (Fallback)
def extract_code_block(response):
    """Return the code held in a Markdown fence of ``response``.

    Args:
        response: Text returned by the model; may be empty or None.

    Returns:
        str: The stripped content of the first fence tagged ``java`` (any
        letter case). If there is none, the stripped content of the first fence
        whose opening line is followed by a newline. If there is no such fence
        either, the stripped response itself. An empty or None response yields
        an empty string.
    """
    if not response:
        return ""

    # Preferred: a fence tagged as Java. \b keeps tags such as "javascript"
    # from being taken for Java; IGNORECASE accepts "Java" and "JAVA".
    java_fence = re.search(r"```[ \t]*java\b(.*?)```", response, re.DOTALL | re.IGNORECASE)
    if java_fence:
        return java_fence.group(1).strip()

    # Otherwise: any fence whose opening line carries at most an info string,
    # so an untagged block is still returned without its backticks.
    any_fence = re.search(r"```[^\n`]*\n(.*?)```", response, re.DOTALL)
    if any_fence:
        return any_fence.group(1).strip()

    return response.strip()  # No fence found: return the text as it is




# Function to generate code using the Gemini API
def generate_code(natural_language_desc):
    """Request Java code for ``natural_language_desc`` and return the code only.

    The prompt tells the model to answer with code alone; the reply is then
    passed through ``extract_code_block`` and that result is returned.
    """
    # The pieces below reproduce the original indented, multi-line prompt exactly,
    # including its leading newline and trailing indentation.
    prompt = (
        "\n"
        "    Generate Java code for the following task. Return ONLY the code, without any explanations, comments, or additional text.\n"
        f"    Description: {natural_language_desc}\n"
        "    "
    )
    return extract_code_block(call_gemini_api(prompt))



# Function to automatically locate the dataset
def locate_dataset(dataset_name="concode/test.json"):
    """Build the dataset path under the working directory and check it exists.

    Returns:
        str: ``dataset_name`` joined onto ``os.getcwd()``.

    Raises:
        FileNotFoundError: when that path does not exist.
    """
    base_dir = os.getcwd()
    full_path = os.path.join(base_dir, dataset_name)

    if not os.path.exists(full_path):
        message = f"Dataset '{dataset_name}' not found in the current directory: {base_dir}"
        raise FileNotFoundError(message)
    return full_path
    

def load_concode_dataset(dataset_path):
    """Parse a UTF-8, one-JSON-object-per-line file into a list.

    Every line is stripped and handed to ``json.loads``. Lines that fail to
    parse are reported on stdout and left out; the remaining objects are
    returned in file order.
    """
    records = []
    with open(dataset_path, "r", encoding='utf-8') as handle:
        for raw_line in handle:
            try:
                parsed = json.loads(raw_line.strip())
            except json.JSONDecodeError:
                print(f"Error decoding JSON on line: {raw_line}")
            except Exception as err:
                print(f"Unexpected error parsing JSON: {err}")
            else:
                records.append(parsed)
    return records



# Function to clean the natural language description by rewriting its separators
def clean_nl(nl):
    """Swap Concode separator tokens for symbols: field -> ``|``, element -> ``->``."""
    # Splitting on a token and joining with its symbol replaces every occurrence.
    with_field_marks = "|".join(nl.split("concode_field_sep"))
    return "->".join(with_field_marks.split("concode_elem_sep"))

# Function to append a single result to CSV
def append_to_csv(result, output_csv="GeminiResponse2025.csv"):
    """Append one result row to ``output_csv``, creating the file if needed.

    Args:
        result: Dict mapping column name to value; its keys define the columns.
        output_csv: Path of the UTF-8 CSV file to append to.

    The header row is written when the file is missing or has zero length, so a
    pre-created empty file still ends up with a header.
    """
    needs_header = not os.path.exists(output_csv) or os.path.getsize(output_csv) == 0
    with open(output_csv, "a", newline="", encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=list(result.keys()))
        if needs_header:
            writer.writeheader()
        writer.writerow(result)



# Driver: read the dataset, generate Java code per example, append each answer to the CSV
if __name__ == "__main__":
    try:
        batch_size = 50      # examples handled between two pauses
        pause_duration = 60  # seconds slept at each pause

        dataset_path = locate_dataset()
        dataset = load_concode_dataset(dataset_path)

        for example_number, example in enumerate(dataset, start=1):
            try:
                java_code = generate_code(clean_nl(example["nl"]))

                append_to_csv({
                    "Example": example_number,
                    "Generated Java Code": java_code,
                })

                print(f"Example {example_number}:")

                if example_number % batch_size:
                    continue  # not at a batch boundary yet

                print(f"Processed {example_number} prompts. Pausing for {pause_duration} seconds...")
                time.sleep(pause_duration)
            except KeyError:
                # Reported as a missing 'nl' entry; the loop moves on to the next example.
                print(f"Error: Missing 'nl' field in example {example_number}. Skipping this example.")
            except Exception as err:
                print(f"Unexpected error processing Example {example_number}: {err}")

        print("All prompts processed. Results saved to CSV files.")

    except FileNotFoundError as missing:
        print(missing)
    except Exception as err:
        print(f"An error occurred: {err}")

Cleaning the CSV output

In [1]:
import pandas as pd

# Source file (the default output name of the generation cells) and the
# destination for the cleaned copy
input_file = "GeminiResponse2025.csv"  # Replace with your file name
output_file = "Output.csv"  # Output file name

# Read the responses, drop every row whose 'Generated Java Code' value is
# missing, then renumber the 'Example' column 1..N so it has no gaps.
df = (
    pd.read_csv(input_file)
    .dropna(subset=['Generated Java Code'])
    .assign(Example=lambda kept: range(1, len(kept) + 1))
)

# Write the cleaned rows without the DataFrame index column
df.to_csv(output_file, index=False)

print(f"Cleaned file saved as {output_file}")

Cleaned file saved as Output.csv
