In [2]:
import os
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

def get_prompt(sentence, word):
    """Build the few-shot sentence-completion prompt for one incomplete sentence.

    Args:
        sentence: The incomplete sentence; appended verbatim to the end of the
            prompt, directly after the final instruction.
        word: Requested completion length in words; interpolated into the
            instruction text.

    Returns:
        The instruction text with three worked input/output examples,
        followed by ``sentence``.
    """
    # Whitespace is spelled out explicitly: some lines end in a single space
    # and the example lines carry a four-space indent, all of which is part of
    # the text sent to the model.
    instruction = (
        "You are an expert sentence completion bot. "
        "I will provide you with incomplete sentences. "
        f"Your job is to complete these sentences in {word} words. "
        "Also, the output should just be the remaining part of the sentence and not the entire sentence. "
        "I am providing you with a few examples of input and expected output. "
        "Example 1: "
    )
    prompt_lines = [
        instruction,
        "    input: The rain was",
        "    output: going to flood the entire city",
        "    Example 2: ",
        "    input: The party was about to end after",
        "    output: the birthday cake was distributed",
        "    Example 3:",
        "    input: Jack fought with him because",
        "    output: he was insecure and jealous",
        "    Now it is your turn, complete this sentence and provide me only the remaining part of the sentence: ",
    ]
    return "\n".join(prompt_lines) + sentence

# Take the Gemini API key from the GOOGLE_API_KEY environment variable rather
# than embedding it in the notebook. A missing variable fails right here with
# a KeyError instead of surfacing later as an authentication error on the
# first request.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

def main(prompt, temperature, top_p, top_k, max_output_tokens, percentage_ai_content):
    """Send ``prompt`` to gemini-1.5-flash in a fresh chat and return the reply text.

    Args:
        prompt: Text sent as the only message of a new, empty-history chat.
        temperature: Forwarded unchanged in the generation config.
        top_p: Forwarded unchanged in the generation config.
        top_k: Forwarded unchanged in the generation config.
        max_output_tokens: Forwarded unchanged in the generation config.
        percentage_ai_content: Accepted for the caller's convenience but not
            used anywhere in this function.

    Returns:
        The ``text`` attribute of the response to ``prompt``.
    """
    sampling_config = dict(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_output_tokens=max_output_tokens,
        response_mime_type="text/plain",
    )

    # Every listed harm category gets the same BLOCK_NONE threshold.
    no_blocking = HarmBlockThreshold.BLOCK_NONE
    safety = {
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: no_blocking,
        HarmCategory.HARM_CATEGORY_HARASSMENT: no_blocking,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: no_blocking,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: no_blocking,
    }

    # A new model object and chat session are created on every call, so no
    # conversation history carries over between prompts.
    gemini = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=sampling_config,
        safety_settings=safety,
    )
    reply = gemini.start_chat(history=[]).send_message(prompt)
    return reply.text

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import pandas as pd
import time
import csv  # To handle proper quoting of CSV

# Load the original data
df = pd.read_csv("../data/test_corpus/test_original.csv")

# Fixed generation settings; only the requested completion length is swept.
temperature = 0.2
top_p = 0.9
top_k = 0
max_output_tokens = 8192
word_list = ["5", "10", "15", "20"]
percentage_ai_count = "1-2 lines"  # passed to main(), which does not use it

# Alternative sweep values (currently unused):
# temperature_list = [0.2, 0.4, 0.6, 0.8, 1]
# top_p_list = [0.8, 0.6, 0.4, 0.2]

# Client-side throttle: after this many requests, pause before sending the next one.
max_requests_per_window = 10
window_seconds = 60

# Make sure the destination exists up front, so a missing directory cannot
# fail the save after all requests for a word length have already been made.
output_dir = "../data/test_corpus/gemini/ai_generated_content"
os.makedirs(output_dir, exist_ok=True)

# Requests sent since the last pause
request_count = 0

# Iterate over each requested completion length
for word in word_list:
    print(f"Processing word length value: {word}")

    # Create a copy of the original dataframe to avoid overwriting
    df_copy = df.copy()
    count = 0

    for index, row in df_copy.iterrows():
        # Throttle per request rather than per word length, so the limit holds
        # for any number of rows and no pause happens after the final request.
        if request_count >= max_requests_per_window:
            print(f"Sleeping for {window_seconds} seconds...")
            time.sleep(window_seconds)
            request_count = 0

        prompt = get_prompt(row['Xi'], word)  # Get the prompt from 'Xi'

        # Replace newlines with spaces and remove double quotes
        response = main(
            prompt, temperature, top_p, top_k, max_output_tokens, percentage_ai_count
        ).replace('\n', ' ').replace('"', '')
        request_count += 1

        print(f"Incomplete Sentence {count}: {row['Xi']}")
        print(f"Complete Sentence {count}: {response}")
        count += 1

        # Record the completion together with the settings that produced it
        df_copy.at[index, 'Xj'] = response
        df_copy.at[index, 'model'] = 'gemini-1.5-flash'
        df_copy.at[index, 'temperature'] = temperature
        df_copy.at[index, 'top_p'] = top_p
        df_copy.at[index, 'top_k'] = top_k
        df_copy.at[index, 'max_output_tokens'] = max_output_tokens
        # NOTE(review): this column holds the swept word count, not the
        # `percentage_ai_count` variable above -- confirm that is intended.
        df_copy.at[index, 'percentage_ai_count'] = word

    # Save the dataframe to a CSV specific to this word length
    df_copy.to_csv(f"{output_dir}/num_of_words{word}.csv", index=False)
    print(f"Saved CSV for word_length: {word}")



Processing word length value: 5
Incomplete Sentence 0: Thequestionofhowhuman decision-makersdeterminetheb
Complete Sentence 0: est course of action is complex.  
Incomplete Sentence 1: informed by na turalistic p riors . The agent m ig
Complete Sentence 1: ht learn from experience.  
Incomplete Sentence 2: Classification problems are central to many applications of machine learning, including
Complete Sentence 2: image recognition and natural language processing  
Incomplete Sentence 3: Lifelong learning policies aim to create a skilled workforce capable of adapting to the demands
Complete Sentence 3: of a rapidly changing world.  
Incomplete Sentence 4: Bayesian methods are widely used in machine
Complete Sentence 4: learning and data analysis.  
Saved CSV for word_length: 5
Processing word length value: 10
Incomplete Sentence 0: Thequestionofhowhuman decision-makersdeterminetheb
Complete Sentence 0: est course of action is a complex one.  
Incomplete Sentence 1: informed by na turali