In [28]:
import os
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

def get_prompt(sentence):
    """Build the few-shot completion prompt for one incomplete sentence.

    The fixed instruction text (task description followed by three
    input/output examples) is concatenated directly with ``sentence``,
    with nothing inserted between them.

    Args:
        sentence: The incomplete sentence to append to the instructions.

    Returns:
        The full prompt string.
    """
    # One entry per physical line of the prompt. Every line after the first
    # starts with four spaces, which the join separator below re-creates.
    prompt_lines = (
        "You are an expert sentence completion bot. I will provide you with incomplete sentences. "
        "Your job is to complete these sentences in 1 or 2 lines. "
        "Also, the output should just be the remaining part of the sentence and not the entire sentence. "
        "I am providing you with a few examples of input and expected output. Example 1: ",
        "input: The rain was",
        "output: going to flood the entire city",
        "Example 2: ",
        "input: The party was about to end after",
        "output: the birthday cake was distributed",
        "Example 3:",
        "input: Jack fought with him because",
        "output: he was insecure and jealous",
        "Now it is your turn, complete this sentence and provide me only the remaining part of the sentence: ",
    )

    return "\n    ".join(prompt_lines) + sentence

# Take the API key from the environment rather than embedding it in the
# notebook, so the credential never ends up in a shared or committed file.
# When GOOGLE_API_KEY is unset this falls back to the previous empty string.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))

def main(prompt):
    """Send ``prompt`` to a new gemini-1.5-flash chat session and return the reply text.

    A fresh model object and an empty-history chat session are created on
    every call, so no conversation state is carried between calls.

    Args:
        prompt: The text to send as the single chat message.

    Returns:
        The ``text`` attribute of the response to that message.
    """
    # Sampling and output options handed to the model.
    generation_config = dict(
        temperature=0.2,
        top_p=0.9,
        top_k=0,
        max_output_tokens=8192,
        response_mime_type="text/plain",
    )

    # Each listed harm category is configured with the BLOCK_NONE threshold.
    harm_categories = (
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
    safety_settings = {
        category: HarmBlockThreshold.BLOCK_NONE for category in harm_categories
    }

    model = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config=generation_config,
        safety_settings=safety_settings,
    )

    # Start from an empty history and send the prompt as the only message.
    reply = model.start_chat(history=[]).send_message(prompt)
    return reply.text

In [30]:
import pandas as pd
import time
import csv  # To handle proper quoting of CSV

# time.sleep(60)  # Pause for 60 seconds

# Resume point: the row position to continue from. It also selects which
# checkpoint file is loaded below.
val = 1989

df = pd.read_csv(f"./gemini_1.5_checkpoint_{val}.csv")

# Number of rows sent to the API between checkpoints / rate-limit pauses.
batch_size = 14

# Every completion produced during this run, in row order.
second_part = []
count = 1
checkpoint_interval = batch_size  # Saving after every 14 requests (batch size)

# Walk the remaining rows in batches of `batch_size`.
for i in range(val, len(df), batch_size):
    batch = df.iloc[i:i + batch_size]
    # Responses for this batch only. The final batch may be shorter than
    # `batch_size`, so it must not be reconstructed by slicing `second_part`.
    batch_responses = []

    for index, row in batch.iterrows():
        prompt = get_prompt(row['Xi'])  # Build the prompt from the 'Xi' column
        # Collapse newlines to spaces and drop double quotes before storing.
        response = main(prompt).replace('\n', ' ').replace('"', '')
        batch_responses.append(response)

        print(f"Incomplete Sentence {count}: {row['Xi']}")
        print(f"Complete Sentence {count}: {response}")
        count += 1

    second_part.extend(batch_responses)

    # Write back through the batch's own index labels so the number of target
    # rows always equals the number of responses, including on a short final
    # batch (the previous fixed-width slice raised a length mismatch there).
    df.loc[batch.index, 'Xj'] = batch_responses
    df.loc[batch.index, 'model'] = 'gemini-1.5-flash'

    # Name the checkpoint after the number of rows actually processed, so the
    # value can be used directly as `val` when resuming.
    processed = i + len(batch)
    df.to_csv(f"gemini_1.5_checkpoint_{processed}.csv", index=False, quoting=csv.QUOTE_ALL)
    print(f"Checkpoint saved after processing batch {i // batch_size + 1}")

    # Pause between batches to stay under the rate limit; skip after the last one.
    if processed < len(df):
        print("Waiting for 60 seconds to avoid rate limit...")
        print(f"Length of incompleted_sentences: {len(second_part)}")
        time.sleep(60)  # Pause for 60 seconds

# Save the final DataFrame to CSV
df.to_csv("gemini-1.5-flash_final.csv", index=False, quoting=csv.QUOTE_ALL)
print("Final dataset saved.")


Incomplete Sentence 1: Two guys with red capes ride
Complete Sentence 1: on motorcycles through the city streets.  
Incomplete Sentence 2: Looking down from the top of a cliff, a man is climbing the cliff
Complete Sentence 2: ...with ropes and climbing gear, determined to reach the summit.  
Incomplete Sentence 3: A woman wearing a gray shirt and blue
Complete Sentence 3: jeans walked into the cafe.  
Incomplete Sentence 4: Fathers reach out to their children encouraging them
Complete Sentence 4: to pursue their dreams and be the best versions of themselves.  
Incomplete Sentence 5: A man is taking a swing with his
Complete Sentence 5: golf club, aiming for the ball on the tee.  
Incomplete Sentence 6: Two men are walking up a ramp while a third walks
Complete Sentence 6: down the stairs.  
Incomplete Sentence 7: A women's roller derby
Complete Sentence 7: is a fast-paced, high-energy sport full of athleticism and strategy.  
Incomplete Sentence 8: Two men are at the beach
Complete Sen

In [20]:
batch_size = 100
# Preview a single batch: the rows from position 15 up to 15 + batch_size,
# printed as a nested list. Nothing is printed when df has 15 rows or fewer.
if len(df) > 15:
    i = 15
    batch_df = df.iloc[i:i + batch_size]
    print(batch_df.values.tolist())


[['A black dog running'], ['A man looking through a microscope while'], ['A married woman in'], ['A little girl putting a red'], ['Four young men, wearing hats, in their bare'], ['A group of people'], ['A blond man with a backpack'], ['Several people on a'], ['A man is talking into a mic while a woman looks at'], ['A man sits on a bed reading'], ['A child is playing near a swing set,'], ['A few small children are smiling, and one is'], ['Two little girls in front of'], ['A father is giving his child'], ['A group of people'], ['A woman laying down in front of the doors to'], ['Children are dressed'], ['A man wearing a blue shirt, blue cap, and jeans is'], ['Black dog running with'], ['A man is teaching a class of young children to balance a ball that is'], ['A man is kneeling down on the'], ['A boy looks up'], ['A person guarding'], ['There is one individual in this image with a hard hat om and'], ['The back of a young boy who is wearing'], ['Several people in white shirts and ties are 

In [18]:
# Iterating `batch` yields its items one by one; for a pandas DataFrame those
# are the column labels (NOTE(review): assumes `batch` is a DataFrame here).
print([label for label in batch])

['Xi']
