# Collect "factors of N" responses from Gemini

In [1]:
# One-time setup: uncomment to install the dependencies into the kernel's environment.
# %pip install -q -U google-generativeai
# %pip install python-dotenv

In [2]:
import json
import os
import pickle
import random
import time

import google.generativeai as genai
from dotenv import load_dotenv

### load model

In [3]:
# Read GOOGLE_API_KEY from the environment (python-dotenv loads a .env file
# into the environment first) and hand it to the Gemini client.
load_dotenv()
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    # Fail here with an actionable message instead of on the first API call.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it or add it to a .env file before running this notebook."
    )
genai.configure(api_key=GOOGLE_API_KEY)

In [4]:
# Model under test; the name lives in one constant so it is easy to find and change.
MODEL_NAME = 'gemini-pro'
gemini = genai.GenerativeModel(MODEL_NAME)

### load data and create initial prompt

In [13]:
# Read the primality dataset and pull out its 'primes' and 'composites' entries.
with open('../../data/primality_test.json','r') as file:
    dataset = json.load(file)
primes = dataset['primes']
composites = dataset['composites']

# Mix primes and composites so the model is not biased by their grouping;
# the fixed seed keeps the shuffled order reproducible.
data = primes + composites
random.seed(42)
random.shuffle(data)

# Split the shuffled numbers into consecutive batches. The batch size is a heuristic:
# although the input token limit is 30,720 tokens, the output token limit is 2048,
# which restricts how many questions can be answered in one request.
BATCH_SIZE = 25
batch_prompts = [
    data[start:start + BATCH_SIZE]
    for start in range(0, len(data), BATCH_SIZE)
]

print("Number of batches:", len(batch_prompts))
print("Number of prompts in each batch:", [len(batch) for batch in batch_prompts])
        

Number of batches: 400
Number of prompts in each batch: [25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,

In [6]:
# Prime the conversation once and reuse that exact prompt + reply for every batch
# instead of re-sending the instructions every time.
#
# The priming reply is generated with the default sampling settings, so asking for
# it again after a kernel restart could give a different reply and change the
# context between batches. If a conversation saved by an earlier run exists for
# the same priming prompt, it is reused instead of calling the model again.

promt_saying_you_are_a_math_assistant = '''You are a math assistant. I will ask you some questions. Please answer in the correct format. For example, if I ask 'What are the factors of 15?' , you should answer 'Factors of 15 = [1, 3, 5, 15]. Each question is in a separate line. Please return each answer in a separate line. Do you understand ?'''
INITIAL_MESSAGES_PATH = 'initial_messages_factors.pkl'

_priming_turn = {'role':'user', 'parts': [promt_saying_you_are_a_math_assistant] }

_cached_messages = None
if os.path.exists(INITIAL_MESSAGES_PATH):
    # NOTE: pickle.load can execute arbitrary code; only load a file that this
    # notebook wrote itself, never one received from elsewhere.
    with open(INITIAL_MESSAGES_PATH, 'rb') as file:
        _cached_messages = pickle.load(file)

if _cached_messages and _cached_messages[0] == _priming_turn:
    # Same priming prompt as the saved run: keep the saved reply for consistency.
    Initial_messages = _cached_messages
else:
    Initial_messages = [_priming_turn]

    # get response from the model and then append it to the prompt
    Initial_response = gemini.generate_content(Initial_messages)
    if not Initial_response.candidates:
        raise RuntimeError(
            "Gemini returned no candidate for the priming prompt; cannot build the initial conversation."
        )
    Initial_messages.append(Initial_response.candidates[0].content)

In [9]:
# Show the primed conversation: the user instruction followed by the model's reply.
print(Initial_messages)

[{'role': 'user', 'parts': ["You are a math assistant. I will ask you some questions. Please answer in the correct format. For example, if I ask 'What are the factors of 15?' , you should answer 'Factors of 15 = [1, 3, 5, 15]. Each question is in a separate line. Please return each answer in a separate line. Do you understand ?"]}, parts {
  text: "Yes, I understand. I will answer your math questions in the correct format, providing the factors in a list like this: Factors of 15 = [1, 3, 5, 15]."
}
role: "model"
]


In [7]:
# Save the initial messages to a file. If the conversation has to be restarted,
# the same initial messages can then be reused, so they stay consistent across
# all batches.

with open('initial_messages_factors.pkl', 'wb') as file:
    pickle.dump(Initial_messages, file)

### conduct experiment 

In [10]:
# Path of the text file that the model's response lines are appended to
response_file = 'gemini_response/factors_response.txt'

# Requests-per-minute limit of the free API; used to pace the requests
rpm = 60

In [14]:
# To resume an interrupted run, set this to (last printed batch number - 1);
# batches with a smaller 0-based index are skipped.
batch_no_to_start_from = 0

# candidate_count is 1 because we only need one response,
# temperature is 0 because we need deterministic responses
GENERATION_CONFIG = genai.GenerationConfig(candidate_count=1, temperature=0.0)

# pause before the single retry of a request that came back without usable text
RETRY_DELAY_SECONDS = 5


def _response_lines(response):
    """Return the text of the first part of the first candidate, split on newlines.

    Raises:
        IndexError: if the response has no candidate or the candidate has no parts.
    """
    return response.candidates[0].content.parts[0].text.split('\n')


def _ask_gemini(messages, num_prompts):
    """Send `messages` to the model and return its reply as a list of lines.

    A reply without usable text is retried once after RETRY_DELAY_SECONDS.
    If the retry also has no text, "Error in response" is printed and a list
    of `num_prompts` "0" placeholders is returned instead.
    """
    for attempt in range(2):
        if attempt:
            time.sleep(RETRY_DELAY_SECONDS)
        response = gemini.generate_content(messages, generation_config=GENERATION_CONFIG)
        try:
            return _response_lines(response)
        except IndexError:
            continue

    # if model gives no response, fill with 0
    print("Error in response")
    return ["0"]*num_prompts


# The responses are appended to a file inside a sub-directory; create the
# directory up front so the first write cannot fail with FileNotFoundError.
os.makedirs(os.path.dirname(response_file) or '.', exist_ok=True)

# True division: with floor division any rpm above 60 gives a delay of 0 seconds,
# which would switch the rate limiting off entirely.
seconds_between_requests = 60 / rpm

for i, batch_prompt in enumerate(batch_prompts):

    if i < batch_no_to_start_from:
        continue

    # print status
    print(f"Sending batch {i+1} of {len(batch_prompts)}")

    num_prompts = len(batch_prompt)

    # one question per line for every number in the batch
    curr_prompt = "".join(f"What are the factors of {prompt}?\n" for prompt in batch_prompt)

    # complete conversation: the shared priming messages plus this batch's questions
    # (shallow copy, so the shared list itself is never extended)
    complete_promt = Initial_messages.copy()
    complete_promt.append({'role':'user', 'parts': [curr_prompt]})

    model_responses = _ask_gemini(complete_promt, num_prompts)

    # save responses to file
    with open(response_file, 'a') as file:
        file.writelines(model_response + '\n' for model_response in model_responses)

    # print the prompt and response counts, then each prompt with its response
    print('# of prompts: ',len(batch_prompt),'# of responses: ', len(model_responses))
    if len(model_responses) != num_prompts:
        # The response file is line-oriented, so a mismatch shifts later answers.
        print("Warning: number of response lines differs from number of prompts in batch", i+1)

    # `model_response` (not `response`) so the API response name is not shadowed
    for prompt, model_response in zip(batch_prompt, model_responses):
        print(prompt, model_response, sep=" ->")

    print("Received responses for batch", i+1, "of", len(batch_prompts))
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    time.sleep(seconds_between_requests) #to avoid rate limit errors

print("Responses saved to", response_file)

Sending batch 1 of 400
# of prompts:  25 # of responses:  25
54631 ->Factors of 54631 = [1, 54631]
22092 ->Factors of 22092 = [1, 2, 3, 4, 6, 12, 1831, 3662, 5493, 11046, 22092]
99194 ->Factors of 99194 = [1, 2, 7, 14, 7085, 14170, 49597, 99194]
85213 ->Factors of 85213 = [1, 85213]
81041 ->Factors of 81041 = [1, 81041]
81371 ->Factors of 81371 = [1, 81371]
2807 ->Factors of 2807 = [1, 2807]
91499 ->Factors of 91499 = [1, 91499]
96079 ->Factors of 96079 = [1, 96079]
17593 ->Factors of 17593 = [1, 17593]
70181 ->Factors of 70181 = [1, 70181]
47521 ->Factors of 47521 = [1, 3, 7, 11, 13, 21, 33, 39, 77, 143, 361, 613, 1221, 2261, 3673, 4369, 15843, 47521]
86717 ->Factors of 86717 = [1, 86717]
89882 ->Factors of 89882 = [1, 2, 3, 6, 14980, 29961, 44941, 89882]
34403 ->Factors of 34403 = [1, 34403]
61530 ->Factors of 61530 = [1, 2, 3, 5, 6, 10, 15, 30, 2051, 4102, 6153, 12306, 20510, 30765, 61530]
40093 ->Factors of 40093 = [1, 40093]
3547 ->Factors of 3547 = [1, 3547]
89293 ->Factors of 89