# Get the min / max of number lists from Gemini

Install necessary packages

In [1]:
# !pip install -q -U google-generativeai
# !pip install python-dotenv

In [2]:
import os
from dotenv import load_dotenv
import random
import time
import json
import google.generativeai as genai

### load model

In [3]:
# Configure the Gemini SDK with the API key taken from the environment.
# load_dotenv() runs first so the key can come from a local .env file
# (assumed to define GOOGLE_API_KEY — keep that file out of version control).
load_dotenv()
# os.getenv returns None when the variable is not set
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

In [4]:
# Model handle used by the request cells of this notebook ('gemini-pro').
gemini = genai.GenerativeModel('gemini-pro')

## For finding min

### load data and create initial prompt

In [5]:
# Load the dataset: a JSON object whose keys are list-family names and whose
# values are sequences of number lists (one prompt is built per number list).
with open('../../data/lists.json','r') as file:
    data = json.load(file)

# Batch size per list family. The sizes are heuristics found by trial:
# although the input token limit is 30,720 tokens, the output token limit is
# 2048, and that is what restricts how many questions fit in one request.
# Every key of `data` must have an entry here (a missing key raises KeyError).
size_dict = {'lists_size_3_pos':50,
             'lists_size_3_neg':50,
             'lists_size_5_pos':40,
             'lists_size_5_neg':40,
             'lists_size_10_pos':25,
             'lists_size_10_neg':25}

# batch_prompts[k] is the list of prompt strings sent together in request k
batch_prompts = []

# NOTE: the loop variable is deliberately not called `list`, which would shadow
# the builtin for every later cell of the notebook.
for list_name, number_lists in data.items():

    # get batch size for this list family
    batch_size = size_dict[list_name]

    # walk the family in consecutive chunks; the last chunk may be shorter
    for start in range(0, len(number_lists), batch_size):
        chunk = number_lists[start:start + batch_size]
        batch_prompts.append(
            [f"Find the minimum number in the list {numbers}." for numbers in chunk]
        )


print("Number of batches:", len(batch_prompts))
print("Number of prompts in each batch:", [len(batch) for batch in batch_prompts])


Number of batches: 86
Number of prompts in each batch: [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 20, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 20, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25]


In [8]:
# Prime the conversation once: send the instructions, keep the model's reply,
# and reuse this prompt+reply pair as the prefix of every batch request
# instead of re-sending the instructions each time.

promt_saying_you_are_a_math_assistant = '''You are a math assistant. I will ask you to find the minimum number in a list. Please answer in the correct format. For example, if I ask 'Find the minimum number in the list [1, 2, 3]', you should answer 'Min([1, 2, 3]) = 1'. Each question is in a separate line. Please return each answer in a separate line. Do you understand ?'''

# the conversation starts with a single user turn carrying the instructions
Initial_messages = [dict(role='user', parts=[promt_saying_you_are_a_math_assistant])]

# ask the model for its acknowledgement and store it as the second turn
Initial_response = gemini.generate_content(Initial_messages)
Initial_messages += [Initial_response.candidates[0].content]

In [12]:
# Show the primed conversation: the instruction prompt followed by the model's reply.
print(Initial_messages)

[{'role': 'user', 'parts': ["You are a math assistant. I will ask you to find the minimum number in a list. Please answer in the correct format. For example, if I ask 'Find the minimum number in the list [1, 2, 3]', you should answer 'Min([1, 2, 3]) = 1'. Each question is in a separate line. Please return each answer in a separate line. Do you understand ?"]}, parts {
  text: "Yes, I understand. I will find the minimum number in the given list and return the answer in the format \'Min([list]) = [minimum number]\'."
}
role: "model"
]


In [10]:
import pickle

# Save the initial messages to a file: if the conversation has to be restarted,
# the same initial messages can be reused so that they stay consistent
# across all batches.
# NOTE: pickle files can execute code when loaded — only unpickle this file
# if it comes from a trusted source.

with open('initial_messages_min.pkl', 'wb') as file:
    pickle.dump(Initial_messages, file)

### conduct experiment for min

In [13]:
# Path of the file the model answers for the "min" experiment are saved to.
response_file = 'gemini_response/list_min_responses.txt'

# Requests-per-minute limit of the free API; used to pace the requests.
rpm = 60

In [14]:
# Send every batch to the model and append the answers to `response_file`,
# one answer per line, so that line k of the file lines up with prompt k.

# To resume an interrupted run, set this to (printed batch number - 1);
# batches before this index are skipped. The response file is opened in
# append mode, so restarting from 0 adds to whatever is already in the file.
batch_no_to_start_from = 0

# make sure the output directory exists before the first append
response_dir = os.path.dirname(response_file)
if response_dir:
    os.makedirs(response_dir, exist_ok=True)

# candidate_count is 1 because we only need one response,
# temperature is 0 because we need deterministic responses
generation_config = genai.GenerationConfig(candidate_count=1, temperature=0.0)

for i, batch_prompt in enumerate(batch_prompts):

    if i < batch_no_to_start_from:
        continue

    # print status
    print(f"Sending batch {i+1} of {len(batch_prompts)}")

    num_prompts = len(batch_prompt)

    # create prompt for the batch: one question per line
    curr_prompt = "".join([f"{prompt} \n " for prompt in batch_prompt])

    # complete conversation = primed instruction turns + this batch's questions
    # (shallow copy so Initial_messages itself is never extended)
    complete_promt = Initial_messages.copy()
    complete_promt.append({'role':'user', 'parts': [curr_prompt]})

    response = gemini.generate_content(complete_promt,
                                       generation_config=generation_config)

    try:
        # extract text from the response
        response_text = response.candidates[0].content.parts[0].text
    except IndexError:
        # if model gives no response, fill with 0
        print("Error in response")
        model_responses = ["0"]*num_prompts
    else:
        # one answer per line; blank lines carry no answer and would shift
        # every following line of the response file, so they are dropped
        model_responses = [line for line in response_text.split('\n') if line.strip()]
        if len(model_responses) != num_prompts:
            print(f"Warning: batch {i+1} has {num_prompts} prompts "
                  f"but {len(model_responses)} responses")
            # pad a short reply with the same "0" placeholder as above so the
            # next batch still starts on the right line (no-op when too long)
            model_responses += ["0"] * (num_prompts - len(model_responses))

    # save responses to file
    with open(response_file, 'a') as file:
        for model_response in model_responses:
            file.write(model_response + '\n')

    # print the prompt and response counts, then each prompt with its response
    print('# of prompts: ',len(batch_prompt),'# of responses: ', len(model_responses))

    for prompt, model_response in zip(batch_prompt, model_responses):
        print(prompt, model_response, sep=" ->")

    print("Received responses for batch", i+1, "of", len(batch_prompts))
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    # pause between requests to avoid rate limit errors; true division keeps a
    # non-zero pause when rpm > 60 (60//rpm would floor to 0 there)
    time.sleep(60 / rpm)

print("Responses saved to", response_file)

Sending batch 1 of 86
# of prompts:  50 # of responses:  50
Find the minimum number in the list [1, 0, 4]. ->Min([1, 0, 4]) = 0
Find the minimum number in the list [6, 0, 0]. ->Min([6, 0, 0]) = 0
Find the minimum number in the list [3, 8, 6]. ->Min([3, 8, 6]) = 3
Find the minimum number in the list [5, 4, 2]. ->Min([5, 4, 2]) = 2
Find the minimum number in the list [5, 9, 4]. ->Min([5, 9, 4]) = 4
Find the minimum number in the list [4, 9, 5]. ->Min([4, 9, 5]) = 4
Find the minimum number in the list [3, 1, 6]. ->Min([3, 1, 6]) = 1
Find the minimum number in the list [4, 1, 9]. ->Min([4, 1, 9]) = 1
Find the minimum number in the list [8, 3, 5]. ->Min([8, 3, 5]) = 3
Find the minimum number in the list [3, 9, 5]. ->Min([3, 9, 5]) = 3
Find the minimum number in the list [3, 8, 8]. ->Min([3, 8, 8]) = 3
Find the minimum number in the list [2, 8, 7]. ->Min([2, 8, 7]) = 2
Find the minimum number in the list [1, 6, 6]. ->Min([1, 6, 6]) = 1
Find the minimum number in the list [8, 4, 5]. ->Min([8,

## For finding max

### load data and create initial prompt

In [15]:
# Load the dataset: a JSON object whose keys are list-family names and whose
# values are sequences of number lists (one prompt is built per number list).
with open('../../data/lists.json','r') as file:
    data = json.load(file)

# Batch size per list family. The sizes are heuristics found by trial:
# although the input token limit is 30,720 tokens, the output token limit is
# 2048, and that is what restricts how many questions fit in one request.
# Every key of `data` must have an entry here (a missing key raises KeyError).
size_dict = {'lists_size_3_pos':50,
             'lists_size_3_neg':50,
             'lists_size_5_pos':40,
             'lists_size_5_neg':40,
             'lists_size_10_pos':25,
             'lists_size_10_neg':25}

# batch_prompts[k] is the list of prompt strings sent together in request k
batch_prompts = []

# NOTE: the loop variable is deliberately not called `list`, which would shadow
# the builtin for every later cell of the notebook.
for list_name, number_lists in data.items():

    # get the batch size for this list family
    batch_size = size_dict[list_name]

    # walk the family in consecutive chunks; the last chunk may be shorter
    for start in range(0, len(number_lists), batch_size):
        chunk = number_lists[start:start + batch_size]
        batch_prompts.append(
            [f"Find the maximum number in the list {numbers}." for numbers in chunk]
        )


print("Number of batches:", len(batch_prompts))
print("Number of prompts in each batch:", [len(batch) for batch in batch_prompts])


Number of batches: 86
Number of prompts in each batch: [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 20, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 20, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25]


In [16]:
# Prime the conversation once: send the instructions, keep the model's reply,
# and reuse this prompt+reply pair as the prefix of every batch request
# instead of re-sending the instructions each time.

promt_saying_you_are_a_math_assistant = '''You are a math assistant. I will ask you to find the maximum number in a list. Please answer in the correct format. For example, if I ask 'Find the maximum number in the list [1, 2, 3]', you should answer 'Max([1, 2, 3]) = 3'. Each question is in a separate line. Please return each answer in a separate line. Do you understand ?'''

# the conversation starts with a single user turn carrying the instructions
Initial_messages = [dict(role='user', parts=[promt_saying_you_are_a_math_assistant])]

# ask the model for its acknowledgement and store it as the second turn
Initial_response = gemini.generate_content(Initial_messages)
Initial_messages += [Initial_response.candidates[0].content]

In [20]:
# Show the primed conversation: the instruction prompt followed by the model's reply.
print(Initial_messages)

[{'role': 'user', 'parts': ["You are a math assistant. I will ask you to find the maximum number in a list. Please answer in the correct format. For example, if I ask 'Find the maximum number in the list [1, 2, 3]', you should answer 'Max([1, 2, 3]) = 3'. Each question is in a separate line. Please return each answer in a separate line. Do you understand ?"]}, parts {
  text: "Yes, I understand. I will find the maximum number in a list and return the answer in the correct format.\n\nMax([1, 2, 3]) = 3\nMax([4, 5, 6]) = 6\nMax([7, 8, 9]) = 9"
}
role: "model"
]


In [23]:
import pickle

# Save the initial messages to a file: if the conversation has to be restarted,
# the same initial messages can be reused so that they stay consistent
# across all batches.
# NOTE: pickle files can execute code when loaded — only unpickle this file
# if it comes from a trusted source.
with open('initial_messages_max.pkl', 'wb') as file:
    pickle.dump(Initial_messages, file)

### conduct experiment for max

In [23]:
# Path of the file the model answers for the "max" experiment are saved to.
response_file = 'gemini_response/list_max_responses.txt'

# Requests-per-minute limit of the free API; used to pace the requests.
rpm = 60

In [24]:
# Send every batch to the model and append the answers to `response_file`,
# one answer per line, so that line k of the file lines up with prompt k.

# To resume an interrupted run, set this to (printed batch number - 1);
# batches before this index are skipped. The response file is opened in
# append mode, so restarting from 0 adds to whatever is already in the file.
batch_no_to_start_from = 0

# make sure the output directory exists before the first append
response_dir = os.path.dirname(response_file)
if response_dir:
    os.makedirs(response_dir, exist_ok=True)

# candidate_count is 1 because we only need one response,
# temperature is 0 because we need deterministic responses
generation_config = genai.GenerationConfig(candidate_count=1, temperature=0.0)

for i, batch_prompt in enumerate(batch_prompts):

    if i < batch_no_to_start_from:
        continue

    # print status
    print(f"Sending batch {i+1} of {len(batch_prompts)}")

    num_prompts = len(batch_prompt)

    # create prompt for the batch: one question per line
    curr_prompt = "".join([f"{prompt} \n " for prompt in batch_prompt])

    # complete conversation = primed instruction turns + this batch's questions
    # (shallow copy so Initial_messages itself is never extended)
    complete_promt = Initial_messages.copy()
    complete_promt.append({'role':'user', 'parts': [curr_prompt]})

    response = gemini.generate_content(complete_promt,
                                       generation_config=generation_config)

    try:
        # extract text from the response
        response_text = response.candidates[0].content.parts[0].text
    except IndexError:
        # if model gives no response, fill with 0
        print("Error in response")
        model_responses = ["0"]*num_prompts
    else:
        # one answer per line; blank lines carry no answer and would shift
        # every following line of the response file, so they are dropped
        model_responses = [line for line in response_text.split('\n') if line.strip()]
        if len(model_responses) != num_prompts:
            print(f"Warning: batch {i+1} has {num_prompts} prompts "
                  f"but {len(model_responses)} responses")
            # pad a short reply with the same "0" placeholder as above so the
            # next batch still starts on the right line (no-op when too long)
            model_responses += ["0"] * (num_prompts - len(model_responses))

    # save responses to file
    with open(response_file, 'a') as file:
        for model_response in model_responses:
            file.write(model_response + '\n')

    # print the prompt and response counts, then each prompt with its response
    print('# of prompts: ',len(batch_prompt),'# of responses: ', len(model_responses))

    for prompt, model_response in zip(batch_prompt, model_responses):
        print(prompt, model_response, sep=" ->")

    print("Received responses for batch", i+1, "of", len(batch_prompts))
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    # pause between requests to avoid rate limit errors; true division keeps a
    # non-zero pause when rpm > 60 (60//rpm would floor to 0 there)
    time.sleep(60 / rpm)

print("Responses saved to", response_file)

Sending batch 1 of 86
# of prompts:  50 # of responses:  50
Find the maximum number in the list [1, 0, 4]. ->Max([1, 0, 4]) = 4
Find the maximum number in the list [6, 0, 0]. ->Max([6, 0, 0]) = 6
Find the maximum number in the list [3, 8, 6]. ->Max([3, 8, 6]) = 8
Find the maximum number in the list [5, 4, 2]. ->Max([5, 4, 2]) = 5
Find the maximum number in the list [5, 9, 4]. ->Max([5, 9, 4]) = 9
Find the maximum number in the list [4, 9, 5]. ->Max([4, 9, 5]) = 9
Find the maximum number in the list [3, 1, 6]. ->Max([3, 1, 6]) = 6
Find the maximum number in the list [4, 1, 9]. ->Max([4, 1, 9]) = 9
Find the maximum number in the list [8, 3, 5]. ->Max([8, 3, 5]) = 8
Find the maximum number in the list [3, 9, 5]. ->Max([3, 9, 5]) = 9
Find the maximum number in the list [3, 8, 8]. ->Max([3, 8, 8]) = 8
Find the maximum number in the list [2, 8, 7]. ->Max([2, 8, 7]) = 8
Find the maximum number in the list [1, 6, 6]. ->Max([1, 6, 6]) = 6
Find the maximum number in the list [8, 4, 5]. ->Max([8,