# Get division responses from Gemini

Install necessary packages

In [1]:
# One-time environment setup: uncomment and run these lines if the packages
# are not installed in the kernel's environment yet.
# !pip install -q -U google-generativeai
# !pip install python-dotenv

In [2]:
import json
import os
import pickle
import random
import time

import google.generativeai as genai
from dotenv import load_dotenv

### load model

In [3]:
# Load variables from a local .env file into the process environment, then
# hand the Gemini API key found there to the client library.
load_dotenv()
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

In [4]:
# Single model handle shared by every request in this notebook.
gemini = genai.GenerativeModel(model_name='gemini-pro')

### load data

In [5]:
# Read the division dataset; it is sliced into batches of records below.
with open('../../data/division_dataset.json', 'r') as file:
    data = json.load(file)

# Batch-size schedule as (exclusive upper bound on the start index, batch size).
# The sizes are heuristics found by trying several values: the input token
# limit is 30,720 tokens, but the output token limit of 2048 is what actually
# restricts how many questions fit into one request.
size_schedule = ((432, 50), (1032, 40), (1632, 30), (2232, 20), (2832, 15))
fallback_size = 10  # applies once the start index has reached 2832

batch_prompts = []

start = 0
while start < len(data):
    # The first schedule entry whose bound exceeds the start index wins.
    size = next((n for bound, n in size_schedule if start < bound), fallback_size)
    # Slicing clamps at the end of the data, so the last batch may be shorter.
    batch_prompts.append(data[start:start + size])
    start += size

print("Number of batches:", len(batch_prompts))
print("Number of prompts in each batch:", list(map(len, batch_prompts)))


Number of batches: 173
Number of prompts in each batch: [50, 50, 50, 50, 50, 50, 50, 50, 50, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7]


## Get responses with an accuracy of 4 digits after the decimal point

### create initial prompt

In [6]:
# One-time priming exchange: explain the task and the answer format to the
# model, capture its acknowledgement, and reuse this two-turn history as the
# prefix of every batch request instead of repeating the instructions each time.

promt_saying_you_are_a_math_assistant = '''You are a math assistant. I will ask you some division questions. Please answer in the correct format. For example, if I ask 'What is 1 / 2 with accuracy of 4 digits after decimal point?',you should answer '1 / 2 = 0.5000'. Each question is in a separate line. Please return each answer in a separate line with required accuracy. Do you understand ?'''

# The conversation starts with a single user turn holding the instructions.
Initial_messages = [dict(role='user', parts=[promt_saying_you_are_a_math_assistant])]

# Send it, then extend the history in place with the model's first candidate reply.
Initial_response = gemini.generate_content(Initial_messages)
Initial_messages += [Initial_response.candidates[0].content]

In [7]:
# Show the priming conversation (user instructions followed by the model's reply).
print(Initial_messages)

[{'role': 'user', 'parts': ["You are a math assistant. I will ask you some division questions. Please answer in the correct format. For example, if I ask 'What is 1 / 2 with accuracy of 4 digits after decimal point?',you should answer '1 / 2 = 0.5000'. Each question is in a separate line. Please return each answer in a separate line with required accuracy. Do you understand ?"]}, parts {
  text: "Yes, I understand. I will provide the answers to your division questions with the specified accuracy of 4 digits after the decimal point. Each answer will be on a separate line.\n\nFor example, if you ask \'What is 1 / 2 with accuracy of 4 digits after decimal point?\', I will answer \'1 / 2 = 0.5000\'.\n\nLet\'s begin."
}
role: "model"
]


In [8]:
# Save the priming conversation to a file, in case the conversation needs to
# be restarted later. `pickle` is imported in the import cell at the top of
# the notebook rather than here.
with open('initial_messages_div_acc4.pkl', 'wb') as file:
    pickle.dump(Initial_messages, file)

### conduct experiment

In [9]:
# Path of the text file that the experiment loop appends the model's answer
# lines to (4-digit accuracy run).
response_file = 'gemini_response/div_acc4_response.txt'

# Requests-per-minute limit of the free API tier; the experiment loop derives
# its pause between batches from this value.
rpm = 60

In [10]:
# Resume support: batches whose 0-based index is below this value are skipped.
# To restart at the batch that was printed as "Sending batch N", set it to N - 1.
batch_no_to_start_from = 0
DIGIT_ACC = 4

# Responses are appended to `response_file`; create its directory up front so
# the first write cannot fail after an API request has already been spent.
os.makedirs(os.path.dirname(response_file) or '.', exist_ok=True)

for i, batch_prompt in enumerate(batch_prompts):
    if i < batch_no_to_start_from:
        continue
    # print status
    print(f"Sending batch {i+1} of {len(batch_prompts)}")

    num_prompts = len(batch_prompt)

    # Build one question per record; only the first two fields (dividend and
    # divisor) are used, the remaining three fields are ignored here.
    curr_prompt = "".join([f"what is {a} / {b} with accuracy of {DIGIT_ACC} digits after decimal point? \n " for (a,b,_,_,_) in batch_prompt])

    # Full request = priming conversation + this batch's questions as a new
    # user turn. The copy keeps Initial_messages itself unchanged between batches.
    complete_promt = Initial_messages.copy()
    complete_promt.append({'role':'user', 'parts': [curr_prompt]})

    # candidate_count is 1 because only one response is needed,
    # temperature is 0 because deterministic responses are wanted.
    response = gemini.generate_content(
        complete_promt,
        generation_config=genai.GenerationConfig(
            candidate_count=1,
            temperature=0.0,
        ),
    )

    try:
        # Text of the first part of the first candidate, one answer per line.
        model_responses = response.candidates[0].content.parts[0].text.split('\n')
    except IndexError:
        # No candidate or no content part came back: store one placeholder
        # per prompt so the output file keeps one line per question.
        print("Error in response")
        model_responses = ["0"]*num_prompts

    # The output file is only useful if it lines up with the prompts, so make
    # a mismatch visible instead of letting it pass silently.
    if len(model_responses) != num_prompts:
        print(f"Warning: batch {i+1} returned {len(model_responses)} lines for {num_prompts} prompts")

    # save responses to file
    with open(response_file, 'a') as file:
        for model_response in model_responses:
            file.write(model_response + '\n')

    # print the prompt and response counts, then each pair
    print('# of prompts: ',len(batch_prompt),'# of responses: ', len(model_responses))

    # `answer` (not `response`) so the API response object is not shadowed.
    for prompt, answer in zip(batch_prompt, model_responses):
        print(prompt, answer, sep=" ->")

    print("Received responses for batch", i+1, "of", len(batch_prompts))
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    # Pause 60/rpm seconds to stay under the rate limit. True division is
    # required: floor division gives 0 for rpm > 60 and too short a pause
    # whenever rpm does not divide 60.
    time.sleep(60 / rpm)

print("Responses saved to", response_file)

Sending batch 1 of 173
# of prompts:  50 # of responses:  50
[1.0, 2.0, 0.5, 0.5, 0.5] ->1.0 / 2.0 = 0.5000
[2.0, 1.0, 2.0, 2.0, 2.0] ->2.0 / 1.0 = 2.0000
[1.0, 3.0, 0.3333, 0.33333333, 0.333333333333] ->1.0 / 3.0 = 0.3333
[3.0, 1.0, 3.0, 3.0, 3.0] ->3.0 / 1.0 = 3.0000
[1.0, 4.0, 0.25, 0.25, 0.25] ->1.0 / 4.0 = 0.2500
[4.0, 1.0, 4.0, 4.0, 4.0] ->4.0 / 1.0 = 4.0000
[1.0, 5.0, 0.2, 0.2, 0.2] ->1.0 / 5.0 = 0.2000
[5.0, 1.0, 5.0, 5.0, 5.0] ->5.0 / 1.0 = 5.0000
[1.0, 6.0, 0.1667, 0.16666667, 0.166666666667] ->1.0 / 6.0 = 0.1667
[6.0, 1.0, 6.0, 6.0, 6.0] ->6.0 / 1.0 = 6.0000
[1.0, 7.0, 0.1429, 0.14285714, 0.142857142857] ->1.0 / 7.0 = 0.1429
[7.0, 1.0, 7.0, 7.0, 7.0] ->7.0 / 1.0 = 7.0000
[1.0, 8.0, 0.125, 0.125, 0.125] ->1.0 / 8.0 = 0.1250
[8.0, 1.0, 8.0, 8.0, 8.0] ->8.0 / 1.0 = 8.0000
[1.0, 9.0, 0.1111, 0.11111111, 0.111111111111] ->1.0 / 9.0 = 0.1111
[9.0, 1.0, 9.0, 9.0, 9.0] ->9.0 / 1.0 = 9.0000
[2.0, 1.0, 2.0, 2.0, 2.0] ->2.0 / 1.0 = 2.0000
[1.0, 2.0, 0.5, 0.5, 0.5] ->1.0 / 2.0 = 0.5000


## Get responses with an accuracy of 8 digits after the decimal point

### create initial prompt

In [15]:
# One-time priming exchange: explain the task and the answer format to the
# model, capture its acknowledgement, and reuse this two-turn history as the
# prefix of every batch request instead of repeating the instructions each time.

promt_saying_you_are_a_math_assistant = '''You are a math assistant. I will ask you some division questions. Please answer in the correct format. For example, if I ask 'What is 1 / 2 with accuracy of 8 digits after decimal point?',you should answer '1 / 2 = 0.50000000'. Each question is in a separate line. Please return each answer in a separate line with required accuracy. Do you understand ?'''

# The conversation starts with a single user turn holding the instructions.
Initial_messages = [dict(role='user', parts=[promt_saying_you_are_a_math_assistant])]

# Send it, then extend the history in place with the model's first candidate reply.
Initial_response = gemini.generate_content(Initial_messages)
Initial_messages += [Initial_response.candidates[0].content]

In [16]:
# Show the priming conversation (user instructions followed by the model's reply).
print(Initial_messages)

[{'role': 'user', 'parts': ["You are a math assistant. I will ask you some division questions. Please answer in the correct format. For example, if I ask 'What is 1 / 2 with accuracy of 8 digits after decimal point?',you should answer '1 / 2 = 0.50000000'. Each question is in a separate line. Please return each answer in a separate line with required accuracy. Do you understand ?"]}, parts {
  text: "Yes, I understand. I am a math assistant designed to answer division questions with a specified accuracy. I will provide the answers in the correct format, with the required number of digits after the decimal point."
}
role: "model"
]


In [17]:
# Save the priming conversation to a file, in case the conversation needs to
# be restarted later. `pickle` is imported in the import cell at the top of
# the notebook rather than here.
with open('initial_messages_div_acc8.pkl', 'wb') as file:
    pickle.dump(Initial_messages, file)

### conduct experiment

In [18]:
# Path of the text file that the experiment loop appends the model's answer
# lines to (8-digit accuracy run).
response_file = 'gemini_response/div_acc8_response.txt'

# Requests-per-minute limit of the free API tier; the experiment loop derives
# its pause between batches from this value.
rpm = 60

In [19]:
# Resume support: batches whose 0-based index is below this value are skipped.
# To restart at the batch that was printed as "Sending batch N", set it to N - 1.
batch_no_to_start_from = 0
DIGIT_ACC = 8

# Responses are appended to `response_file`; create its directory up front so
# the first write cannot fail after an API request has already been spent.
os.makedirs(os.path.dirname(response_file) or '.', exist_ok=True)

for i, batch_prompt in enumerate(batch_prompts):
    if i < batch_no_to_start_from:
        continue
    # print status
    print(f"Sending batch {i+1} of {len(batch_prompts)}")

    num_prompts = len(batch_prompt)

    # Build one question per record; only the first two fields (dividend and
    # divisor) are used, the remaining three fields are ignored here.
    curr_prompt = "".join([f"what is {a} / {b} with accuracy of {DIGIT_ACC} digits after decimal point? \n " for (a,b,_,_,_) in batch_prompt])

    # Full request = priming conversation + this batch's questions as a new
    # user turn. The copy keeps Initial_messages itself unchanged between batches.
    complete_promt = Initial_messages.copy()
    complete_promt.append({'role':'user', 'parts': [curr_prompt]})

    # candidate_count is 1 because only one response is needed,
    # temperature is 0 because deterministic responses are wanted.
    response = gemini.generate_content(
        complete_promt,
        generation_config=genai.GenerationConfig(
            candidate_count=1,
            temperature=0.0,
        ),
    )

    try:
        # Text of the first part of the first candidate, one answer per line.
        model_responses = response.candidates[0].content.parts[0].text.split('\n')
    except IndexError:
        # No candidate or no content part came back: store one placeholder
        # per prompt so the output file keeps one line per question.
        print("Error in response")
        model_responses = ["0"]*num_prompts

    # The output file is only useful if it lines up with the prompts, so make
    # a mismatch visible instead of letting it pass silently.
    if len(model_responses) != num_prompts:
        print(f"Warning: batch {i+1} returned {len(model_responses)} lines for {num_prompts} prompts")

    # save responses to file
    with open(response_file, 'a') as file:
        for model_response in model_responses:
            file.write(model_response + '\n')

    # print the prompt and response counts, then each pair
    print('# of prompts: ',len(batch_prompt),'# of responses: ', len(model_responses))

    # `answer` (not `response`) so the API response object is not shadowed.
    for prompt, answer in zip(batch_prompt, model_responses):
        print(prompt, answer, sep=" ->")

    print("Received responses for batch", i+1, "of", len(batch_prompts))
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    # Pause 60/rpm seconds to stay under the rate limit. True division is
    # required: floor division gives 0 for rpm > 60 and too short a pause
    # whenever rpm does not divide 60.
    time.sleep(60 / rpm)

print("Responses saved to", response_file)

Sending batch 1 of 173
# of prompts:  50 # of responses:  50
[1.0, 2.0, 0.5, 0.5, 0.5] ->1.0 / 2.0 = 0.50000000
[2.0, 1.0, 2.0, 2.0, 2.0] ->2.0 / 1.0 = 2.00000000
[1.0, 3.0, 0.3333, 0.33333333, 0.333333333333] ->1.0 / 3.0 = 0.33333333
[3.0, 1.0, 3.0, 3.0, 3.0] ->3.0 / 1.0 = 3.00000000
[1.0, 4.0, 0.25, 0.25, 0.25] ->1.0 / 4.0 = 0.25000000
[4.0, 1.0, 4.0, 4.0, 4.0] ->4.0 / 1.0 = 4.00000000
[1.0, 5.0, 0.2, 0.2, 0.2] ->1.0 / 5.0 = 0.20000000
[5.0, 1.0, 5.0, 5.0, 5.0] ->5.0 / 1.0 = 5.00000000
[1.0, 6.0, 0.1667, 0.16666667, 0.166666666667] ->1.0 / 6.0 = 0.16666667
[6.0, 1.0, 6.0, 6.0, 6.0] ->6.0 / 1.0 = 6.00000000
[1.0, 7.0, 0.1429, 0.14285714, 0.142857142857] ->1.0 / 7.0 = 0.14285714
[7.0, 1.0, 7.0, 7.0, 7.0] ->7.0 / 1.0 = 7.00000000
[1.0, 8.0, 0.125, 0.125, 0.125] ->1.0 / 8.0 = 0.12500000
[8.0, 1.0, 8.0, 8.0, 8.0] ->8.0 / 1.0 = 8.00000000
[1.0, 9.0, 0.1111, 0.11111111, 0.111111111111] ->1.0 / 9.0 = 0.11111111
[9.0, 1.0, 9.0, 9.0, 9.0] ->9.0 / 1.0 = 9.00000000
[2.0, 1.0, 2.0, 2.0, 2.0] ->2.

## Get responses with an accuracy of 12 digits after the decimal point

### create initial prompt

In [22]:
# One-time priming exchange: explain the task and the answer format to the
# model, capture its acknowledgement, and reuse this two-turn history as the
# prefix of every batch request instead of repeating the instructions each time.

promt_saying_you_are_a_math_assistant = '''You are a math assistant. I will ask you some division questions. Please answer in the correct format. For example, if I ask 'What is 1 / 2 with accuracy of 12 digits after decimal point?',you should answer '1 / 2 = 0.500000000000'. Each question is in a separate line. Please return each answer in a separate line with required accuracy. Do you understand ?'''

# The conversation starts with a single user turn holding the instructions.
Initial_messages = [dict(role='user', parts=[promt_saying_you_are_a_math_assistant])]

# Send it, then extend the history in place with the model's first candidate reply.
Initial_response = gemini.generate_content(Initial_messages)
Initial_messages += [Initial_response.candidates[0].content]

In [23]:
# Show the priming conversation (user instructions followed by the model's reply).
print(Initial_messages)

[{'role': 'user', 'parts': ["You are a math assistant. I will ask you some division questions. Please answer in the correct format. For example, if I ask 'What is 1 / 2 with accuracy of 12 digits after decimal point?',you should answer '1 / 2 = 0.500000000000'. Each question is in a separate line. Please return each answer in a separate line with required accuracy. Do you understand ?"]}, parts {
  text: "Yes, I understand. I can answer division questions with the specified accuracy. For example, if you ask \'What is 1 / 2 with accuracy of 12 digits after decimal point?\', I will answer \'1 / 2 = 0.500000000000\'. Each question will have its own answer in a separate line with the required accuracy."
}
role: "model"
]


In [24]:
# Save the priming conversation to a file, in case the conversation needs to
# be restarted later. `pickle` is imported in the import cell at the top of
# the notebook rather than here.
with open('initial_messages_div_acc12.pkl', 'wb') as file:
    pickle.dump(Initial_messages, file)

### conduct experiment

In [25]:
# Path of the text file that the experiment loop appends the model's answer
# lines to (12-digit accuracy run).
response_file = 'gemini_response/div_acc12_response.txt'

# Requests-per-minute limit of the free API tier; the experiment loop derives
# its pause between batches from this value.
rpm = 60

In [26]:
# Resume support: batches whose 0-based index is below this value are skipped.
# To restart at the batch that was printed as "Sending batch N", set it to N - 1.
batch_no_to_start_from = 0
DIGIT_ACC = 12

# Responses are appended to `response_file`; create its directory up front so
# the first write cannot fail after an API request has already been spent.
os.makedirs(os.path.dirname(response_file) or '.', exist_ok=True)

for i, batch_prompt in enumerate(batch_prompts):
    if i < batch_no_to_start_from:
        continue
    # print status
    print(f"Sending batch {i+1} of {len(batch_prompts)}")

    num_prompts = len(batch_prompt)

    # Build one question per record; only the first two fields (dividend and
    # divisor) are used, the remaining three fields are ignored here.
    curr_prompt = "".join([f"what is {a} / {b} with accuracy of {DIGIT_ACC} digits after decimal point? \n " for (a,b,_,_,_) in batch_prompt])

    # Full request = priming conversation + this batch's questions as a new
    # user turn. The copy keeps Initial_messages itself unchanged between batches.
    complete_promt = Initial_messages.copy()
    complete_promt.append({'role':'user', 'parts': [curr_prompt]})

    # candidate_count is 1 because only one response is needed,
    # temperature is 0 because deterministic responses are wanted.
    response = gemini.generate_content(
        complete_promt,
        generation_config=genai.GenerationConfig(
            candidate_count=1,
            temperature=0.0,
        ),
    )

    try:
        # Text of the first part of the first candidate, one answer per line.
        model_responses = response.candidates[0].content.parts[0].text.split('\n')
    except IndexError:
        # No candidate or no content part came back: store one placeholder
        # per prompt so the output file keeps one line per question.
        print("Error in response")
        model_responses = ["0"]*num_prompts

    # The output file is only useful if it lines up with the prompts, so make
    # a mismatch visible instead of letting it pass silently.
    if len(model_responses) != num_prompts:
        print(f"Warning: batch {i+1} returned {len(model_responses)} lines for {num_prompts} prompts")

    # save responses to file
    with open(response_file, 'a') as file:
        for model_response in model_responses:
            file.write(model_response + '\n')

    # print the prompt and response counts, then each pair
    print('# of prompts: ',len(batch_prompt),'# of responses: ', len(model_responses))

    # `answer` (not `response`) so the API response object is not shadowed.
    for prompt, answer in zip(batch_prompt, model_responses):
        print(prompt, answer, sep=" ->")

    print("Received responses for batch", i+1, "of", len(batch_prompts))
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    # Pause 60/rpm seconds to stay under the rate limit. True division is
    # required: floor division gives 0 for rpm > 60 and too short a pause
    # whenever rpm does not divide 60.
    time.sleep(60 / rpm)

print("Responses saved to", response_file)

Sending batch 1 of 173
# of prompts:  50 # of responses:  50
[1.0, 2.0, 0.5, 0.5, 0.5] ->1.0 / 2.0 = 0.500000000000
[2.0, 1.0, 2.0, 2.0, 2.0] ->2.0 / 1.0 = 2.000000000000
[1.0, 3.0, 0.3333, 0.33333333, 0.333333333333] ->1.0 / 3.0 = 0.333333333333
[3.0, 1.0, 3.0, 3.0, 3.0] ->3.0 / 1.0 = 3.000000000000
[1.0, 4.0, 0.25, 0.25, 0.25] ->1.0 / 4.0 = 0.250000000000
[4.0, 1.0, 4.0, 4.0, 4.0] ->4.0 / 1.0 = 4.000000000000
[1.0, 5.0, 0.2, 0.2, 0.2] ->1.0 / 5.0 = 0.200000000000
[5.0, 1.0, 5.0, 5.0, 5.0] ->5.0 / 1.0 = 5.000000000000
[1.0, 6.0, 0.1667, 0.16666667, 0.166666666667] ->1.0 / 6.0 = 0.166666666667
[6.0, 1.0, 6.0, 6.0, 6.0] ->6.0 / 1.0 = 6.000000000000
[1.0, 7.0, 0.1429, 0.14285714, 0.142857142857] ->1.0 / 7.0 = 0.142857142857
[7.0, 1.0, 7.0, 7.0, 7.0] ->7.0 / 1.0 = 7.000000000000
[1.0, 8.0, 0.125, 0.125, 0.125] ->1.0 / 8.0 = 0.125000000000
[8.0, 1.0, 8.0, 8.0, 8.0] ->8.0 / 1.0 = 8.000000000000
[1.0, 9.0, 0.1111, 0.11111111, 0.111111111111] ->1.0 / 9.0 = 0.111111111111
[9.0, 1.0, 9.0, 9.0, 