# Generating result

## Set model

In [1]:
!pip install -q -U google-generativeai

In [2]:
import pathlib
import google.generativeai as genai

In [3]:
from google.colab import userdata
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

In [4]:
for m in genai.list_models():
  if 'generateContent' in m.supported_generation_methods:
    print(m.name)

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-pro
models/gemini-pro-vision


In [5]:
model = genai.GenerativeModel('gemini-1.0-pro-001')

## generate text response/chat response and write it to csv

# function

In [6]:
import os
import pandas as pd
from tqdm import tqdm
import time
from vertexai.preview.generative_models import (
    HarmCategory,
    HarmBlockThreshold )
from google.cloud.aiplatform_v1beta1.types.content import SafetySetting

def getGemini10Pro001Response(prompt):
    safety_settings : list[str] = [{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}]
    response = model.generate_content(prompt,safety_settings = safety_settings).text
    return response

def getLLMResponse(prompt):
    return getGemini10Pro001Response(prompt)

def splitQuestion(question):
    sentences = question.split('. ')
    last_sentence = sentences[-1]
    # Remove the last sentence
    sentences.pop(-1)
    # Join the remaining
    new_question = '. '.join(sentences)
    return last_sentence, new_question
def getGeminiChatResponseNoSub(model,context,question): #model type / large question in str/ subquestions in array
    header_instruction = ''' Consider this fictional board game scenario and the rules defined for the scenario: ''' ##### the one for starting the chat
    # header_last_instruction=''' Given this, wait for the subproblems I give you and answer them accordingly   '''
    final_instruction = ''' Given this, answer the final question: ''' ### what's the best answer etc...
    # last_sentence, new_question = splitQuestion(question)
    safety_settings : list[str] = [{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}]
    chat = model.start_chat(history=[])
    response = chat.send_message(header_instruction + context + final_instruction + question,
                                 safety_settings=safety_settings)
    # return response.text
    history =""
    for message in chat.history:
        history += message.role + ": " + message.parts[0].text + "\n"
    return history, response.text

def getGeminiChatResponse(model,context,question,subquestions): #model type / large question in str/ subquestions in array
    header_instruction = ''' Consider this fictional board game scenario and the rules defined for the scenario: ''' ##### the one for starting the chat
    header_last_instruction=''' Given this, wait for the subproblems I give you and answer them accordingly ok?  '''
    final_instruction = ''' Given the answers you have generated for the subproblems, answer the final question: ''' ### what's the best answer etc...
    # last_sentence, new_question = splitQuestion(question)
    safety_settings : list[str] = [{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
                    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}]
    chat = model.start_chat(history=[])
    response = chat.send_message(header_instruction + context +header_last_instruction,safety_settings=safety_settings )
    for subs in subquestions:
        response = chat.send_message(subs,safety_settings=safety_settings)
    response  = chat.send_message(final_instruction + question,safety_settings=safety_settings)
    # return response.text
    history =""
    for message in chat.history:
        history += message.role + ": " + message.parts[0].text + "\n"
    return history, response.text

In [59]:
import csv
import json
def getBaseAnswers(dataDf):
    output_csv_path = '/content/drive/MyDrive/gemini/data/Gemini_results.csv'
    #output_csv_path = '/content/drive/MyDrive/gemini/data/Gemini_sub_generation.csv'
    with open(output_csv_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Example', 'Response'])
        count = 0
        for index, row in tqdm(dataDf.iterrows(), total=dataDf.shape[0]):
            prompt = row['example'] + "\n The label is what (proved, disproved, unknown)?"
            #prompt = "Based on this question: "+row['example'] + "\n Could you generate some sub-questions?"+ "\n each sub-problem should be divided by '||' "
            response = getLLMResponse(prompt)
            writer.writerow([row['example'], response])
            count += 1
            print("The response for " + str(count) + "is: ",response)
def getSubs(dataDf):
    # output_csv_path = '/content/drive/MyDrive/gemini/data/Gemini_results.csv'
    output_csv_path = '/content/drive/MyDrive/gemini/data/Gemini_sub_generation.csv'
    with open(output_csv_path, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # writer.writerow(['Example', 'Response'])
        count = 0
        for index, row in tqdm(dataDf.iloc[977:].iterrows(), total=dataDf.shape[0]-977):
            # prompt = row['example'] + "\n The label is what (proved, disproved, unknown)?"
            prompt = "Based on this question: "+row['example'] + "\n Could you generate some sub-questions?"+ '''\n
            the response should follow the format: \n
            1: (subquestion1) \n
            2: (subquestion2) \n
            3: (subquestion1) \n
            and so on......'''
            response = getLLMResponse(prompt)
            writer.writerow([row['example'], response])
            count += 1
            # print("The response for " + str(count) + "is: ",response)

def getChatResponse(dataDf):
    # output_csv_path = '/content/drive/MyDrive/gemini/data/Gemini_final_results.csv'
    output_csv_path = '/content/drive/MyDrive/gemini/data/Gemini_decomp_Gemini_sol.csv'
    with open(output_csv_path, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # writer.writerow(['context','question','answer', 'chat'])
        count = 0
        for index, row in tqdm(dataDf.iloc[924:].iterrows(), total=dataDf.shape[0]-924):
            context = row['context']
            question = row['question'] + "\n Give me a one word answer (proved, disproved, unknown)"
            subquestion = row["parsed_Response"]
            if not subquestion or subquestion == "nan":
                print("no subs")
                history,response = getGeminiChatResponseNoSub(model,context,question)
            else:
                subquestions = subquestion.split("||")
                print("have subs")
                history,response = getGeminiChatResponse(model,context,question,subquestions)
            writer.writerow([context, question,response,history])
            count += 1
            # print("The response for " + str(count) + "is: ",response)
def getCOTAnswers(dataDf):
    output_csv_path = '/content/drive/MyDrive/gemini/data/Gemini_CoT_results.csv'
    CoT = '''
        Given the question:
            A few players are playing a boardgame. The current state of the game is as follows. The carp rolls the dice for the squirrel. The sea bass sings a victory song for the elephant. And the rules of the game are as follows. Rule1: If the carp rolls the dice for the squirrel, then the squirrel removes from the board one of the pieces of the sheep. Rule2: If the squirrel removes one of the pieces of the sheep and the zander knocks down the fortress of the sheep, then the sheep removes one of the pieces of the whale. Rule3: Regarding the zander, if it took a bike from the store, then we can conclude that it does not knock down the fortress of the sheep. Rule4: The zander knocks down the fortress of the sheep whenever at least one animal sings a song of victory for the elephant. Rule3 is preferred over Rule4. Based on the game state and the rules and preferences, does the sheep remove from the board one of the pieces of the whale?
        The reasoning is:
            We know the sea bass sings a victory song for the elephant, and according to Rule4 "if at least one animal sings a victory song for the elephant, then the zander knocks down the fortress of the sheep", and for the conflicting and higher priority rule Rule3 we cannot prove the antecedent "the zander took a bike from the store", so we can conclude "the zander knocks down the fortress of the sheep". We know the carp rolls the dice for the squirrel, and according to Rule1 "if the carp rolls the dice for the squirrel, then the squirrel removes from the board one of the pieces of the sheep", so we can conclude "the squirrel removes from the board one of the pieces of the sheep". We know the squirrel removes from the board one of the pieces of the sheep and the zander knocks down the fortress of the sheep, and according to Rule2 "if the squirrel removes from the board one of the pieces of the sheep and the zander knocks down the fortress of the sheep, then the sheep removes from the board one of the pieces of the whale", so we can conclude "the sheep removes from the board one of the pieces of the whale". So the statement "the sheep removes from the board one of the pieces of the whale" is proved and the answer is "yes".
        The label is:
            proved
        Now giving the question: '''
    with open(output_csv_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Example', 'Response'])
        count = 0
        for index, row in tqdm(dataDf.iterrows(), total=dataDf.shape[0]):
            prompt = row['example'] + "\n The label is what (proved, disproved, unknown)?"
            response = getLLMResponse(prompt)
            writer.writerow([row['example'], response])
            count += 1
            print("The response for " + str(count) + "is: ",response)

# call

In [60]:
def main():
    # dataPath = "/content/drive/MyDrive/gemini/data/test.json"
    # df = pd.read_json(dataPath, dtype=str)
    # getBaseAnswers(df)
    #  dataPath = "/content/drive/MyDrive/gemini/data/output-with-subproblems.json"
    #  df = pd.read_json(dataPath, dtype=str)
    #  getChatResponse(df)
     dataPath = "/content/drive/MyDrive/gemini/data/Gemini_parsed_sub_generation.csv"
     df = pd.read_csv(dataPath)
     df = df.astype(str)
     getChatResponse(df)

if __name__ == "__main__":
    main()


  0%|          | 0/76 [00:00<?, ?it/s]

have subs


  1%|▏         | 1/76 [00:16<20:50, 16.67s/it]

have subs


  3%|▎         | 2/76 [00:29<17:37, 14.28s/it]

have subs


  4%|▍         | 3/76 [00:37<14:00, 11.51s/it]

have subs


  5%|▌         | 4/76 [00:45<12:09, 10.13s/it]

have subs


  7%|▋         | 5/76 [00:55<12:05, 10.21s/it]

have subs


  8%|▊         | 6/76 [01:02<10:26,  8.96s/it]

have subs


  9%|▉         | 7/76 [01:21<13:59, 12.17s/it]

have subs


 11%|█         | 8/76 [01:34<14:15, 12.58s/it]

have subs


 12%|█▏        | 9/76 [01:45<13:30, 12.10s/it]

have subs


 13%|█▎        | 10/76 [02:03<15:17, 13.90s/it]

have subs


 14%|█▍        | 11/76 [02:21<16:29, 15.23s/it]

have subs


 16%|█▌        | 12/76 [02:40<17:28, 16.38s/it]

have subs


 17%|█▋        | 13/76 [03:00<18:04, 17.22s/it]

have subs


 18%|█▊        | 14/76 [03:13<16:39, 16.12s/it]

have subs


 20%|█▉        | 15/76 [03:32<17:18, 17.03s/it]

have subs


 21%|██        | 16/76 [03:45<15:45, 15.76s/it]

have subs


 22%|██▏       | 17/76 [04:01<15:36, 15.88s/it]

have subs


 24%|██▎       | 18/76 [04:23<16:56, 17.52s/it]

have subs


 25%|██▌       | 19/76 [04:47<18:37, 19.61s/it]

have subs


 26%|██▋       | 20/76 [05:05<17:56, 19.22s/it]

have subs


 28%|██▊       | 21/76 [05:22<16:58, 18.53s/it]

have subs


 29%|██▉       | 22/76 [05:44<17:27, 19.39s/it]

have subs


 30%|███       | 23/76 [05:52<14:18, 16.20s/it]

have subs


 32%|███▏      | 24/76 [06:11<14:45, 17.03s/it]

have subs


 33%|███▎      | 25/76 [06:22<12:55, 15.21s/it]

have subs


 34%|███▍      | 26/76 [06:42<13:47, 16.55s/it]

no subs


 36%|███▌      | 27/76 [06:43<09:46, 11.97s/it]

have subs


 37%|███▋      | 28/76 [06:58<10:07, 12.65s/it]

have subs


 38%|███▊      | 29/76 [07:12<10:27, 13.34s/it]

have subs


 39%|███▉      | 30/76 [07:32<11:42, 15.27s/it]

have subs


 41%|████      | 31/76 [07:53<12:43, 16.96s/it]

have subs


 42%|████▏     | 32/76 [08:01<10:22, 14.16s/it]

have subs


 43%|████▎     | 33/76 [08:17<10:36, 14.81s/it]

have subs


 45%|████▍     | 34/76 [08:32<10:21, 14.80s/it]

have subs


 46%|████▌     | 35/76 [08:52<11:09, 16.34s/it]

have subs


 47%|████▋     | 36/76 [09:17<12:35, 18.89s/it]

have subs


 49%|████▊     | 37/76 [09:34<11:59, 18.45s/it]

have subs


 50%|█████     | 38/76 [09:50<11:06, 17.55s/it]

no subs


 51%|█████▏    | 39/76 [09:51<07:47, 12.64s/it]

have subs


 53%|█████▎    | 40/76 [10:15<09:37, 16.04s/it]

have subs


 54%|█████▍    | 41/76 [10:26<08:35, 14.72s/it]

have subs


 55%|█████▌    | 42/76 [10:44<08:47, 15.51s/it]

have subs


 57%|█████▋    | 43/76 [10:51<07:11, 13.09s/it]

have subs


 58%|█████▊    | 44/76 [11:10<07:58, 14.94s/it]

have subs


 59%|█████▉    | 45/76 [11:30<08:24, 16.26s/it]

have subs


 61%|██████    | 46/76 [11:44<07:48, 15.61s/it]

have subs


 62%|██████▏   | 47/76 [11:58<07:22, 15.27s/it]

have subs


 63%|██████▎   | 48/76 [12:06<06:01, 12.91s/it]

have subs


 64%|██████▍   | 49/76 [12:23<06:27, 14.37s/it]

have subs


 66%|██████▌   | 50/76 [12:34<05:43, 13.22s/it]

have subs


 67%|██████▋   | 51/76 [12:46<05:20, 12.81s/it]

have subs


 68%|██████▊   | 52/76 [12:54<04:33, 11.41s/it]

have subs


 70%|██████▉   | 53/76 [13:06<04:24, 11.49s/it]

have subs


 71%|███████   | 54/76 [13:25<05:07, 13.97s/it]

have subs


 72%|███████▏  | 55/76 [13:33<04:11, 11.95s/it]

have subs


 74%|███████▎  | 56/76 [14:04<05:52, 17.62s/it]

have subs


 75%|███████▌  | 57/76 [14:15<05:02, 15.93s/it]

have subs


 76%|███████▋  | 58/76 [14:35<05:05, 17.00s/it]

have subs


 78%|███████▊  | 59/76 [15:43<09:10, 32.39s/it]

have subs


 79%|███████▉  | 60/76 [16:05<07:46, 29.18s/it]

have subs


 80%|████████  | 61/76 [16:27<06:44, 26.98s/it]

have subs


 82%|████████▏ | 62/76 [16:57<06:33, 28.08s/it]

have subs


 83%|████████▎ | 63/76 [17:07<04:53, 22.55s/it]

have subs


 84%|████████▍ | 64/76 [17:27<04:21, 21.82s/it]

have subs


 86%|████████▌ | 65/76 [17:45<03:48, 20.76s/it]

have subs


 87%|████████▋ | 66/76 [18:10<03:40, 22.01s/it]

no subs


 88%|████████▊ | 67/76 [18:12<02:22, 15.82s/it]

have subs


 89%|████████▉ | 68/76 [18:53<03:08, 23.52s/it]

have subs


 91%|█████████ | 69/76 [19:22<02:54, 24.97s/it]

have subs


 92%|█████████▏| 70/76 [19:32<02:03, 20.64s/it]

have subs


 93%|█████████▎| 71/76 [19:43<01:27, 17.56s/it]

have subs


 95%|█████████▍| 72/76 [20:00<01:10, 17.67s/it]

have subs


 96%|█████████▌| 73/76 [20:13<00:48, 16.21s/it]

have subs


 97%|█████████▋| 74/76 [20:24<00:29, 14.71s/it]

have subs


 99%|█████████▊| 75/76 [20:40<00:15, 15.06s/it]

have subs


100%|██████████| 76/76 [20:58<00:00, 16.56s/it]


In [58]:
import pandas as pd

df = pd.read_csv('/content/drive/MyDrive/gemini/data/Gemini_decomp_Gemini_sol.csv')

# The length of the DataFrame is the number of rows excluding the header
row_count = len(df)

print(f"Number of rows excluding the header: {row_count}")


Number of rows excluding the header: 924


In [None]:
def CoT():
    dataPath = "/content/drive/MyDrive/gemini/data/test.json"
    with open(dataPath, 'r', encoding='utf-8') as file:
        data = json.load(file)
    df = pd.DataFrame(data)
    getCOTAnswers(df)
CoT()

In [None]:
def Generate_subs():
    dataPath = "/content/drive/MyDrive/gemini/data/test.json"
    with open(dataPath, 'r', encoding='utf-8') as file:
        data = json.load(file)
    df = pd.DataFrame(data)
    getSubs(df)
Generate_subs()

100%|██████████| 23/23 [00:58<00:00,  2.54s/it]


#Process Answer

## Check Header

### subs

In [None]:
import csv

file_path = '/content/drive/MyDrive/gemini/data/Gemini_final_results.csv'

with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.reader(file)
    header = next(reader)
    print(f"Header: {header}")


Header: ['context', 'question', 'answer', 'chat']


### base

In [None]:
file_path2 = '/content/drive/MyDrive/gemini/data/Gemini_results.csv'

with open(file_path2, mode='r', encoding='utf-8') as file:
    reader = csv.reader(file)
    header = next(reader)
    print(f"Header: {header}")

Header: ['Example', 'Response']


### cot

In [None]:
import csv

file_path = '/content/drive/MyDrive/gemini/data/Gemini_CoT_results.csv'

with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.reader(file)
    header = next(reader)
    print(f"Header: {header}")

Header: ['Example', 'Response']


### gene_subs

In [61]:
import csv

file_path = '/content/drive/MyDrive/gemini/data/Gemini_decomp_Gemini_sol.csv'

with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.reader(file)
    header = next(reader)
    print(f"Header: {header}")

Header: ['context', 'question', 'answer', 'chat']


## Change all answer to lower

### subs

In [None]:
import csv

file_path = '/content/drive/MyDrive/gemini/data/Gemini_final_results.csv'
column_name = 'answer'

modified_rows = []
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    headers = reader.fieldnames
    for row in reader:
        row[column_name] = row[column_name].lower()  # Convert to lowercase
        modified_rows.append(row)

with open(file_path, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=headers)
    writer.writeheader()
    writer.writerows(modified_rows)

### base

In [None]:
import csv

file_path = '/content/drive/MyDrive/gemini/data/Gemini_results.csv'
column_name = 'Response'

modified_rows = []
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    headers = reader.fieldnames
    for row in reader:
        row[column_name] = row[column_name].lower()  # Convert to lowercase
        modified_rows.append(row)

with open(file_path, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=headers)
    writer.writeheader()
    writer.writerows(modified_rows)

### cot

In [None]:
import csv

file_path = '/content/drive/MyDrive/gemini/data/Gemini_CoT_results.csv'
column_name = 'Response'

modified_rows = []
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    headers = reader.fieldnames
    for row in reader:
        row[column_name] = row[column_name].lower()  # Convert to lowercase
        modified_rows.append(row)

with open(file_path, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=headers)
    writer.writeheader()
    writer.writerows(modified_rows)

### gene_subs

In [62]:
import csv

file_path = '/content/drive/MyDrive/gemini/data/Gemini_decomp_Gemini_sol.csv'
column_name = 'answer'

modified_rows = []
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    headers = reader.fieldnames
    for row in reader:
        row[column_name] = row[column_name].lower()  # Convert to lowercase
        modified_rows.append(row)

with open(file_path, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=headers)
    writer.writeheader()
    writer.writerows(modified_rows)

##Check the answer and update the csv

### subs

In [None]:
import re
file_path = '/content/drive/MyDrive/gemini/data/Gemini_final_results.csv'
column_name = 'answer'
def contains_all_three(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    disproved_pattern = r'\bdisproved\b'
    unknown_pattern = r'\bunknown\b'
    has_proved = re.search(proved_pattern, text) is not None
    has_disproved = re.search(disproved_pattern, text) is not None
    has_unknown = re.search(unknown_pattern, text) is not None
    return has_proved and has_disproved and has_unknown
def contains_both_proved_disproved(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    disproved_pattern = r'\bdisproved\b'
    has_proved = re.search(proved_pattern, text) is not None
    has_disproved = re.search(disproved_pattern, text) is not None
    return has_proved and has_disproved
def contains_proved(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    has_proved = re.search(proved_pattern, text) is not None
    return has_proved
def contains_disproved(text):
    # Regular expression pattern to match whole words only
    disproved_pattern = r'\bdisproved\b'
    has_disproved = re.search(disproved_pattern, text) is not None
    return has_disproved
def contains_unknown(text):
    # Regular expression pattern to match whole words only
    unknown_pattern = r'\bunknown\b'
    has_unknown = re.search(unknown_pattern, text) is not None
    return has_unknown
def contains_none(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    disproved_pattern = r'\bdisproved\b'
    unknown_pattern = r'\bunknown\b'
    has_proved = re.search(proved_pattern, text) is None
    has_disproved = re.search(disproved_pattern, text) is None
    has_unknown = re.search(unknown_pattern, text) is None
    return has_proved and has_disproved and has_unknown
modified_rows = []
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    countb = 0
    countp = 0
    countd = 0
    countu = 0
    counta = 0
    countn = 0
    for index, row in enumerate(reader, start=1):
        column_value = row[column_name]
        if contains_all_three(column_value):
            counta +=1
        if contains_none(column_value):
            print("the index for none: ",index)
            countn +=1
            row[column_name] = "proved"
        if contains_both_proved_disproved(column_value):
            countb +=1
        if contains_proved(column_value):
            countp +=1
            row[column_name] = "proved"
        if contains_disproved(column_value):
            countd +=1
            row[column_name] = "disproved"
        if contains_unknown(column_value):
            countu +=1
            row[column_name] = "unknown"
        modified_rows.append(row)
    print("all three: ",counta)
    print("none: ",countn)
    print("both p and d: ",countb)
    print("proved: ",countp)
    print("disproved: ",countd)
    print("unknown: ",countu)
    print("total: ",len(modified_rows))
with open("/content/drive/MyDrive/gemini/data/updated_final_results.csv", mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=reader.fieldnames)
    writer.writeheader()
    writer.writerows(modified_rows)

the index for none:  153
all three:  0
none:  1
both p and d:  0
proved:  286
disproved:  411
unknown:  302
total:  1000


### base

In [None]:
import re
file_path = '/content/drive/MyDrive/gemini/data/Gemini_results.csv'
column_name = 'Response'
def contains_all_three(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    disproved_pattern = r'\bdisproved\b'
    unknown_pattern = r'\bunknown\b'
    has_proved = re.search(proved_pattern, text) is not None
    has_disproved = re.search(disproved_pattern, text) is not None
    has_unknown = re.search(unknown_pattern, text) is not None
    return has_proved and has_disproved and has_unknown
def contains_both_proved_disproved(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    disproved_pattern = r'\bdisproved\b'
    has_proved = re.search(proved_pattern, text) is not None
    has_disproved = re.search(disproved_pattern, text) is not None
    return has_proved and has_disproved
def contains_proved(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    has_proved = re.search(proved_pattern, text) is not None
    return has_proved
def contains_disproved(text):
    # Regular expression pattern to match whole words only
    disproved_pattern = r'\bdisproved\b'
    has_disproved = re.search(disproved_pattern, text) is not None
    return has_disproved
def contains_unknown(text):
    # Regular expression pattern to match whole words only
    unknown_pattern = r'\bunknown\b'
    has_unknown = re.search(unknown_pattern, text) is not None
    return has_unknown
def contains_none(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    disproved_pattern = r'\bdisproved\b'
    unknown_pattern = r'\bunknown\b'
    has_proved = re.search(proved_pattern, text) is None
    has_disproved = re.search(disproved_pattern, text) is None
    has_unknown = re.search(unknown_pattern, text) is None
    return has_proved and has_disproved and has_unknown
modified_rows = []
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    countb = 0
    countp = 0
    countd = 0
    countu = 0
    counta = 0
    countn = 0
    for index, row in enumerate(reader, start=1):
        column_value = row[column_name]
        if contains_all_three(column_value):
            counta +=1
        if contains_none(column_value):
            countn +=1
        if contains_both_proved_disproved(column_value):
            countb +=1
        if contains_proved(column_value):
            countp +=1
            row[column_name] = "proved"
        if contains_disproved(column_value):
            countd +=1
            row[column_name] = "disproved"
        if contains_unknown(column_value):
            countu +=1
            row[column_name] = "unknown"
        modified_rows.append(row)
    print("all three: ",counta)
    print("none: ",countn)
    print("both p and d: ",countb)
    print("proved: ",countp)
    print("disproved: ",countd)
    print("unknown: ",countu)
    print("total: ",len(modified_rows))
with open("/content/drive/MyDrive/gemini/data/updated_gemini_results.csv", mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=reader.fieldnames)
    writer.writeheader()
    writer.writerows(modified_rows)

all three:  0
none:  0
both p and d:  0
proved:  357
disproved:  443
unknown:  200
total:  1000


### cot

In [None]:
import re
file_path = '/content/drive/MyDrive/gemini/data/Gemini_CoT_results.csv'
column_name = 'Response'
def contains_all_three(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    disproved_pattern = r'\bdisproved\b'
    unknown_pattern = r'\bunknown\b'
    has_proved = re.search(proved_pattern, text) is not None
    has_disproved = re.search(disproved_pattern, text) is not None
    has_unknown = re.search(unknown_pattern, text) is not None
    return has_proved and has_disproved and has_unknown
def contains_both_proved_disproved(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    disproved_pattern = r'\bdisproved\b'
    has_proved = re.search(proved_pattern, text) is not None
    has_disproved = re.search(disproved_pattern, text) is not None
    return has_proved and has_disproved
def contains_proved(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    has_proved = re.search(proved_pattern, text) is not None
    return has_proved
def contains_disproved(text):
    # Regular expression pattern to match whole words only
    disproved_pattern = r'\bdisproved\b'
    has_disproved = re.search(disproved_pattern, text) is not None
    return has_disproved
def contains_unknown(text):
    # Regular expression pattern to match whole words only
    unknown_pattern = r'\bunknown\b'
    has_unknown = re.search(unknown_pattern, text) is not None
    return has_unknown
def contains_none(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    disproved_pattern = r'\bdisproved\b'
    unknown_pattern = r'\bunknown\b'
    has_proved = re.search(proved_pattern, text) is None
    has_disproved = re.search(disproved_pattern, text) is None
    has_unknown = re.search(unknown_pattern, text) is None
    return has_proved and has_disproved and has_unknown
modified_rows = []
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    countb = 0
    countp = 0
    countd = 0
    countu = 0
    counta = 0
    countn = 0
    for index, row in enumerate(reader, start=1):
        column_value = row[column_name]
        if contains_all_three(column_value):
            counta +=1
        if contains_none(column_value):
            countn +=1
        if contains_both_proved_disproved(column_value):
            countb +=1
        if contains_proved(column_value):
            countp +=1
            row[column_name] = "proved"
        if contains_disproved(column_value):
            countd +=1
            row[column_name] = "disproved"
        if contains_unknown(column_value):
            countu +=1
            row[column_name] = "unknown"
        modified_rows.append(row)
    print("all three: ",counta)
    print("none: ",countn)
    print("both p and d: ",countb)
    print("proved: ",countp)
    print("disproved: ",countd)
    print("unknown: ",countu)
    print("total: ",len(modified_rows))
with open("/content/drive/MyDrive/gemini/data/Gemini_parsed_CoT_results.csv", mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=reader.fieldnames)
    writer.writeheader()
    writer.writerows(modified_rows)

all three:  0
none:  0
both p and d:  0
proved:  331
disproved:  448
unknown:  221
total:  1000


### gene_subs

In [75]:
import re
file_path = '/content/drive/MyDrive/gemini/data/Gemini_decomp_Gemini_sol.csv'
column_name = 'answer'
def contains_all_three(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    disproved_pattern = r'\bdisproved\b'
    unknown_pattern = r'\bunknown\b'
    has_proved = re.search(proved_pattern, text) is not None
    has_disproved = re.search(disproved_pattern, text) is not None
    has_unknown = re.search(unknown_pattern, text) is not None
    return has_proved and has_disproved and has_unknown
def contains_both_proved_disproved(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    disproved_pattern = r'\bdisproved\b'
    has_proved = re.search(proved_pattern, text) is not None
    has_disproved = re.search(disproved_pattern, text) is not None
    return has_proved and has_disproved
def contains_proved(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    has_proved = re.search(proved_pattern, text) is not None
    return has_proved
def contains_disproved(text):
    # Regular expression pattern to match whole words only
    disproved_pattern = r'\bdisproved\b'
    has_disproved = re.search(disproved_pattern, text) is not None
    return has_disproved
def contains_unknown(text):
    # Regular expression pattern to match whole words only
    unknown_pattern = r'\bunknown\b'
    has_unknown = re.search(unknown_pattern, text) is not None
    return has_unknown
def contains_none(text):
    # Regular expression pattern to match whole words only
    proved_pattern = r'\bproved\b'
    disproved_pattern = r'\bdisproved\b'
    unknown_pattern = r'\bunknown\b'
    has_proved = re.search(proved_pattern, text) is None
    has_disproved = re.search(disproved_pattern, text) is None
    has_unknown = re.search(unknown_pattern, text) is None
    return has_proved and has_disproved and has_unknown
modified_rows = []
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    countb = 0
    countp = 0
    countd = 0
    countu = 0
    counta = 0
    countn = 0
    for index, row in enumerate(reader, start=1):
        column_value = row[column_name]
        if contains_all_three(column_value):
            counta +=1
        if contains_none(column_value):
            countn +=1
        if contains_both_proved_disproved(column_value):
            countb +=1
        if contains_proved(column_value):
            countp +=1
            row[column_name] = "proved"
        if contains_disproved(column_value):
            countd +=1
            row[column_name] = "disproved"
        if contains_unknown(column_value):
            countu +=1
            row[column_name] = "unknown"
        modified_rows.append(row)
    print("all three: ",counta)
    print("none: ",countn)
    print("both p and d: ",countb)
    print("proved: ",countp)
    print("disproved: ",countd)
    print("unknown: ",countu)
    print("total: ",len(modified_rows))
with open("/content/drive/MyDrive/gemini/data/Gemini_parsed_decomp_Gemini_sol.csv", mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=reader.fieldnames)
    writer.writeheader()
    writer.writerows(modified_rows)

all three:  0
none:  0
both p and d:  0
proved:  240
disproved:  441
unknown:  319
total:  1000


##Checked the one didn't contain any of the three for subs

In [73]:
file_path = '/content/drive/MyDrive/gemini/data/Gemini_decomp_Gemini_sol.csv'
column_name = 'answer'

with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for index, row in enumerate(reader):
        if index ==432:
            print(row[column_name])


disproved


## change value for index

In [72]:
df = pd.read_csv('/content/drive/MyDrive/gemini/data/Gemini_decomp_Gemini_sol.csv')
df.at[432, 'answer'] = "disproved"
df.to_csv('/content/drive/MyDrive/gemini/data/Gemini_decomp_Gemini_sol.csv', index=False)

## Recheck the answer for subs

In [None]:
import csv

file_path = '/content/drive/MyDrive/gemini/data/updated_final_results.csv'
column_name = 'answer'

with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    count = 1
    for row in reader:
        print(str(count) + ": " + row[column_name])
        count +=1

# Accuracy check

## Using subs

In [76]:
dataPath = "/content/drive/MyDrive/gemini/data/output-with-subproblems.json"
dataDf = pd.read_json(dataPath, dtype=str)
file_path = '/content/drive/MyDrive/gemini/data/updated_final_results.csv'
column_name = 'answer'
label = []
predicted = []
for index, row in tqdm(dataDf.iterrows(), total=dataDf.shape[0]):
    label.append(row['label'])
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        predicted.append(row[column_name])
print(len(predicted))

100%|██████████| 1000/1000 [00:00<00:00, 10773.41it/s]


1000


In [77]:
if len(label) == len(predicted):
    matches = sum(1 for label_value, predicted_value in zip(label, predicted) if label_value == predicted_value)
    accuracy = matches / len(label)
    print(f"Accuracy using subproblems: {accuracy:.2%}")
else:
    print("Error: The lists 'label' and 'predicted' have different lengths.")

Accuracy using subproblems: 51.80%


## The base answer

In [None]:
file_path2 = '/content/drive/MyDrive/gemini/data/updated_gemini_results.csv'
column_name = 'Response'
predicted2 = []
with open(file_path2, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        predicted2.append(row[column_name])
print(len(predicted2))

1000


In [None]:
if len(label) == len(predicted2):
    matches = sum(1 for label_value, predicted_value in zip(label, predicted2) if label_value == predicted_value)
    accuracy = matches / len(label)
    print(f"Accuracy without using subproblems: {accuracy:.2%}")
else:
    print("Error: The lists 'label' and 'predicted' have different lengths.")

Accuracy without using subproblems: 45.20%


## The CoT answer

In [82]:
file_path3 = '/content/drive/MyDrive/gemini/data/Gemini_parsed_CoT_results.csv'
column_name = 'Response'
predicted3 = []
with open(file_path3, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        predicted3.append(row[column_name])
print(len(predicted3))

1000


In [83]:
if len(label) == len(predicted3):
    matches = sum(1 for label_value, predicted_value in zip(label, predicted3) if label_value == predicted_value)
    accuracy = matches / len(label)
    print(f"Accuracy using CoT: {accuracy:.2%}")
else:
    print("Error: The lists 'label' and 'predicted' have different lengths.")

Accuracy using CoT: 42.90%


## gemini-decomp-gemini-solver

In [78]:
file_path3 = '/content/drive/MyDrive/gemini/data/Gemini_parsed_decomp_Gemini_sol.csv'
column_name = 'answer'
predicted3 = []
with open(file_path3, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    for row in reader:
        predicted3.append(row[column_name])
print(len(predicted3))

1000


In [80]:
if len(label) == len(predicted3):
    matches = sum(1 for label_value, predicted_value in zip(label, predicted3) if label_value == predicted_value)
    accuracy = matches / len(label)
    print(f"Accuracy using gemini-decomp-gemini-solver: {accuracy:.2%}")
else:
    print("Error: The lists 'label' and 'predicted' have different lengths.")

Accuracy using gemini-decomp-gemini-solver: 50.30%


# parse answer

In [None]:
import pandas as pd
import re

df = pd.read_csv('/content/drive/MyDrive/gemini/data/Gemini_sub_generation.csv')  # Replace with the actual path to your CSV file

#separates the questions with '||'
def separate_questions(text):
    questions_with_numbers = re.findall(r'(\d+: .*?\?)', text)
    return ' || '.join(questions_with_numbers)

df['parsed_Response'] = df['Response'].apply(separate_questions)
df.to_csv('/content/drive/MyDrive/gemini/data/Gemini_parsed_sub_generation.csv', index=False)  # Replace with your desired output file path

In [None]:
import pandas as pd
import json

# Load and prepare the JSON data
with open('/content/drive/MyDrive/gemini/data/output-with-subproblems.json', 'r') as file:
    json_data = json.load(file)
columns_needed = ['context', 'question']
new_columns_df = pd.DataFrame(json_data)[columns_needed]

# Load the original CSV
original_df = pd.read_csv('/content/drive/MyDrive/gemini/data/Gemini_parsed_sub_generation.csv')

# Concatenate and save
updated_df = pd.concat([original_df, new_columns_df], axis=1)
updated_df.to_csv('/content/drive/MyDrive/gemini/data/Gemini_parsed_sub_generation.csv', index=False)

In [None]:
import pandas as pd
pa = '/content/drive/MyDrive/gemini/data/Gemini_parsed_sub_generation.csv'
# Load the CSV into a DataFrame
df = pd.read_csv(pa)

# Delete the "Example" column
df = df.drop('Example', axis=1)

# Save the modified DataFrame back to the CSV
df.to_csv(pa, index=False)