In [1]:
import openai
import json
from tqdm import tqdm
from time import sleep
import tiktoken

enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
openai.api_key = open("apikey.txt", "r").read().strip("\n")

with open("../output/group_selection_grobid/braestrup_animal_1963.json") as f:
    dat = json.load(f)

texts = [_['text'] for _ in dat['pdf_parse']['body_text']]

In [55]:
instructions = """
Instructions:
 - You are an editors working for a journal. 
 - Your task is to analyze articles that consist of many paragraphs.
 - Your analysis should contain the following:
     - Does each sentence contains mentions or references to other authors (recall that names often start with upper case) ?
     - Does each sentence contains mentions to a theory?
     - Does each sentence contains equations?
     - Does each sentence contains mentions to a model?
     - Does each sentence refers to the author's view or someone else's view ?
 - Provide a sentiment score for the paragraphs using the following scale: 1 to 9 (where 1 is most negative, 5 is neutral, and 9 is most positive), take into account opposing sentiments in the mentions or references to other authors. 
 - Format your response in a json format where for each sentence, you provide the text, overall sentiment score, then if there are mentions or references, with their associated sentiment scores, and finally the equation, model, and if this is the author's view.
"""

In [56]:
def calc_price(nb_toks):
    return print(f"{round((nb_toks / 1000) * 0.002, 4)}$")

def calc_tot_price():
    tot_toks = 0
    for i in range(0, len(texts), 4):
        message_history = [ instructions ] + [ texts[i] ]
        for j in range(4):
            upper_index = j+1+i
            try:
                message_history += message_history + [texts[upper_index]]
            except:
                break
        tot_toks += len(enc.encode(' '.join(message_history)))
        
    return calc_price(tot_toks)


In [57]:
calc_tot_price()

0.1383$


## Experiment 1.2: Detecting opposing sentiments in Braestrup 1963

`Description:` Can gpt3.5-turbo detect opposing sentiments in scientific papers? 

`Abilities Required:`
 - Parsing a potentially noisy paragraph sentence by sentence
 - Detecting opposing sentiments in scientific contexts
 - Identifying formal and informal references/mentions 

`Extra condition:` I wanted to make sure that the output were the same on two different occasions.

`Last updated`: 2023-04-11

In [16]:
message_history = []


def chat(inp, role="user"):
    message_history.append({"role": role, "content": inp})
    
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=message_history
    )

    reply_content = completion.choices[0].message.content
    message_history.append({"role": "assistant", "content": reply_content})
    return reply_content

user_input = f"""
{instructions}

Here is the first paragraph:
{texts[0]}
"""

chat(user_input);

In [25]:
for text in tqdm(texts):
    chat(text)
    sleep(1)

 15%|█▌        | 6/39 [01:30<08:17, 15.08s/it]


InvalidRequestError: This model's maximum context length is 4097 tokens. However, your messages resulted in 4257 tokens. Please reduce the length of the messages.

In [55]:
# have a global history
global_history = []
global_history += message_history 

In [61]:
# delete incomplete prompt
del global_history[-3:]

In [66]:
# restart message history
message_history = []

user_input = f"""
{instructions}

Here is the first paragraph:
{texts[5]}
"""

chat(user_input)

'{\n  "paragraph 1": [\n    {\n      "text": "Various problems and aspects are mixed together in the most confusing way.",\n      "sentiment": 4,\n      "mentions": [],\n      "theory_mention": [],\n      "equation": [],\n      "model_mention": [],\n      "author_view": true\n    },\n    {\n      "text": "One point to which I shall revert later is the ambiguous use of the word dispersion in some chapters.",\n      "sentiment": 3,\n      "mentions": [],\n      "theory_mention": [],\n      "equation": [],\n      "model_mention": [],\n      "author_view": true\n    },\n    {\n      "text": "(Usage in other chapters would appear to be in order. It is amusing to reflect that if this had been a religious text in the strict sense, philologists might, in some distant future, have referred these chapters to different authors).",\n      "sentiment": 5,\n      "mentions": [\n        {\n          "text": "philologists",\n          "sentiment": 5\n        }\n      ],\n      "theory_mention": [],\n 

In [53]:
texts[5]

'Various problems and aspects are mixed together in the most confusing way. One point to which I shall revert later is the ambiguous use of the word dispersion in some chapters. (Usage in other chapters would appear to be in order. It is amusing to reflect that if this had been a religious text in the strict sense, philologists might, in some distant future, have referred these chapters to different authors). The term epideictic displays, "signifying literally\'meant for display\' but connoting in its original Greek form the presenting of a sample", is used consistently instead of such commonly used uncommittal terms as communal display, thus perhaps suggesting to the reader, not to mention the author, that the highly controversial theories involved have actually been proved.'

## Billing

In [43]:

calc_price(16_956)

0.0339$
