In [1]:
import glob
import json
import math
import random
import re
import time

import backoff
import openai
from scipy.stats import pearsonr

# The retry wrapper below is borrowed from Quinn Dombrowski.

@backoff.on_exception(backoff.expo, openai.error.RateLimitError)
def completions_with_backoff(**kwargs):
    """Call ``openai.ChatCompletion.create`` and return its response.

    Every keyword argument is forwarded unchanged. The decorator wraps the
    call with ``backoff.on_exception`` using ``backoff.expo`` for
    ``openai.error.RateLimitError``.
    """
    response = openai.ChatCompletion.create(**kwargs)
    return response

### Read in data and OpenAI API credentials

In [42]:
# Load the organization code (first line) and the API key (second line) from a
# local file that is deliberately not included in the repo.

with open('credentials.txt', encoding = 'utf-8') as credentials_file:
    organization_line = credentials_file.readline()
    api_key_line = credentials_file.readline()

openai.organization = organization_line.strip()
openai.api_key = api_key_line.strip()
    

The data is in separate files produced by different human readers. Each line of a file is a separate JSON object.

In [3]:
def reader_code(path):
    """Return the one-letter reader code for a results file, or None if unknown.

    The code is chosen by which reader's surname occurs in ``path``:
    'underwood' -> 'u', 'mercado' -> 'm', 'lee' -> 'l'. The surnames are
    checked in that order and the first match wins.
    """
    for surname, code in (('underwood', 'u'), ('mercado', 'm'), ('lee', 'l')):
        if surname in path:
            return code
    return None


files = glob.glob('/Users/tunder/Dropbox/python/time/results/*.json')

# alljsons collects (reader code, parsed JSON object) pairs, one per input line.
alljsons = []
for afile in files:
    # The reader depends only on the file name, so resolve it once per file.
    reader = reader_code(afile)
    if reader is None:
        # Skip the file: appending its records would either reuse the reader
        # code left over from the previous file or fail with a NameError.
        print('error: no known reader name in', afile)
        continue
    with open(afile, encoding = 'utf-8') as f:
        for line in f:
            if not line.strip():
                # A blank line (e.g. a trailing newline) is not a JSON document.
                continue
            alljsons.append((reader, json.loads(line)))

We simplify this to two parallel lists of **texts** (from the fiction) and **times** estimated by the human readers. Segment ids are attached to both lists.

In [4]:
# Parallel lists of (segment id, passage text) and (segment id, narrated time).
texts = []
times = []

# Each element of alljsons is (reader code, parsed JSON object); the object
# maps a document id to a dict holding that document's 'segments' list.
for reader, record in alljsons:
    for docid, doc in record.items():
        for seg in doc['segments']:
            # The segment id is "<docid>|<segment index>|<reader code>", so the
            # same passage read by two readers gets two distinct ids.
            segid = '|'.join([docid, str(seg['idx']), reader])
            texts.append((segid, seg['text']))
            times.append((segid, seg['narratedtime']))

In [43]:
# Sanity check: both lists are appended to in lockstep, so a length mismatch
# means the construction above went wrong. Fail loudly instead of only printing.
assert len(times) == len(texts), 'texts and times have different lengths'
print(len(times), len(texts))

1168 1168


In [6]:
# First ten (segment id, narrated time) pairs.
times[:10]

[('RadcASR.txt|0|u', 52560000.0),
 ('RadcASR.txt|1|u', 5256000.0),
 ('RadcASR.txt|9|u', 120.0),
 ('RadcASR.txt|90|u', 10.0),
 ('RadcASR.txt|107|u', 7.0),
 ('RadcASR.txt|109|u', 20160.0),
 ('RadcASR.txt|113|u', 360.0),
 ('RadcASR.txt|137|u', 10.0),
 ('RadcASR.txt|148|u', 222.0),
 ('RadcASR.txt|160|u', 9986400.0)]

In [7]:
# Show one (segment id, passage text) pair.
texts[1]

('RadcASR.txt|1|u',
 'said he, "were once the seat of luxury\n \n and vice. They exhibited a singular\n instance of the retribution of Heaven,\n and were from that period forsaken,\n and abandoned to decay." His words\n excited my curiosity, and I enquired\n further concerning their meaning.\n "A solemn history belongs to this\n castle," said he, "which is too long and\n intricate for me to relate. It is, however,\n contained in a manuscript in our\n library, of which, I could, perhaps, procure\n you a sight. A brother of our order,\n a descendant of the noble house of\n Mazzini, collected and recorded the\n most striking incidents relating to his\n family, and the history thus formed, he\n left as a legacy to our convent. If you\n please, we will walk thither."\n I accompanied him to the convent,\n and the friar introduced me to his superior,\n a man of an intelligent mind and\n benevolent heart, with whom I passed\n some hours in interesting conversation.\n I believe my sentiments pl

### Define the prompt that teaches the model how to estimate time

In [8]:
# user_prompts holds the user turn of each of four few-shot examples; the
# matching model answers live in assistant_prompts at the same index.
# Example 0: the five-step task instructions followed by a sample passage.
# Backslash-newline inside the literal joins the source lines without inserting
# anything, so each continued line must carry its own trailing space.
user_prompts = ["", "", "", ""]
user_prompts[0] = "Read the following passage of fiction. Then do five things. \
1: Briefly summarize the passage.\n \
2: Reason step by step to decide how much time is described in the passage. \
If the passage doesn't include any explicit reference to time, you can guess how much time the events described would have taken. \
Even description can imply the passage of time by mentioning the earlier history of people or buildings. \
But characters' references to the past or future in spoken dialogue should not count as time that passed in the scene. \
Report the time using units of years, weeks, days, hours, or minutes. Do not say zero or N/A.\n \
3: If you described a range of possible times in step 2 take the midpoint of the range. \
Then multiply to convert the units into minutes. \n\
4: Report only the number of minutes elapsed, which should match the number in step 3. Do not reply N/A. \n\
5: Given the amount of speculation required in step 2, describe your certainty about the estimate--either high, moderate, or low.\n \
The passage follows:\n \
TWENTY-FIVE It was raining again the next morning, a slanting gray rain like a swung curtain \
of crystal beads. I got up feeling sluggish and tired and stood looking out of the windows, with \
a dark harsh taste of Sternwoods still in my mouth. I was as empty of life as a scarecrow's pockets. \
I went out to the kitchenette and drank two cups of black coffee. You can have a hangover from other \
things than alcohol. I had one from women. Women made me sick. I shaved and showered and dressed and \
got my raincoat out and went downstairs and looked out of the front door. Across the street, a hundred feet up, \
a gray Plymouth sedan was parked. It was the same one that had tried to trail me around the day before, the same \
one that I had asked Eddie Mars about. There might be a cop in it, if a cop had that much time on his hands and \
wanted to waste it following me around. Or it might be a smoothie in the detective business trying to get a noseful \
of somebody else's case in order to chisel a way into it. Or it might be the Bishop of Bermuda disapproving of my night life."

In [9]:
# assistant_prompts holds the worked five-step answer for each few-shot
# example, index-aligned with user_prompts.
# Example 0: a short scene estimated at 75 minutes with low confidence.
assistant_prompts = ["", "", "", ""]
assistant_prompts[0] = "1: A detective wakes up 'the next morning,' looks out a window for an undefined time, drinks \
(and presumably needs to make) two cups of coffee, then shaves and showers and gets dressed \
before stepping out his front door and seeing a car. \n\
2: Making coffee, showering, and getting dressed take at least an hour. There's some ambiguity \
about whether to count the implicit reference to yesterday (since this is 'the next morning') \
as time elapsed in the passage, but let's say no, since yesterday is not actually described. So, an hour to 90 minutes. \n\
3: 1.25 hours have elapsed. Multiplying by 60 minutes an hour that's 75 minutes.\n \
4: 75 minutes.\n \
5: Low confidence, because of ambiguity about a reference to the previous day."

In [10]:
# Few-shot example 1 (user turn): task instructions followed by a sample passage.
# NOTE(review): step 5 here is worded differently from user_prompts[0]
# ("Describe your certainty..." vs. "Given the amount of speculation required
# in step 2, describe your certainty...") -- confirm that this is intentional.
user_prompts[1] = "Read the following passage of fiction. Then do five things. \
1: Briefly summarize the passage.\n \
2: Reason step by step to decide how much time is described in the passage. \
If the passage doesn't include any explicit reference to time, you can guess how much time the events described would have taken. \
Even description can imply the passage of time by mentioning the earlier history of people or buildings. \
But characters' references to the past or future in spoken dialogue should not count as time that passed in the scene. \
Report the time using units of years, weeks, days, hours, or minutes. Do not say zero or N/A.\n \
3: If you described a range of possible times in step 2 take the midpoint of the range. \
Then multiply to convert the units into minutes. \n\
4: Report only the number of minutes elapsed, which should match the number in step 3. Do not reply N/A. \n\
5: Describe your certainty about the estimate—either high, moderate, or low.\n \
The passage follows:\n \
CHAPTER I  \"TOM!\" No answer.  \"TOM!\"  No answer.  \"What's gone with that boy, I wonder? You TOM!\" \
No answer.  The old lady pulled her spectacles down and looked over them about the room; then she put them up and looked out \
under them. She seldom or never looked _through_ them for so small a thing as a boy; they were her state pair, the pride of her heart, \
and were built for \"style,\" not service--she could have seen through a pair of stove-lids just as well. \
She looked perplexed for a moment, and then said, not fiercely, but still loud enough for the furniture to hear:  \
\"Well, I lay if I get hold of you I'll--\"  She did not finish, for by this time she was bending down and punching \
under the bed with the broom, and so she needed breath to punctuate the punches with. She resurrected nothing but the cat.  \
\"I never did see the beat of that boy!\" She went to the open door and stood in it and looked out among the tomato vines \
and \"jimpson\" weeds that constituted the garden. No Tom. So she lifted up her voice at an angle calculated for distance and \
shouted: \"Y-o-u-u TOM!\" There was a slight noise behind her and she turned just in time to seize a small boy by the slack \
of his roundabout and arrest his flight."

In [11]:
# Few-shot example 1 (assistant turn): the worked five-step answer for the
# passage in user_prompts[1].
# Backslash-newline joins the source lines with nothing in between, so the
# space after "also" is required; without it the prompt read "alsoalludes".
assistant_prompts[1] = "1: An old lady calls for a boy named Tom, checks for him under the bed, goes to the open door and calls for him — then finally catches him.\n\
2: The lady's actions are described minutely and seem rapid; they probably took two to four minutes. The lady also \
alludes to Tom's past bad behavior, but references to the past in dialogue should not count as time passing in the scene. \n\
3: Three minutes have elapsed.\n\
4: 3 minutes.\n\
5: High confidence."

In [12]:
# Few-shot example 2 (user turn): task instructions followed by an epistolary
# sample passage.
# Backslash-newline joins the source lines with nothing in between, so the
# space after "change of air" is required; without it the prompt read "airwould".
user_prompts[2] = "Read the following passage of fiction. Then do five things. \
1: Briefly summarize the passage.\n \
2: Reason step by step to decide how much time is described in the passage. \
If the passage doesn't include any explicit reference to time, you can guess how much time the events described would have taken. \
Even description can imply the passage of time by mentioning the earlier history of people or buildings. \
But characters' references to the past or future in spoken dialogue should not count as time that passed in the scene. \
Report the time using units of years, weeks, days, hours, or minutes. Do not say zero or N/A.\n \
3: If you described a range of possible times in step 2 take the midpoint of the range. \
Then multiply to convert the units into minutes. \n\
4: Report only the number of minutes elapsed, which should match the number in step 3. Do not reply N/A. \n\
5: Describe your certainty about the estimate—either high, moderate, or low.\n \
The passage follows:\n \
Miss WESTBURY, TO Lady BELL SYDNEY. I Hoped to have left town before now, my brother was so much better, \
that the doctors thought he might be removed without danger, and wished it, as change of air \
would have been of service to him but his fever is returned with violence Heaven knows what may be \
the consequence of this relapse. What a miserable time have I had of it my dear Lady Bell and what but \
a continuation of misery have I to expect, should he not recover? alas! 'tis not the loss of him alone I \
shall have to deplore my amiable Rochley too must be the object of my sorrow, for in that wretched case he \
never can be mine but let me drop the melancholy subject, it sinks my spirits why should I also sink yours. \
You may possibly recollect my telling you, how violently Lord Templeton was struck with the charms of the \
elegant creature we saw some time ago at Mrs. Bellmours I was convinced her charms had made a very deep \
impression I however thought no more of it, till calling one day last week to enquire for my brother, I saw \
him and by way of a little chat, asked, if he had been so fortunate as to get another sight of the lovely \
Miss Beverley? I have not, yet I confess to you Miss Westbury, but I would give half my estate, could I obtain \
that happiness are you serious my lord?"

In [13]:
# Few-shot example 2 (assistant turn): the worked five-step answer for the
# passage in user_prompts[2].
# Backslash-newline joins the source lines with nothing in between, so the
# space after "brother's" is required; without it the prompt read
# "brother'sillness".
assistant_prompts[2] = "1: This appears to be an epistolary narrative. The letter-writer explains that she would have left town except \
for her brother's illness. She then recalls telling her correspondent some time ago about the beauty of a woman, \
and recalls an event last week.\n \
2: There is an explicit reference to an event a week earlier. Some of the other references, for instance to a brother's \
illness, might stretch over several weeks. We can compromise and say two weeks.\n \
3: 2 weeks have elapsed. At seven days a week that's 14 days. 24 hours in a day, so 336 hours. 60 minutes in an hour, so 20160 minutes.\n \
4: 20160 minutes.\n \
5: Moderate confidence."

In [14]:
# Few-shot example 3 (user turn): task instructions followed by a sample
# passage that moves from a frame narrative to a much earlier story.
# The passage is written as a single line of text: the stray "\n" escapes that
# had been pasted into the middle of sentences are removed (passages sent to
# the model at query time have their newlines replaced by spaces as well), and
# the closing quote after "abandoned to decay." is restored.
user_prompts[3] = "Read the following passage of fiction. Then do five things. \
1: Briefly summarize the passage.\n \
2: Reason step by step to decide how much time is described in the passage. \
If the passage doesn't include any explicit reference to time, you can guess how much time the events described would have taken. \
Even description can imply the passage of time by mentioning the earlier history of people or buildings. \
But characters' references to the past or future in spoken dialogue should not count as time that passed in the scene. \
Report the time using units of years, weeks, days, hours, or minutes. Do not say zero or N/A.\n \
3: If you described a range of possible times in step 2 take the midpoint of the range. \
Then multiply to convert the units into minutes. \n\
4: Report only the number of minutes elapsed, which should match the number in step 3. Do not reply N/A. \n\
5: Given the amount of speculation required in step 2, describe your certainty about the estimate--either high, moderate, or low.\n \
The passage follows:\n \
said he, 'were once the seat of luxury and vice. They exhibited a singular instance of the retribution of Heaven, \
and were from that period forsaken, and abandoned to decay.' His words excited my curiosity, and I enquired further \
concerning their meaning. 'A solemn history belongs to this castle,' said he, 'which is too long and intricate \
for me to relate. It is, however, contained in a manuscript in our library, of which, I could, perhaps, procure \
you a sight. A brother of our order, a descendant of the noble house of Mazzini, collected and recorded the most \
striking incidents relating to his family, and the history thus formed, he left as a legacy to our convent. \
If you please, we will walk thither.' I accompanied him to the convent, and the friar introduced me to his superior, \
a man of an intelligent mind and benevolent heart, with whom I passed some hours in interesting conversation. \
I believe my sentiments pleased him; for by his indulgence, I was permitted to take abstracts of the history before \
me, which, with some further particulars obtained in conversation with the abate, I have arranged in the following pages. \
CHAPTER I. TOWARDS the close of the sixteenth century, this castle was in the possession of Ferdinand, fifth marquis of Mazzini, \
and was for some years the principal residence of his family."

In [15]:
# Few-shot example 3 (assistant turn): the worked five-step answer for the
# passage in user_prompts[3]. It demonstrates a very large span (200 years)
# converted step by step into minutes.
assistant_prompts[3] = "1: This passage moves from a frame narrative set (judging by diction) at the end of the eighteenth century, \
to a story set at the end of the sixteenth century. \n\
2: Although the events in the frame narrative might only take a few minutes, the distance between the two narratives is 200 years. \n\
3: The passage as a whole spans 200 years. At 365 days a year that's 73000 days x 24 hours in a day, so 1752000 hours. 60 minutes in an hour, so 105120000 minutes!\n\
4: 105120000 minutes. \n\
5: Moderate confidence."

In [16]:
# Display one assembled prompt to check how the continued lines were joined.
user_prompts[2]

"Read the following passage of fiction. Then do five things. 1: Briefly summarize the passage.\n 2: Reason step by step to decide how much time is described in the passage. If the passage doesn't include any explicit reference to time, you can guess how much time the events described would have taken. Even description can imply the passage of time by mentioning the earlier history of people or buildings. But characters' references to the past or future in spoken dialogue should not count as time that passed in the scene. Report the time using units of years, weeks, days, hours, or minutes. Do not say zero or N/A.\n 3: If you described a range of possible times in step 2 take the midpoint of the range. Then multiply to convert the units into minutes. \n4: Report only the number of minutes elapsed, which should match the number in step 3. Do not reply N/A. \n5: Describe your certainty about the estimate—either high, moderate, or low.\n The passage follows:\n Miss WESTBURY, TO Lady BELL S

### Code that accesses the API, sends prompts, and parses results

In [17]:
def parse_response(astring):
    """Split a numbered five-step model reply into its parts.

    The reply is expected to label its steps "1:" .. "5:"; if no "2:".."5:"
    label is found, the "1." .. "5." form is tried instead. The "1" label is
    removed and the text is cut at each "2".."5" label.

    A label only counts when it is not glued to a preceding word character,
    period or comma and is not followed by a digit. That keeps clock times
    ("12:30", "1:45") and decimals ("3.5 minutes") inside the reasoning from
    being mistaken for step labels.

    Parameters
    ----------
    astring : str
        Raw text of the model reply.

    Returns
    -------
    list of str
        The whitespace-stripped parts in order. A well-formed reply yields five
        parts (summary, reasoning, conversion, minutes, confidence); a reply
        with no recognizable labels yields a single-element list.
    """
    for sep in (':', '.'):
        first_label = re.compile(r'(?<![\w.,])1' + re.escape(sep) + r'(?!\d)')
        later_label = re.compile(r'(?<![\w.,])[2-5]' + re.escape(sep) + r'(?!\d)')
        astring = first_label.sub('', astring)
        pieces = later_label.split(astring)
        if len(pieces) > 1:
            # Labels of this style were found; do not try the fallback style.
            break
    return [piece.strip() for piece in pieces]

In [19]:
# Task instructions placed in front of every passage that is sent to the model
# for estimation; the passage text is appended directly after this string.
prefix = "Read the following passage of fiction. Then do five things. \
1: Briefly summarize the passage.\n \
2: Reason step by step to decide how much time is described in the passage. \
If the passage doesn't include any explicit reference to time, you can guess how much time the events described would have taken. \
Even description can imply the passage of time by mentioning the earlier history of people or buildings. \
But characters' references to the past or future in spoken dialogue should not count as time that passed in the scene. \
Report the time using units of years, weeks, days, hours, or minutes. Do not say zero or N/A.\n \
3: If you described a range of possible times in step 2 take the midpoint of the range. \
Then multiply to convert the units into minutes. \n\
4: Report only the number of minutes elapsed, which should match the number in step 3. Do not reply N/A. \n\
5: Given the amount of speculation required in step 2, describe your certainty about the estimate--either high, moderate, or low.\n \
The passage follows: \n"

# Multipliers that convert a reported amount into minutes, keyed by the stem
# the unit word must start with. A year is 365 days (525600 minutes).
MINUTES_PER_UNIT = {
    'second': 1 / 60,
    'minute': 1,
    'hour': 60,
    'day': 1440,
    'week': 10080,
    'year': 525600,
}


def _to_number(token):
    """Return ``token`` as an int if it parses as one, otherwise as a float.

    Raises ValueError when the token is not numeric.
    """
    try:
        return int(token)
    except ValueError:
        return float(token)


def _parse_minutes(answer):
    """Convert the model's step-4 answer (e.g. '45 minutes.') into minutes.

    Accepted forms:
      * a bare number ('45');
      * '<number> <unit>' where the unit starts with one of the stems in
        MINUTES_PER_UNIT; non-minute units are converted to minutes;
      * '<number> minutes <more words>'; with more than two words, only
        minutes are accepted;
      * a hyphenated range as the number ('2-4 minutes'), which is replaced by
        the midpoint of its first two values.
    Thousands separators (commas) in the number are ignored.

    Returns the number of minutes (int or float). Raises ValueError when the
    answer cannot be interpreted or is negative or not finite.
    """
    words = answer.split()
    if not words:
        raise ValueError('empty answer')

    amount_token = words[0].replace(',', '')
    if len(words) == 1:
        # Bare number; tolerate a sentence-final period such as "45.".
        amount = _to_number(amount_token.rstrip('.'))
        factor = 1
    else:
        unit = words[1].lower()
        if len(words) > 2 and not unit.startswith('minute'):
            # e.g. "1 hour and 15 minutes": too free-form to convert safely.
            raise ValueError('not a plain number of minutes: ' + answer)

        if '-' in amount_token:
            low, high = amount_token.split('-')[:2]
            amount = (_to_number(low) + _to_number(high)) / 2
            print('HYPHEN adjusted')
        else:
            amount = _to_number(amount_token)

        for stem, factor in MINUTES_PER_UNIT.items():
            if unit.startswith(stem):
                break
        else:
            # An unknown unit must not be silently counted as minutes.
            raise ValueError('unrecognized unit: ' + unit)
        if stem != 'minute':
            print(stem.upper() + 'S adjusted')

    minutes = amount * factor
    if not math.isfinite(minutes) or minutes < 0:
        # The correlation below takes log(minutes + 0.1).
        raise ValueError('unusable amount: ' + answer)
    return minutes


def _log_correlation(rows):
    """Pearson correlation of log(human + 0.1) vs log(model + 0.1) minutes.

    Only rows whose status (element 6) does not contain 'ERROR' are used.
    Returns None when fewer than two such rows exist, because a correlation
    is not defined for fewer than two points.
    """
    valid = [row for row in rows if 'ERROR' not in row[6]]
    if len(valid) < 2:
        return None
    human = [math.log(row[3] + 0.1) for row in valid]
    model = [math.log(row[4] + 0.1) for row in valid]
    return pearsonr(human, model)


# Each result row is
# (ctr, segment id, passage text, human minutes, model minutes,
#  few-shot order, status, raw model reply)
# where status is 'VALID', 'API ERROR' or 'CALC ERROR'.
results = []
ctr = 0

# The human estimate is bound to human_minutes rather than `time` so that the
# imported `time` module is not overwritten.
for (segidA, text), (segidB, human_minutes) in zip(texts[ctr:], times[ctr:]):
    assert segidA == segidB
    ctr += 1

    # Present the four worked examples in a fresh random order for every query.
    neworder = [0,1,2,3]
    random.shuffle(neworder)

    messages = [{"role": "system", "content": "You are analyzing fiction to estimate how much fictive time elapses in a given passage. You are willing to speculate if uncertain."}]
    for shot in neworder:
        messages.append({"role": "user", "content": user_prompts[shot]})
        messages.append({"role": "assistant", "content": assistant_prompts[shot]})

    text = text.replace('\n', ' ').replace('\u2019', "'").replace('\t', ' ').replace('  ', ' ')
    fulltext = prefix + text
    messages.append({"role": "user", "content": fulltext})

    try:
        completion = completions_with_backoff(
            model = "gpt-4",
            messages = messages,
            max_tokens = 700,
            temperature = 0
        )
    except openai.error.InvalidRequestError:
        print('ERROR in API')
        print(fulltext)
        results.append((ctr, segidA, text, human_minutes, 0, neworder, 'API ERROR', ""))
        continue

    reply = completion['choices'][0]['message']['content']
    parts = parse_response(reply)
    if len(parts) < 2:
        # The reply did not follow the numbered format, so there is no step-4
        # answer to read; record it instead of failing on parts[-2].
        print('ERROR in minutes')
        results.append((ctr, segidA, text, human_minutes, 0, neworder, "CALC ERROR", reply))
        continue

    print(ctr, human_minutes, parts[-2], parts[-1])

    try:
        result_time = _parse_minutes(parts[-2])
    except ValueError:
        print('ERROR in minutes')
        results.append((ctr, segidA, text, human_minutes, 0, neworder, "CALC ERROR", reply))
        continue

    results.append((ctr, segidA, text, human_minutes, result_time, neworder, "VALID", reply))

    # Periodic progress report: running correlation over the valid rows so far.
    if ctr % 10 == 5:
        running = _log_correlation(results)
        if running is not None:
            print('PEARSON:', running)
            print()
        
    

1 52560000.0 45 minutes. Moderate confidence.
2 5256000.0 180 minutes. Moderate confidence.
3 120.0 20 minutes. Moderate confidence.
4 10.0 10 minutes. Moderate confidence.
5 7.0 4 minutes. Moderate confidence.
PEARSON: (0.8555612044651562, 0.06444944796774625)

6 20160.0 90 minutes. Moderate confidence.
7 360.0 75 minutes. Moderate confidence, as the time spent eating and resting at the village is not explicitly stated.
8 10.0 7.5 minutes. Moderate confidence.
9 222.0 65 minutes. Moderate confidence.
10 9986400.0 9,198,000 minutes. Low confidence, due to the uncertainty of the narrator's age and the speculative nature of the estimate.
11 5.0 3.5 minutes. Moderate confidence.
12 2.0 3.5 minutes. Moderate confidence.
13 60.0 45 minutes. Moderate confidence.
14 10.0 3.5 minutes. Moderate confidence.
15 40320.0 4320 minutes. Moderate confidence.
PEARSON: (0.69078099091017, 0.004353244154731867)

16 13140000.0 3,942,000 minutes. Low confidence, due to the lack of specific time references a

KeyboardInterrupt: 

In [77]:
# Inspect `text` -- presumably the passage left over from the most recent
# iteration of the estimation loop (TODO confirm; this cell was run out of order).
text

'   '

In [24]:
# Log-transformed human (a) and model (b) minutes for every row whose status
# does not contain 'ERROR'. The 0.1 offset is added before taking the log.
valid_results = [res for res in results if 'ERROR' not in res[6]]
a = [math.log(res[3] + 0.1) for res in valid_results]
b = [math.log(res[4] + 0.1) for res in valid_results]

In [25]:
# Pearson correlation of the two lists built in the previous cell.
pearsonr(a, b)

(0.6469939605384247, 3.5947094617018895e-16)

In [23]:
# Same comparison without the log transform: human (a) and model (b) minutes,
# each offset by 0.1, for every row whose status does not contain 'ERROR'.
valid_results = [res for res in results if 'ERROR' not in res[6]]
a = [res[3] + 0.1 for res in valid_results]
b = [res[4] + 0.1 for res in valid_results]
pearsonr(a, b)

(0.068187140974503, 0.4499031199474439)

In [20]:
# Write one tab-separated line per result row.
with open('outputGPT4.tsv', encoding = 'utf-8', mode = 'w') as f:
    for res in results:
        fields = []
        for x in res:
            # Double quotes become apostrophes, as before.
            field = str(x).replace('"', "'")
            # A newline, tab or carriage return inside a field (for example in
            # the raw model reply) would split the row or shift its columns,
            # so each one is replaced by a space.
            for control_char in ('\n', '\t', '\r'):
                field = field.replace(control_char, ' ')
            fields.append(field)
        f.write('\t'.join(fields) + '\n')

### Final calculation after removing training examples, etc., from the output

In [26]:
import pandas as pd

In [31]:
# Read the tab-separated output file back in. It has no header row, so the
# columns are addressed by position (0-7) below.
corrected_data = pd.read_csv('outputGPT4.tsv', sep = '\t', header = None)

In [32]:
# Preview the first rows of the loaded table.
corrected_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,1,RadcASR.txt|0|u,A SICILIAN ROMANCE. ON the northern shore of...,52560000.0,45.0,"[2, 0, 3, 1]",VALID,1: The narrator describes visiting the ruins o...
1,3,RadcASR.txt|9|u,"They stopped to observe it, when it suddenly d...",120.0,20.0,"[3, 1, 2, 0]",VALID,1: Madame de Menon and Vincent observe a stran...
2,4,RadcASR.txt|90|u,"said he, 'the keys are ours, and we have not a...",10.0,10.0,"[3, 0, 1, 2]",VALID,1: The passage describes a group of people try...
3,5,RadcASR.txt|107|u,"said the marquis, 'what think ye? What evil sp...",7.0,4.0,"[3, 0, 1, 2]",VALID,1: The passage describes a marquis leading a g...
4,6,RadcASR.txt|109|u,She was ordered to confine herself to her apar...,20160.0,90.0,"[3, 1, 2, 0]",VALID,"1: A woman is confined to her apartment, worri..."


In [34]:
# Column 6 holds the row status and column 3 the human estimate in minutes;
# keep the human estimates of the rows marked 'VALID'.
is_valid = corrected_data[6] == 'VALID'
a = corrected_data.loc[is_valid, 3]

In [37]:
# Log-transform the human estimates; the 0.1 offset is added before the log.
human_times = [math.log(minutes + 0.1) for minutes in a]

In [38]:
# Column 4 holds the model estimate in minutes; take it for the rows marked
# 'VALID' and log-transform it the same way as the human estimates.
is_valid = corrected_data[6] == 'VALID'
b = corrected_data.loc[is_valid, 4]
gpt_times = [math.log(minutes + 0.1) for minutes in b]

In [39]:
# Pearson correlation between the log-transformed human and model estimates.
pearsonr(human_times, gpt_times)

(0.6819552265972909, 7.235561756217241e-18)

In [40]:
# Number of 'VALID' rows that went into the correlation above.
len(a)

121