In [1]:
%autosave 15 

Autosaving every 15 seconds


In [None]:
import os
import re
import time

import openai
import pandas as pd

# Configuration
# The API key is read from the environment so that the secret never lives in
# the source file. NOTE: the key that used to be hard-coded on this line must
# be treated as leaked and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    # Fail fast with a clear message instead of letting every API call fail
    # later with an authentication error.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this script."
    )
input_file = "remainder_to_score.csv"
output_file = "remainder_scored.csv"
temperature_setting = 0.1  # sampling temperature passed to the chat API
max_tokens_setting = 1000  # max_tokens passed to the chat API

# Read the input CSV into a pandas DataFrame
df = pd.read_csv(input_file)

# Accumulates one output string per row of df, in row order
parsed_descriptions = []

# Prompt text sent as the first user message ahead of each description. It
# tells the model to split the description into chunks, assign each chunk one
# category, score each chunk's valence, and lay the answer out as three
# dash-prefixed lists (chunks, categories, valence).
# NOTE(review): "desriptive" in the valence paragraph is a typo for
# "descriptive"; it is left untouched here because the string is sent to the
# model verbatim and editing it may change the model's output.
instruction = """
Your goal is to parse, categorize, and score the valence of the following description.
First, parse the description into discrete meaningful chunks. A chunk is defined as a unique occurrence, observation, or thought, typically expressed as a grammatical unit such as a verb phrase, clause, adjective, proper noun, or separated by a conjunction such as for, and, nor, but, or, yet, so.
When parsing, if a sentence or phrase appears to belong to multiple categories, split it into the respective distinct chunks. For example, if "Today I went to the endocrinologist." can be categorized as both "Event" and "Time", split it into "Today" as "Time" and "I went to the endocrinologist" as "Event".
Then, categorize each chunk of the description as Event, Time, Place, Perceptual, Thought/Emotion, Semantic, Extraneous, or Reflection. Each chunk can be assigned to only one category:
Event: Happenings, individuals present, actions or reactions of self and others, topics of conversation.
Time: Year, season, month, day of week, time of day, statements about relative timing within an event.
Place: Localization of an event including the city, street, building, room, part of room.
Perceptual: Auditory, olfactory, tactile, taste, visual and visual details, body position, duration, weather.
Thought/Emotion: Emotional state, thoughts, implications of self and others.
Semantic: General knowledge or facts and pieces of information that are not directly related to the main experience being described.
Extraneous: Tangential events to the main memory recalled.
Reflection: Meta-cognitive statements, editorializing, speculation.

After categorizing, score the valence of each chunk as (positive), (negative), or (neutral). Valence captures the emotional tone of the chunk, indicating whether it is positive, negative, or neutral in nature. Items are only positive or negative in nature if they have desriptive adjectives in them.
Please format your response in the following manner:
For chunks:
-"Chunk of text"
-"Another chunk of text"
...
For categories:
-Category
-Another Category
...
For valence:
-valence
-valence
"""

# Matches the wait hint in a rate-limit error message, e.g. "Please try again
# in 250ms" or "Please try again in 1.5s". Other formats fall back to the
# default delay.
_RETRY_HINT_RE = re.compile(r"Please try again in (\d+(?:\.\d+)?)\s*(ms|s)\b")


def _retry_delay_seconds(error_str, default=10.0):
    """Return how long to sleep before retrying, based on the error text."""
    match = _RETRY_HINT_RE.search(error_str)
    if not match:
        return default
    amount = float(match.group(1))
    # Milliseconds are converted to seconds; a plain "s" value is used as-is.
    return amount / 1000.0 if match.group(2) == "ms" else amount


def process_description(description, max_retries=5):
    """Send one description to the chat model and return its raw text reply.

    The module-level ``instruction`` prompt and ``description`` are sent as
    two consecutive user messages, using the module-level temperature and
    max-token settings.

    Rate-limit errors (detected by the text 'Rate limit reached' in the
    exception message) are retried in a loop rather than by recursion, so a
    persistent rate limit can neither exhaust the call stack nor retry forever.

    Args:
        description: Text of the description to parse and score.
        max_retries: How many times to retry after a rate-limit error before
            giving up.

    Returns:
        The stripped message content of the first choice in the response, or
        ``None`` if the request failed with a non-rate-limit error or the
        retry budget was exhausted.
    """
    print(f"Processing: '{description}'")

    messages_payload = [
        {"role": "user", "content": instruction},
        {"role": "user", "content": description},
    ]

    retries_used = 0
    while True:
        try:
            # Make the API call
            response = openai.ChatCompletion.create(
                model="gpt-4-0613",
                messages=messages_payload,
                temperature=temperature_setting,
                max_tokens=max_tokens_setting,
            )

            # Extraction stays inside the try so a malformed response is
            # reported through the same error path as a failed request.
            output = response['choices'][0]['message']['content'].strip()
            print(f"Processed successfully. Output: '{output}'")
            return output

        except Exception as e:
            error_str = str(e)
            rate_limited = 'Rate limit reached' in error_str
            if not rate_limited or retries_used >= max_retries:
                print(f"Error processing description: {description}. Error: {e}")
                return None

            retries_used += 1
            print("Rate limit reached. Retrying after a brief pause.")
            time.sleep(_retry_delay_seconds(error_str))

# Batch processing: run every description through the model in row order and
# record exactly one entry per row so the results line up with the DataFrame.
descriptions = df['Descriptions']
total = len(descriptions)
for position, text in enumerate(descriptions, start=1):
    print(f"\nProcessing description {position} out of {total}")
    outcome = process_description(text)

    # A falsy outcome (None or empty text) is recorded as a placeholder so the
    # list stays the same length as the DataFrame.
    parsed_descriptions.append(
        outcome if outcome else "Error in processing this description"
    )

# Attach the collected outputs as a new column
df['Parsed Descriptions'] = parsed_descriptions

# Write the augmented table out as CSV, without the index column
df.to_csv(output_file, index=False)

print(f"\nProcessing complete! Results saved to '{output_file}'.")
