In [20]:
cd /workspaces/Quick-OpenAI-Insights # this only works on codespaces!

/workspaces/Quick-OpenAI-Insights


In [7]:
# Standard library
import json

# Third-party
# import and load dotenv - required for loading environment variables
# NOTE: load the .env file *before* importing openai; the pre-1.0 openai
# package captures OPENAI_API_KEY from the environment when it is imported.
from dotenv import load_dotenv
load_dotenv()

import openai
import pandas as pd

# Local (resolved relative to the current working directory)
from src.preprocess import YTVideoTranscript

In [25]:
def make_prompt(text: str) -> str:
    """
    Returns a prompt for the autocomplete api with a pre-defined json structure and the text to summarize.

    The prompt consists of a short instruction, a JSON skeleton describing the
    six expected keys (topic, tags, sentiment, urgency, descriptive_normative,
    questioning), and finally the text itself wrapped in triple double-quotes.

    Args:
        text (str): the text to summarize
    Returns:
        str: the complete prompt string
    """
    # `{{` and `}}` are literal braces escaped for str.format; the lone `{}` is
    # the slot that receives `text`.
    # The skeleton has no trailing comma after the last key so that the example
    # shown to the model is itself valid JSON (json.loads rejects trailing commas,
    # and the reply is expected to mirror this structure).
    prompt = """
    What follows is a chunk of text. I want you to return a json string with the following structure. 
    Note that the source text contains no punctuation, and doesn't have well defined boundaries between sentences.
    Json structure:
    {{
        "topic": "a two or three word topic for the text",
        "tags": "a space separated list of tags for the text",
        "sentiment": "a sentiment score for the text, ranging from 0 to 1",
        "urgency": "an urgency score for the text, ranging from 0 to 1, where 0 is not urgent and 1 is very urgent",
        "descriptive_normative": "a descriptiveness and normativitiy score for the text, ranging from 0 to 1, where 0 is descriptive and 1 is normative",
        "questioning": "a questioning score for the text, ranging from 0 to 1, where 0 is not questioning and 1 is very questioning"
    }}

    text:
    \"\"\"
    {}
    \"\"\"
    """.format(text)

    return prompt

In [14]:
# Load the raw transcript and get a rolled up dataframe to work with
TRANSCRIPT_PATH = 'data/raw/HMRC DALAS Transcript Raw.txt'
CHUNK_SIZE = 10  # forwarded to roll_up_df as `chunksize` (presumably rows per rolled-up chunk; confirm in src.preprocess)

textfile = YTVideoTranscript(TRANSCRIPT_PATH)
textfile.roll_up_df(chunksize=CHUNK_SIZE)
df = textfile.get_data_frame()

In [26]:
# Build the prompt from the entry labelled 0 in the 'text' column and print it for inspection
first_chunk = df['text'][0]
new_prompt = make_prompt(first_chunk)
print(new_prompt)


    What follows is a chunk of text. I want you to return a json string with the following structure. 
    Note that the source text contains no punctuation, and doesn't have well defined boundaries between sentences.
    Json structure:
    {
        "topic": "a two or three word topic for the text",
        "tags": "a space separated list of tags for the text",
        "sentiment": "a sentiment score for the text, ranging from 0 to 1",
        "urgency": "an urgency score for the text, ranging from 0 to 1, where 0 is not urgent and 1 is very urgent",
        "descriptive_normative": "a descriptiveness and normativitiy score for the text, ranging from 0 to 1, where 0 is descriptive and 1 is normative",
        "questioning": "a questioning score for the text, ranging from 0 to 1, where 0 is not questioning and 1 is very questioning",
    }

    text:
    """
    so I think we are fine to get started and the first thing on the agenda is oh the first thing on our agenda is a message fr

In [30]:
# call the autocomplete api using the prompt built by make_prompt
response = openai.Completion.create(
    engine="text-davinci-003",
    prompt=new_prompt,
    temperature=0.2, # low temperature means the model will be more conservative
    max_tokens=256,  # upper bound only; 100 left little headroom for the six-field JSON reply
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0
)

# the completion text of the first (and only requested) choice
first_choice = response['choices'][0]
output_text = first_choice['text']

# Remove arbitrary indentation from the text: strip every line and glue the
# pieces back together without separators
json_text = "".join(line.strip() for line in output_text.split("\n"))

# Parse the JSON data using the loads() function. On failure, surface the raw
# reply and the finish reason ('length' means the reply was cut off by
# max_tokens) instead of a bare decode error.
try:
    data = json.loads(json_text)
except json.JSONDecodeError as err:
    raise ValueError(
        "Model reply is not valid JSON "
        f"(finish_reason={first_choice.get('finish_reason')!r}): {output_text!r}"
    ) from err

print(data)

{'topic': 'Legacy Migration', 'tags': 'government, CCS, hmrc, collaboration, Legacy estate, Legacy migration, Legacy encapsulation, Legacy remediation', 'sentiment': '0.7', 'urgency': '0.8', 'descriptive_normative': '0.5', 'questioning': '0.2'}
