# Extract Key Information

In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell is executed so edits to local source files take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression statement in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"

## Set up Azure OpenAI

In [None]:
import os
import openai
from dotenv import load_dotenv

# Configure the openai client for the Azure endpoint.
# load_dotenv() runs first so OPENAI_API_KEY can come from a local .env file;
# the key is None when the variable is not set.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")
openai.api_version = "2022-12-01"
openai.api_base = "https://tutorial-openai-01-2023.openai.azure.com/"
openai.api_type = "azure"

## Load Data

In [None]:
import pandas as pd

# Read the tab-separated news dataset. index_col=False tells pandas not to
# use the first column as the row index.
df_orig = pd.read_csv(
    "../data/bbc-news-data.csv",
    sep="\t",
    index_col=False,
)

In [None]:
# Work on an independent (deep) copy so the frame loaded from disk, df_orig,
# is never modified by later cells.
df = df_orig.copy(deep=True)
df  # display the working copy

## Request to API

In [None]:
# Build the prompt for the row labelled 0: the instruction text, then the
# article title and the article content separated by a newline.
# The prefix is spelled with explicit escapes so its leading " \n" and
# trailing "\n" are visible.
prompt_prefix = " \n  Extract key information from this text\n"

prompt = "".join([prompt_prefix, df.loc[0, "title"], "\n", df.loc[0, "content"]])
prompt

In [None]:
# Send the prompt built above to the completion endpoint.
request_params = {
    "deployment_id": "text-davinci-003",  # has to be deployment_id
    "prompt": prompt,
    "max_tokens": 100,
    "temperature": 1,
    "top_p": 1.0,
    "frequency_penalty": 0.0,
    "presence_penalty": 0,
}
response = openai.Completion.create(**request_params)

# Show the generated text of the first returned choice.
response["choices"][0]["text"]

----------------

In [None]:
# Run the key-information extraction for every article in df and collect the
# generated text in `results`, a one-column frame that shares df's index.
# Rows whose request fails keep their initial NaN value.
colname = 'key_info'
results = pd.DataFrame(columns=[colname], index=df.index)

# Instruction text placed in front of every article; spelled with explicit
# escapes so the leading " \n" and trailing "\n" are visible.
prompt_prefix = " \n  Extract key information from this text\n"

# Iterate the index and the two text columns in lockstep; they already share
# the same row order, so no re-indexing is needed.
for idx, title, content in zip(df.index, df['title'], df['content']):

  # build prompt
  prompt = prompt_prefix + title + "\n" + content

  try:
    # Request API
    response = openai.Completion.create(
      deployment_id="text-davinci-003", # has to be deployment_id
      prompt=prompt,
      temperature=1,
      max_tokens=100,
      top_p=1.0,
      frequency_penalty=0.0,
      presence_penalty=1
    )

    # Store the response with a single .loc[row, column] call. The chained
    # form results[colname].loc[idx] = ... assigns into a temporary object
    # and may never reach `results` (it is a no-op under Copy-on-Write).
    results.loc[idx, colname] = response['choices'][0]['text']
  except Exception as err:
    # Best effort: report which row failed and carry on with the next one.
    # A bare `idx` expression is not echoed inside a loop, so the row label
    # is included in the message instead.
    print(f"Unexpected {err=}, {type(err)=} for row {idx=}")

In [None]:
# Display the results frame filled in by the extraction loop.
results

In [None]:
# Place the extraction results next to the original article columns
# (column-wise concatenation aligned on the index), then show the combined
# frame's shape followed by its contents.
df_results = pd.concat((df, results), axis="columns")
df_results.shape
df_results

## Save results

In [None]:
# Write the combined frame to disk as a tab-separated file. The index is
# written too, since to_csv's default index=True is left unchanged.
fname = "../output/key_info.csv"
df_results.to_csv(path_or_buf=fname, sep="\t")