# Summarise Documents

## Set up Azure OpenAI

In [1]:
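# Alternative setup (disabled): authenticate with a managed-identity token
# instead of an API key. Uncomment this cell and fill in the XXX placeholders to use it.
# import openai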
# from openai import AzureOpenAI
# import os 
# from azure.identity import ManagedIdentityCredential

# default_credential=ManagedIdentityCredential(client_id="XXX")
# token=default_credential.get_token("https://cognitiveservices.azure.com/.default")
# Resource_endpoint="XXX"

# client = AzureOpenAI(
#   azure_endpoint = Resource_endpoint, 
#   api_key=token.token,  
#   api_version="2023-05-15"
# )

In [1]:
import os
import openai
from openai import AzureOpenAI
from dotenv import load_dotenv

# Load the Azure OpenAI settings from the local credentials file
load_dotenv("credentials.env")

openai.api_type = "azure"

# Build the client from the environment.
# NOTE: the pinned API version is a *preview* release; for the list of current versions see
# https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_version="2025-01-01-preview",
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
)

## Load Data

In [2]:
import pandas as pd

# The news file is tab-separated; index_col=False keeps pandas from
# promoting the first column to the index.
df_orig = pd.read_csv(
    "data/bbc-news-data.csv",
    sep="\t",
    index_col=False,
)

In [None]:
# Work on a deep copy so the frame loaded from disk stays untouched
df = df_orig.copy(deep=True)
df

## Create prompt

In [None]:
# Suffix appended after the article text to ask the model for a short summary.
# Uses exactly the same string as the batch loop further down, so that the
# single-document demo and the batch run send identical prompts (the previous
# triple-quoted literal added a stray leading space and trailing newline).
prompt_postfix = "\n\nTl;dr"

# Prompt for the first document: title, newline, body, then the summary cue.
prompt = df.loc[0, 'title'] + "\n" + df.loc[0, 'content'] + prompt_postfix
prompt

## Request to API

In [None]:
# Send the single-document prompt as one user message.
# temperature=0 and max_tokens=50 are passed to the API unchanged.
# NOTE(review): with AzureOpenAI, `model` is presumably the deployment name — confirm.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": prompt}],
    temperature=0,
    max_tokens=50
)

# message.content can be None (e.g. when the response is filtered), so guard
# before calling .strip() instead of failing with an AttributeError.
choice = response.choices[0]
if choice.message.content is None:
    print(f"No summary returned (finish_reason={choice.finish_reason})")
else:
    print(choice.message.content.strip())


### Putting the code together

In [7]:
# Summarise the first `n_docs` documents and collect the summaries in `results`.
# `results` shares df's index so it can be concatenated column-wise afterwards;
# rows that are not processed (or that fail) keep their initial NaN value.
n_docs = 10
results = pd.DataFrame(columns=['summary'], index=df.index)
# prompt postfix
prompt_postfix = "\n\nTl;dr"

# Iterate over the real index labels of the first n_docs rows. The previous
# zip(range(10), ...) produced positional counters that were then used as
# labels in results.at[...], which is only correct for a default 0..n-1 index.
first_docs = df[['title', 'content']].head(n_docs)
for idx, title, content in first_docs.itertuples(index=True, name=None):
    # build prompt
    prompt = title + "\n" + content + prompt_postfix

    try:
        # Request API
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=50
        )
        results.at[idx, 'summary'] = response.choices[0].message.content.strip()
    except Exception as err:
        # Best effort: report the failure and continue with the next document.
        print(f"Unexpected error at index {idx}: {err} ({type(err)})")


### Results

In [None]:
# Show the first ten summary rows
results.iloc[:10]

### Adding results to dataframe

In [None]:
# Put the first ten documents and their summaries side by side (column-wise concat).
df_results = pd.concat([df.head(10), results.head(10)], axis=1)
# A bare expression in the middle of a cell is not displayed, so print the
# shape explicitly; the frame itself is shown as the cell's last expression.
print(df_results.shape)
df_results

## Save results

In [10]:
# Write the documents plus summaries as a tab-separated file.
fname = 'output/summaries.csv'
# to_csv does not create missing parent directories, so make sure it exists first.
os.makedirs(os.path.dirname(fname), exist_ok=True)
df_results.to_csv(fname, sep='\t')