# OpenAI Assistant with Files 
(RAG: Retrieval-Augmented Generation)


This notebook demonstrates how an OpenAI Assistant can retrieve information from uploaded files and use it to answer questions.

### Setup

In [1]:
from openai import OpenAI
from dotenv import find_dotenv, load_dotenv

In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()
# Build the API client with default settings; no credentials are passed here.
# NOTE(review): presumably the API key is picked up from the environment
# populated by load_dotenv() above — confirm.
client = OpenAI()  

### Create the Assistant

In [4]:
# helper function to get completions from OpenAI
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0, max_tokens=2000):
    """Send one user prompt to the Chat Completions API and return the reply text.

    Args:
        prompt: Text sent as the only message, with role "user".
        model: Name of the chat model passed to the API.
        temperature: Sampling temperature forwarded to the API. Defaults to 0,
            the value that was previously hard-coded.
        max_tokens: Token limit forwarded to the API. Defaults to 2000, the
            value that was previously hard-coded.

    Returns:
        The ``content`` of the first choice's message.
    """
    # Relies on the notebook-level `client` created in the setup cell.
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content

In [7]:
# Register a new assistant with the file_search tool enabled, then keep its id
# for the later update and run calls.
assistant = client.beta.assistants.create(
    model="gpt-3.5-turbo",
    name="Paper Review Assistant",
    tools=[{"type": "file_search"}],
    instructions=(
        "You are Academic expert. "
        "Use you knowledge base to answer questions about the paper."
    ),
)
assistant_id = assistant.id
print(assistant_id)

asst_y5C0vVmKLQFYOTmbpHEcj4om


In [8]:
# assistant_id = 'asst_y5C0vVmKLQFYOTmbpHEcj4om'

### Create the Vector Store

In [None]:
# # Create a vector store called "Paper Data"
# vector_store = client.beta.vector_stores.create(name="Paper Data")
 
# # Ready the files for upload to OpenAI 
# file_paths = ["/Users/taweesaksamanchuen/Dev/ApiAssistant/book/eyeThesis.pdf", "/Users/taweesaksamanchuen/Dev/ApiAssistant/book/fonThesis.pdf"]
# file_streams = [open(path, "rb") for path in file_paths]
 
# # Use the upload and poll SDK helper to upload the files, add them to the vector store,
# # and poll the status of the file batch for completion.
# file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
#   vector_store_id=vector_store.id, files=file_streams
# )
 
# # You can print the status and the file counts of the batch to see the result of this operation. 
# print(file_batch.status)
# print(file_batch.file_counts)

In [9]:
# Hardcoded ID of an existing OpenAI vector store.
# NOTE(review): presumably the "Paper Data" store produced by the commented-out
# upload cell above, pinned here to avoid re-uploading the PDFs — confirm.
vector_store_id = 'vs_xjYz5Ah1kDcSeWqip2duIxSw'

In [10]:
# Link the vector store to the assistant's file_search tool resources.
assistant = client.beta.assistants.update(
    assistant_id=assistant_id,
    tool_resources={
        "file_search": {
            "vector_store_ids": [vector_store_id],
        },
    },
)


In [15]:
# Name of the thesis to ask about; it is interpolated into the user prompt in
# the final cell. Switch documents by swapping which assignment is commented out.
# thesisName = "eyeThesis"
thesisName = "fonThesis"

### Create Thread

In [11]:
# Start a conversation thread seeded with a single user message.
# No file is attached to the message in this version. To attach one, first
# upload it:
#   message_file = client.files.create(
#       file=open("edgar/aapl-10k.pdf", "rb"), purpose="assistants"
#   )
# and then add this entry to the message dict below:
#   "attachments": [
#       {"file_id": message_file.id, "tools": [{"type": "file_search"}]}
#   ],
thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Study the PDF file and summarize the key points.",
        }
    ]
)
thread_id = thread.id

# Thread-level file_search resources; the recorded run (no attachment) prints None.
print(thread.tool_resources.file_search)
print(thread_id)

None
thread_WQMEiDboYVbMSEqScSX7sjMy


In [None]:
# #current thread id 
# thread_id = 'thread_pUGOPAakwhHWwgKwF9wztzl8'
# #delete thread
# client.beta.threads.delete(thread_id)


### Helper functions

In [12]:
from typing_extensions import override
from openai import AssistantEventHandler
 
class EventHandler(AssistantEventHandler):
    """Streaming event handler that prints assistant output with file citations.

    Prints a prompt marker when text or a tool call starts. When a message is
    complete, prints its text with each annotation marker replaced by ``[n]``,
    followed by one ``[n] filename`` line per file citation.
    """

    @override
    def on_text_created(self, text) -> None:
        """Print the assistant prompt marker when a text block starts."""
        print("\nassistant > ", end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call) -> None:
        """Print the type of the tool call the assistant has started."""
        print(f"\nassistant > {tool_call.type}\n", flush=True)

    @override
    def on_message_done(self, message) -> None:
        """Print the finished message text followed by its file citations.

        The annotation markers in the message's text value are replaced in
        place, so the message object itself is modified. Messages with no
        content, or whose first content block has no ``text`` attribute, are
        skipped instead of raising.
        """
        first_block = message.content[0] if message.content else None
        message_content = getattr(first_block, "text", None)
        if message_content is None:
            # Nothing printable and no annotations to resolve.
            return

        citations = []
        # file_id -> filename, so a file cited several times is fetched once.
        filenames = {}
        for index, annotation in enumerate(message_content.annotations):
            message_content.value = message_content.value.replace(
                annotation.text, f"[{index}]"
            )
            if file_citation := getattr(annotation, "file_citation", None):
                file_id = file_citation.file_id
                if file_id not in filenames:
                    filenames[file_id] = client.files.retrieve(file_id).filename
                citations.append(f"[{index}] {filenames[file_id]}")

        print(message_content.value)
        print("\n".join(citations))




In [13]:
def ask_assistant(instuction_message, user_message):
    """Post a user message to the current thread and stream the assistant's run.

    Relies on the notebook-level names ``client``, ``thread_id``,
    ``assistant_id`` and ``EventHandler``.

    Args:
        instuction_message: Text passed as the run's ``instructions``.
            NOTE(review): per the OpenAI API reference, run-level
            ``instructions`` override the assistant's own instructions for
            that run — confirm this is intended.
        user_message: Content of the user message appended to the thread.

    Returns:
        The value of ``stream.get_final_messages()`` once the run has finished.
    """
    # Append the question to the thread; the returned message object is not needed.
    client.beta.threads.messages.create(
        thread_id=thread_id,
        role="user",
        content=user_message,
    )
    # Stream the run; EventHandler prints output as events arrive.
    with client.beta.threads.runs.stream(
        thread_id=thread_id,
        assistant_id=assistant_id,
        instructions=instuction_message,
        event_handler=EventHandler(),
    ) as stream:
        stream.until_done()
        return stream.get_final_messages()

### Let's talk with the Assistant

In [14]:
# Run-level instructions, and the question about the currently selected thesis.
instuction_message = (
    "Act as Academic Writer to create a paper from the given topic."
)
user_message = f"from {thesisName},  what is the key point of the PDF file?"
# Kept as the cell's last expression so the returned messages are displayed.
ask_assistant(
    instuction_message=instuction_message,
    user_message=user_message,
)


assistant > file_search


assistant > The key points of the thematic paper "Retail Business Chatbot Implementation with Dialogflow and IBM Watson" include:

1. **Evaluation of Chatbot Platforms**:
   - The study evaluates chatbot platforms based on confidence values, intent classification, accuracy, and user perceptions.
   - Aesthetic and minimalist design in chatbots are suggested to be given less importance compared to intent and context accuracy, which are deemed crucial factors for evaluation.
   - Emphasizes the significance of core functionality and accuracy while highlighting the need for user-friendly interfaces that prioritize seamless user experiences. 

2. **Implications**:
   - The findings underscore the importance of focusing on intent and context accuracy in designing chatbot interfaces, as these aspects play a significant role in enhancing flow accuracy and user experience.
   - The research provides practical implications for chatbot design and evaluation, guiding co

[Message(id='msg_oLSRFucXluH1VwT7xqrUoxpP', assistant_id='asst_y5C0vVmKLQFYOTmbpHEcj4om', attachments=[], completed_at=1714777756, content=[TextContentBlock(text=Text(annotations=[FileCitationAnnotation(end_index=2261, file_citation=FileCitation(file_id='file-ojMBVCyoJCwhQv4yY6cFBtCP', quote=None), start_index=2249, text='【5:1†source】', type='file_citation'), FileCitationAnnotation(end_index=2273, file_citation=FileCitation(file_id='file-ojMBVCyoJCwhQv4yY6cFBtCP', quote=None), start_index=2261, text='【5:2†source】', type='file_citation'), FileCitationAnnotation(end_index=2285, file_citation=FileCitation(file_id='file-ojMBVCyoJCwhQv4yY6cFBtCP', quote=None), start_index=2273, text='【5:3†source】', type='file_citation')], value='The key points of the thematic paper "Retail Business Chatbot Implementation with Dialogflow and IBM Watson" include:\n\n1. **Evaluation of Chatbot Platforms**:\n   - The study evaluates chatbot platforms based on confidence values, intent classification, accuracy, 