### Create an assistant

In [9]:
from openai import OpenAI
import os


# --- Settings -------------------------------------------------------------
# The API key is read from the environment so it never appears in the notebook.
key = os.getenv("OPENAI_API_KEY")

assistant_name = "sem-tab"
assistant_instruction = "...."  # instruction text passed to the assistant
temp = 0.2  # sampling temperature passed to the assistant

vector_store_name = "sem-tab-input"
path = "path"  # folder whose files are uploaded to the vector store in a later cell

# --- Client and assistant -------------------------------------------------
client = OpenAI(api_key=key)

# Create the assistant with the file_search tool enabled; the vector store
# is attached in a later cell via assistants.update.
assistant = client.beta.assistants.create(
    model="gpt-4-turbo",
    name=assistant_name,
    instructions=assistant_instruction,
    temperature=temp,
    tools=[
        {"type": "file_search"},
    ],
)

### Upload files and add them to a Vector Store

In [10]:
def get_file_paths(folder_path):
    """Return the path of every file under ``folder_path``, including subfolders.

    Each path is the directory reported by ``os.walk`` joined with the file
    name, so the results are absolute only when ``folder_path`` itself is.

    Args:
        folder_path: Root directory to walk.

    Returns:
        A list of file paths in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(current_dir, entry)
        for current_dir, _subdirs, entries in os.walk(folder_path)
        for entry in entries
    ]

# Create the vector store that will hold the uploaded files.
vector_store = client.beta.vector_stores.create(name=vector_store_name)

folder = path
file_paths = get_file_paths(folder)

# Open the files one at a time inside the try block so that every handle
# that was opened is closed again, whether the upload succeeds, fails, or a
# later open() raises.
file_streams = []
try:
    for file_path in file_paths:
        file_streams.append(open(file_path, 'rb'))
    # Upload all files to the vector store and wait until the batch has
    # finished processing.
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id,
        files=file_streams
    )
finally:
    for stream in file_streams:
        stream.close()

print(file_batch.status)
print(file_batch.file_counts)

completed
FileCounts(cancelled=0, completed=15, failed=0, in_progress=0, total=15)


### Update the assistant to use the new Vector Store

In [11]:
# Point the assistant's file_search tool at the vector store created above.
assistant = client.beta.assistants.update(
    assistant_id=assistant.id,
    tool_resources={
        "file_search": {
            "vector_store_ids": [vector_store.id],
        },
    },
    tools=[
        {"type": "file_search"},
    ],
)

### Create a thread

In [12]:
# Start a new thread with no initial messages; later cells pass it to
# get_response, which adds the user message and runs the assistant on it.
thread = client.beta.threads.create()

### Add a message to the thread


#### Define the `get_response` function

In [13]:
def get_response(query,client,assistant,thread):
    """Ask the assistant one question on an existing thread and return its answer.

    The query is added to the thread as a user message, a run is started and
    polled until it stops, and the first message produced by that run is read
    back. Every annotation marker in the answer text is replaced by a
    bracketed index (``[0]``, ``[1]``, ...).

    Args:
        query: Text of the user message to add to the thread.
        client: OpenAI client used for every API call.
        assistant: Object whose ``id`` selects the assistant to run.
        thread: Object whose ``id`` selects the conversation thread.

    Returns:
        A ``(text, citations)`` tuple. ``text`` is the answer with annotation
        markers replaced by their index. ``citations`` is a list of
        ``"[index] filename"`` strings, one for each annotation that carries
        a ``file_citation``.

    Raises:
        RuntimeError: If the run did not produce any message content. The
            error text includes the run's status and last error.
    """
    # add message to the thread
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=query
    )

    # create a run and wait for it to reach a terminal state
    run = client.beta.threads.runs.create_and_poll(
        thread_id=thread.id,
        assistant_id=assistant.id
    )

    # get the messages produced by this run only
    messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))
    if not messages or not messages[0].content:
        # Without this guard the indexing below fails with a bare IndexError
        # that hides why the run produced nothing.
        raise RuntimeError(
            f"Run {run.id} produced no message "
            f"(status={getattr(run, 'status', None)!r}, "
            f"last_error={getattr(run, 'last_error', None)!r})"
        )

    message_content = messages[0].content[0].text
    # Work on a local copy of the text instead of mutating the response object.
    answer = message_content.value
    citations = []
    for index, annotation in enumerate(message_content.annotations):
        answer = answer.replace(annotation.text, f"[{index}]")
        # Only file citations have a source file to look up.
        if file_citation := getattr(annotation, "file_citation", None):
            cited_file = client.files.retrieve(file_citation.file_id)
            citations.append(f"[{index}] {cited_file.filename}")

    return answer,citations

#### query 1

In [14]:
query = "what is requirments engineering"
response = get_response(query, client, assistant, thread)

# response is (answer text, list of cited file names); print one field per line.
print(
    "************",
    f"query: {query}",
    f'response: {response[0]}',
    f"reference: {response[1]}",
    sep="\n",
)

************
query: what is requirments engineering
response: Requirements Engineering (RE) is a discipline within software engineering and systems engineering that focuses on determining the needs or conditions to meet for a new or altered product, taking account of the possibly conflicting requirements of the various stakeholders, such as beneficiaries or users. RE involves various activities such as requirements elicitation, requirements analysis, requirements specification, requirements validation, and requirements management. The goal is to produce a comprehensive and detailed set of requirements for the system that can serve as a basis for subsequent stages of product development, ensuring that the final product meets the needs of its users and stakeholders.
reference: []
