In [1]:
from openai import OpenAI
# API client, constructed with no explicit arguments.
client = OpenAI()

# Register the editor assistant with the file_search tool enabled.
assistant = client.beta.assistants.create(
    model="gpt-4o",
    name="Editor SSCI Journal",
    tools=[{"type": "file_search"}],
    instructions="""You are an academic editor with 30 years of experience, working for the Journal of Information and Processing. Your task is to help write a scholarly paper. The paper must adhere to high standards of academic writing, structure, and formatting, as expected in reputable peer-reviewed journals. Ensure the content is clear, well-organized, and meets the rigorous demands of an academic audience.
  """,
)


In [2]:
# Create a vector store called "bibtext" to hold the abstracts file
vector_store = client.beta.vector_stores.create(name="bibtext")

# Ready the files for upload to OpenAI
file_paths = ["Abstract_cleaned.json"]
file_streams = [open(path, "rb") for path in file_paths]

# Use the upload and poll SDK helper to upload the files, add them to the vector store,
# and poll the status of the file batch for completion.
# The handles are closed afterwards even if the upload raises, so no file
# descriptors are left open in the kernel.
try:
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id, files=file_streams
    )
finally:
    for stream in file_streams:
        stream.close()

# You can print the status and the file counts of the batch to see the result of this operation.
print(file_batch.status)
print(file_batch.file_counts)

completed
FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1)


In [3]:
# Point the assistant's file_search tool at the vector store created above,
# and rebind `assistant` to the object returned by the update call.
assistant = client.beta.assistants.update(
    tool_resources={
        "file_search": {
            "vector_store_ids": [vector_store.id],
        },
    },
    assistant_id=assistant.id,
)

In [4]:

# Start a thread seeded with a single user message: the draft paragraph that
# the assistant will be asked to rewrite. No attachments or tool_resources are
# passed to the thread here.
thread = client.beta.threads.create(
    messages=[
        dict(
            role="user",
            content="""
        Generative artificial intelligence (GAI) has rapidly evolved in recent years, driven by advances in computational power, particularly through the use of GPUs and neural networks.GAI is now widely applied across various business environments, including sales forecasting by Amazon, fraud detection by Alibaba, recommendation systems by Netflix, and image recognition for autonomous driving by Tesla. Furthermore, GAI can be implemented in numerous business departments, such as customer support through chatbots, fault detection via image recognition even replacing human in those jobs.
                """,
        )
    ]
)

# Show the thread's own file_search resources (the vector store was attached
# to the assistant, not to this thread).
print(thread.tool_resources.file_search)

None


In [5]:
# Start a run of the assistant on the thread and block until it reaches a
# terminal state.
#
# The task prompt is passed as `additional_instructions` rather than
# `instructions`: a run-level `instructions` value replaces the assistant's own
# instructions (the academic-editor persona set when the assistant was
# created), whereas `additional_instructions` is appended to them.
run = client.beta.threads.runs.create_and_poll(
  thread_id=thread.id,
  assistant_id=assistant.id,
  additional_instructions="""
  Task:
Read and classify all the abstracts in the provided JSON file, identifying the most relevant ones for rewriting the given paragraphs.
Rewrite the paragraphs based on the content of the most relevant abstracts, ensuring they align closely with the original information in the paragraphs.
Citations: When rewriting the paragraphs:
Cite the original sources using their full IDs from the JSON file (e.g., liang_uncovering_2019, gomez-uribe_netflix_2016).
Ensure that the citations retain the original source IDs from the file in the final output and are not replaced by numerical placeholders (e.g., [1], [2]).
Do not introduce hallucinated citations or unverified references. The citations must be directly linked to the sources from the file.
Criteria:
Select the abstracts that most closely match the content in your paragraphs, prioritizing relevance and accuracy.
Ensure that each citation is directly related to a specific point in the paragraph.
Avoid introducing any new or unrelated information from the abstracts.

Example of Desired Output:

Alibaba integrates GAI into its fraud detection systems, reducing financial risks by identifying fraudulent patterns more efficiently[liang_uncovering_2019].In the entertainment industry, platforms like Netflix utilize GAI-powered recommendation systems, enhancing user engagement by personalizing content suggestions based on viewing habits[gomez-uribe_netflix_2016].
"""
)

In [6]:
# Only fetch the thread's messages when the run finished successfully;
# for any other status, report the status instead.
if run.status != 'completed':
    print(run.status)
else:
    messages = client.beta.threads.messages.list(thread_id=thread.id)
    print(messages)

SyncCursorPage[Message](data=[Message(id='msg_jiFcJfLEcAbAubyEn3nKS5bD', assistant_id='asst_nYgvWS9oovt1F6V3aZG00iwJ', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[FileCitationAnnotation(end_index=629, file_citation=FileCitation(file_id='file-lAueu0lyGVDbWb4n6rO6PJsz'), start_index=616, text='【4:19†source】', type='file_citation'), FileCitationAnnotation(end_index=846, file_citation=FileCitation(file_id='file-lAueu0lyGVDbWb4n6rO6PJsz'), start_index=833, text='【4:19†source】', type='file_citation'), FileCitationAnnotation(end_index=1034, file_citation=FileCitation(file_id='file-lAueu0lyGVDbWb4n6rO6PJsz'), start_index=1021, text='【4:19†source】', type='file_citation'), FileCitationAnnotation(end_index=1244, file_citation=FileCitation(file_id='file-lAueu0lyGVDbWb4n6rO6PJsz'), start_index=1231, text='【4:19†source】', type='file_citation'), FileCitationAnnotation(end_index=1667, file_citation=FileCitation(file_id='file-lAueu0lyGVDbWb4n6rO6PJsz'), start_ind

In [7]:
# Walk every content block of every message on the page and print the text
# of those that carry a `text` attribute; other block kinds are skipped.
text_blocks = (
    block
    for message in messages.data
    for block in message.content
    if hasattr(block, 'text')
)
for block in text_blocks:
    print(block.text.value)

Generative artificial intelligence (GAI) has rapidly evolved in recent years, driven by advances in computational power, particularly through the use of GPUs and neural networks. GAI is now widely applied across various business environments, including sales forecasting by Amazon, fraud detection by Alibaba, recommendation systems by Netflix, and image recognition for autonomous driving by Tesla. For instance, Amazon uses time series modeling techniques like Holt-Winters exponential smoothing, neural network autoregression, and ARIMA to predict future sales, which helps in managing operations more efficiently【4:19†source】. Alibaba employs innovative graph learning algorithms to detect and prevent fraudulent claims by analyzing network information, significantly improving precision and coverage compared to previous methods【4:19†source】. Netflix leverages a combination of A/B testing and historical engagement data to enhance its recommendation algorithms, focusing on improving member ret