In [1]:
import sys

sys.path.append("../vector_db")

In [2]:
from qdrant.lib.llm import LLM, test
import tiktoken
import openai

sys.path.append("../code")
from util import extract_pdf_to_txt, extract_doi_to_txt, get_string_from_text_file

In [3]:
def create_prompt(
    input_file: str,
    prompt_file: str = "../llm_prompt.txt",
    num_chunks: int = -1,
    model_token_limit: int = 16385,
):
    model = "gpt-3.5-turbo"
    # Initialize the tokenizer
    tokenizer = tiktoken.encoding_for_model(model)

    input = get_string_from_text_file(input_file)
    # prompt = get_string_from_text_file(prompt_file) + input + "```"
    prompt = get_string_from_text_file(prompt_file)
    # Encode the text_data into token integers
    token_integers = tokenizer.encode(input)

    # Split the token integers into chunks based on max_tokens
    # chunk_size = max_tokens - len(token_integers)
    if num_chunks != -1:
        chunk_size = len(token_integers) // num_chunks
    else:
        chunk_size = model_token_limit - 50
    chunks = [
        token_integers[i : i + chunk_size]
        for i in range(0, len(token_integers), chunk_size)
    ]
    print("chunk size:", chunk_size)

    # Decode token chunks back to strings
    chunks = [tokenizer.decode(chunk) for chunk in chunks]
    print(f"{len(chunks)} chunks processing...")

    responses = []
    messages = [
        {"role": "user", "content": prompt},
        {
            "role": "user",
            "content": "To provide the context for the above prompt, I will send you text in parts. When I am finished, I will tell you 'ALL PARTS SENT'. Do NOT answer until you have received all the parts. You will be penalized otherwise.",
        },
    ]

    for i, chunk in enumerate(chunks):
        messages.append({"role": "user", "content": chunk})

        # Check if total tokens exceed the model's limit and remove oldest chunks if necessary
        while (
            sum(len(tokenizer.encode(msg["content"])) for msg in messages)
            > model_token_limit
        ):
            messages.pop(1)  # Remove the oldest chunk

        # client = LLM("../llm_prompt.txt", base="azure", use_model="gpt3")
        # client.prompt = "\n".join([message["content"] for message in messages])
        # response = client.get_response()
        # responses.append(response)
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo", messages=messages
        )
        # chatgpt_response = response.choices[0].message["content"].strip()
        chatgpt_response = response.choices[0].message.content
        responses.append(chatgpt_response)
        print(f"{i}th chunk processed {responses}")

    # Add the final "ALL PARTS SENT" message
    messages.append({"role": "user", "content": "```\nALL PARTS SENT"})
    # client = LLM("../llm_prompt.txt", base="azure", use_model="gpt3")
    # client.prompt = "\n".join([message["content"] for message in messages])
    # response = client.get_response()
    # responses.append(response)
    response = openai.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
    # final_response = response.choices[0].message["content"].strip()
    final_response = response.choices[0].message.content
    responses.append(final_response)
    return responses

In [4]:
create_prompt(
    input_file="../text/2020 ESC Guidelines for the diagnosis and management of atrial fibrillation developed in collaboration with the European Association for Cardio-Thoracic Surgery (EACTS).txt",
)

reading data from ../text/2020 ESC Guidelines for the diagnosis and management of atrial fibrillation developed in collaboration with the European Association for Cardio-Thoracic Surgery (EACTS).txt...
string successfully retrieved.

reading data from ../llm_prompt.txt...
string successfully retrieved.

chunk size: 16335
15 chunks processing...


KeyboardInterrupt: 

In [13]:
input = get_string_from_text_file(
    "../text/3D Convolutional Neural Networks for Human Action Recognition.txt"
)
prompt = get_string_from_text_file("../llm_prompt.txt")
with open("../llm_prompt_temp.txt", "w") as file:
    file.write(prompt + input + "\n```")
client = LLM("../llm_prompt_temp.txt", base="azure", use_model="gpt4")
client.get_response(max_tokens=3000)

reading data from ../text/3D Convolutional Neural Networks for Human Action Recognition.txt...
string successfully retrieved.

reading data from ../llm_prompt.txt...
string successfully retrieved.

{
  "filename": "3D Convolutional Neural Networks for Human Action Recognition",
  "extracted-section-list": ["Abstract", "Introduction", "3D Convolutional Neural Networks", "Related Work", "Experiments", "Conclusions and Discussions"],
  "target-section-extraction-result": {
    "introduction": {
      "extracted-text": "Recognizing human actions in real-world environment finds applications in a variety of domains including intelligent video surveillance, customer attributes, and shopping behavior analysis. However, accurate recognition of actions is a highly challenging task due to cluttered backgrounds, occlusions, and viewpoint variations, etc. Therefore, most of the existing approaches make certain assumptions about the circumstances under which the video was taken. However, such assump

'{\n  "filename": "3D Convolutional Neural Networks for Human Action Recognition",\n  "extracted-section-list": ["Abstract", "Introduction", "3D Convolutional Neural Networks", "Related Work", "Experiments", "Conclusions and Discussions"],\n  "target-section-extraction-result": {\n    "introduction": {\n      "extracted-text": "Recognizing human actions in real-world environment finds applications in a variety of domains including intelligent video surveillance, customer attributes, and shopping behavior analysis. However, accurate recognition of actions is a highly challenging task due to cluttered backgrounds, occlusions, and viewpoint variations, etc. Therefore, most of the existing approaches make certain assumptions about the circumstances under which the video was taken. However, such assumptions seldom hold in real-world environment. In addition, most of these approaches follow the conventional paradigm of pattern recognition, which consists of two steps in which the first step 

In [15]:
tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")

prompt = get_string_from_text_file("../llm_prompt.txt") + "input" + "```"
# Encode the text_data into token integers
token_integers = tokenizer.encode(prompt)

# Split the token integers into chunks based on max_tokens
chunk_size = 2500 - len(tokenizer.encode(prompt))
chunks = [
    token_integers[i : i + chunk_size]
    for i in range(0, len(token_integers), chunk_size)
]

# Decode token chunks back to strings
chunks = [tokenizer.decode(chunk) for chunk in chunks]

reading data from ../llm_prompt.txt...
string successfully retrieved.



In [17]:
len(chunks)

1

In [5]:
llm = LLM(prompt_file="../llm_prompt.txt", base="azure", use_model="gpt3")

In [6]:
llm.get_response()

{
  "PaperTitle": "Exploring the Effects of Exercise on Mental Health",
  "ExtractedSections": [
    {
      "SectionName": "Introduction",
      "SectionContent": "The introduction section provides an overview of the importance of mental health and the potential benefits of exercise in improving mental well-being."
    },
    {
      "SectionName": "Method",
      "SectionContent": "The method section outlines the study design, participant recruitment process, exercise intervention protocol, and outcome measures used to assess mental health outcomes."
    },
    {
      "SectionName": "Result",
      "SectionContent": "The result section presents the findings of the study, including the effects of exercise on various mental health parameters such as anxiety, depression, and stress levels."
    },
    {
      "SectionName": "Conclusion",
      "SectionContent": "In conclusion, the study highlights the positive impact of exercise on mental health and emphasizes the importance of incorpo

'{\n  "PaperTitle": "Exploring the Effects of Exercise on Mental Health",\n  "ExtractedSections": [\n    {\n      "SectionName": "Introduction",\n      "SectionContent": "The introduction section provides an overview of the importance of mental health and the potential benefits of exercise in improving mental well-being."\n    },\n    {\n      "SectionName": "Method",\n      "SectionContent": "The method section outlines the study design, participant recruitment process, exercise intervention protocol, and outcome measures used to assess mental health outcomes."\n    },\n    {\n      "SectionName": "Result",\n      "SectionContent": "The result section presents the findings of the study, including the effects of exercise on various mental health parameters such as anxiety, depression, and stress levels."\n    },\n    {\n      "SectionName": "Conclusion",\n      "SectionContent": "In conclusion, the study highlights the positive impact of exercise on mental health and emphasizes the imp

In [3]:
test("../vector_db/qdrant/prompt/enhance_vectordb_query.txt", use_model="gpt4")

# TITLE
The Impact of Climate Change on Agricultural Productivity: A Global Perspective

# Abstract Example
This research paper examines the effects of climate change on agricultural productivity from a global perspective. It utilizes comprehensive data sets from various regions around the world to analyze the correlation between changing weather patterns and crop yields. The study also explores potential adaptation strategies that could mitigate the negative impacts of climate change on agriculture. The findings suggest that while climate change poses significant challenges to global food security, there are viable solutions that can help sustain agricultural productivity in the face of these challenges.

# OUTPUT FORMAT
- Keywords: Climate Change, Agricultural Productivity, Global Perspective, Adaptation Strategies, Food Security
- Reason: These keywords are chosen as they represent the main themes of the research paper. They provide a clear understanding of the paper's focus on the 

In [1]:
import os

In [3]:
os.getenv("OPEN_AI_KEY")

In [5]:
os.environ["OPENAI_API_KEY"]

'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'