In [70]:
import os
from dotenv import load_dotenv

import tiktoken
import openai
from serpapi import GoogleSearch

env_path="../src/.env"
load_dotenv(dotenv_path=env_path)

openai_api_key = os.environ.get("OPENAI_API_KEY")
serpapi_api_key = os.environ.get("SERPAPI_API_KEY")


## Qestion

In [None]:
question = "OpenAI ChatGPT"

## Search in google

In [7]:
params = {
  "api_key": serpapi_api_key,
  "engine": "google",
  "q": question,
  "location": "Austin, Texas, United States",
  "google_domain": "google.com",
  "gl": "us",
  "hl": "en"
}


search = GoogleSearch(params)
results = search.get_dict()

## Create prompt

In [64]:
knowledge_graph_info = f'Info: {results["knowledge_graph"]["description"]}'
# results["knowledge_graph"]["description"]
# results["knowledge_graph"]["source"]["name"]
# results["knowledge_graph"]["source"]["link"]


related_questions_info = [
   f'Question: {related_questions["question"]}\nAnswer: {related_questions["snippet"]}' for related_questions in results["related_questions"]
]
# for related_questions in results["related_questions"]:
#     print(related_questions["question"])
#     print(related_questions["snippet"])
#     print("="*30)

search_results_info = [
    # f'Source:\nTitle: {item["title"]}\nURL: {item["link"]}\nContent: {item["snippet"]}' for item in results["organic_results"]
    f'Source:\nTitle: {item["title"]}\nContent: {item["snippet"]}' for item in results["organic_results"]
]
search_results_related_questons_info = [
    f'Question: {related_questions["question"]}\nAnswer: {related_questions["snippet"]}'
    for item in results["organic_results"]
    if item.get("related_questions", None) is not None
    for related_questions in item["related_questions"]
]

# for item in results["organic_results"]:
#     print(item["position"])
#     print(item["link"])
#     print(item["title"])
#     print(item["snippet"])

#     if item.get("related_questions", None) is not None:
#         for related_questions in item["related_questions"]:
#             print(related_questions["question"])
#             print(related_questions["snippet"])

#     print("="*30)

# knowledge_graph_info
# related_questions_info
# pprint(search_results_info)
# pprint(search_results_related_questons_info)


prompt = (
    "Use the following sources to answer the MAIN QUESTION:\n\n" + 
    "\n\n".join([knowledge_graph_info]) + "\n\n" +
    "\n\n".join(search_results_info) + 
    "\n\n".join(related_questions_info) + 
    "\n\n".join(search_results_related_questons_info) + 
    "\n\nMAIN QUESTION: " + question + 
    "\n\nAnswer:")

print(len(prompt))
print("="*30)
print(prompt)

3842
Use the following sources to answer the MAIN QUESTION:

Info: ChatGPT is an artificial intelligence chatbot developed by OpenAI and released in November 2022. It is built on top of OpenAI's GPT-3.5 and GPT-4 families of large language models and has been fine-tuned using both supervised and reinforcement learning techniques.

Source:
Title: Introducing ChatGPT - OpenAI
Content: ChatGPT is a sibling model to InstructGPT, which is trained to follow an instruction in a prompt and provide a detailed response.

Source:
Title: ChatGPT: Everything you need to know about the AI-powered ...
Content: ChatGPT, OpenAI's text-generating AI chatbot, has taken the world by storm. It's able to write essays, code and more given short text ...

Source:
Title: Everything We Know About OpenAI's ChatGPT - Gizmodo
Content: Everybody's talking about ChatGPT, the powerful new AI chatbot from OpenAI that generates text. Here are the basics you need to know.

Source:
Title: What is ChatGPT and why does it 

## OpenAI Tokenizer

In [106]:
def show_tokens(model:str="cl100k_base"):
    encoding = tiktoken.encoding_for_model(model)
    tokens_ids = encoding.encode(value)

    # for token_id in tokens_ids:
    #     token = encoding.decode([token_id])
    #     print(f"Token ID: {token_id}, Token: {token}")
    print(f"\n\n{model}")
    print(f"Total length: {len(tokens_ids)} tokens. Length:{len(value.split())} words")
    print("|".join([encoding.decode([token_id]) for token_id in tokens_ids]))
    print("|".join([str(token_id) for token_id in tokens_ids]))


value = "Test text english. Тестовый текст по русски. 2019, 2 0 1 9"

show_tokens("text-davinci-003") # smae "text-davinci-002"
show_tokens("gpt-3.5-turbo") # same "gpt-4"




text-davinci-003
Total length: 37 tokens. Length:12 words
Test| text| english|.| �|�|е|с|т|о�|�|ы|�|�| |т|е|к|с|т| �|�|о| |р|у|с|с|к|и|.| 2019|,| 2| 0| 1| 9
14402|2420|46932|13|12466|95|16843|21727|20375|25443|110|45035|140|117|220|20375|16843|31583|21727|20375|12466|123|15166|220|21169|35072|21727|21727|31583|18849|13|13130|11|362|657|352|860


gpt-3.5-turbo
Total length: 26 tokens. Length:12 words
Test| text| english|.| Т|ест|ов|ый| текст| по| р|ус|ски|.| |201|9|,| |2| |0| |1| |9
2323|1495|30063|13|51418|37277|6856|35723|71995|18154|18600|44155|80112|13|220|679|24|11|220|17|220|15|220|16|220|24


In [112]:
def num_tokens_from_messages(messages, model:str = "cl100k_base", mode:str="ChatCompletion") -> int:
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")

    if mode =="ChatCompletion":
        num_tokens = 0
        for message in messages:
            num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":  # if there's a name, the role is omitted
                    num_tokens += -1  # role is always required and always 1 token
        num_tokens += 2  # every reply is primed with <im_start>assistant
    else:
        num_tokens = len(encoding.encode(messages))
        
    return num_tokens

num_tokens_from_messages(value, "gpt-3.5-turbo","Completion")

26

## OpenAI Results

### Completion by "text-davinci-003"

In [140]:
# Check if there are any results
if results:

    print("Tokens len:",
        num_tokens_from_messages(prompt, "text-davinci-003","Completion")
        )

    # Use OpenAI's GPT-3 API to answer the question
    openai.api_key = openai_api_key
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=1024,
        n=1,
        stop=None,
        temperature=0.7,
    )

    # Print the usage
    print(response["usage"])

    # Print the answer from OpenAI
    answer = response["choices"][0]["text"].strip()
    print(f"Answer: {answer}")
    
else:
    # Print an error message if there are no results
    print("Error: No results found for the given query.")

Tokens len: 954
{
  "completion_tokens": 105,
  "prompt_tokens": 954,
  "total_tokens": 1059
}
Answer: OpenAI ChatGPT is an artificial intelligence chatbot developed by OpenAI and released in November 2022. It is built on top of OpenAI's GPT-3.5 and GPT-4 families of large language models and has been fine-tuned using both supervised and reinforcement learning techniques. OpenAI ChatGPT is designed to understand and generate human-like responses to text inputs. The basic version of OpenAI ChatGPT is free to use, but the Plus version is offered as a paid subscription.


In [143]:
response

<OpenAIObject text_completion id=cmpl-78YXDOaSbSvuOzxqzWh8cyjo2a8bk at 0x21217286ca0> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": " OpenAI ChatGPT is an artificial intelligence chatbot developed by OpenAI and released in November 2022. It is built on top of OpenAI's GPT-3.5 and GPT-4 families of large language models and has been fine-tuned using both supervised and reinforcement learning techniques. OpenAI ChatGPT is designed to understand and generate human-like responses to text inputs. The basic version of OpenAI ChatGPT is free to use, but the Plus version is offered as a paid subscription."
    }
  ],
  "created": 1682273731,
  "id": "cmpl-78YXDOaSbSvuOzxqzWh8cyjo2a8bk",
  "model": "text-davinci-003",
  "object": "text_completion",
  "usage": {
    "completion_tokens": 105,
    "prompt_tokens": 954,
    "total_tokens": 1059
  }
}

### ChatCompletion by "gpt-3.5-turbo"

In [144]:
chat_prompt = [
    dict(role="user",content=prompt)
]

# Check if there are any results
if results:
    print("Tokens len:",
            num_tokens_from_messages(chat_prompt, "gpt-3.5-turbo","ChatCompletion")
            )

    # Use OpenAI's GPT-3.5 Turbo API to answer the question
    openai.api_key = openai_api_key
    
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_prompt,
        max_tokens=1024,
        n=1,
        stop=None,
        temperature=0.7,
    )

    # Print the usage
    print(response["usage"])

    # Print the answer from OpenAI
    answer = response["choices"][0]["message"]["content"]

    print(f"Answer: {answer}")
    
else:
    # Print an error message if there are no results
    print("Error: No results found for the given query.")

Tokens len: 910
{
  "completion_tokens": 130,
  "prompt_tokens": 911,
  "total_tokens": 1041
}
Answer: ChatGPT is an artificial intelligence chatbot developed by OpenAI that uses GPT-3.5 and GPT-4 language models and has been fine-tuned using both supervised and reinforcement learning techniques. It is available to the public for free through the OpenAI website and can be used for natural language processing tasks such as answering questions, composing emails, essays, and code. There is also a paid subscription version called ChatGPT Plus, which includes access to GPT-4. OpenAI was co-founded by Elon Musk in 2015 and initially established as a nonprofit organization, but relinquished its nonprofit status in 2019.


In [145]:
response

<OpenAIObject chat.completion id=chatcmpl-78YYoV21kO0KMz82h6PhSa3IK1Pbp at 0x2121752d8a0> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "ChatGPT is an artificial intelligence chatbot developed by OpenAI that uses GPT-3.5 and GPT-4 language models and has been fine-tuned using both supervised and reinforcement learning techniques. It is available to the public for free through the OpenAI website and can be used for natural language processing tasks such as answering questions, composing emails, essays, and code. There is also a paid subscription version called ChatGPT Plus, which includes access to GPT-4. OpenAI was co-founded by Elon Musk in 2015 and initially established as a nonprofit organization, but relinquished its nonprofit status in 2019.",
        "role": "assistant"
      }
    }
  ],
  "created": 1682273830,
  "id": "chatcmpl-78YYoV21kO0KMz82h6PhSa3IK1Pbp",
  "model": "gpt-3.5-turbo-0301",
  "object": "cha