## Install packages

In [None]:
# Install EvaDB with its "vision" and "notebook" extras (quiet output), then the PDF tooling.
!pip install --quiet "evadb[vision,notebook]"
# PyMuPDF is force-reinstalled even if a copy is already present.
!pip install --force-reinstall PyMuPDF
# NOTE(review): the PyPI distribution named `fitz` appears to be a different project from
# PyMuPDF (which provides the importable `fitz` module) — confirm this install is needed.
!pip install fitz
!pip install numpy

## Setup EvaDB


In [2]:
import evadb
import warnings
# Silence all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Open an EvaDB connection and keep a cursor; the query cells below all go through it.
cursor = evadb.connect().cursor()

## Download the data

In [None]:
# Fetch the sample PDF straight to its final name. `-O` names the output file, so there is
# no follow-up `mv` of the query-string-laden default filename, and combined with `-nc`
# an existing data-structures.pdf is kept instead of being downloaded again on re-run.
!wget -nc -O data-structures.pdf "https://www.dropbox.com/scl/fi/5qplwqa5vwxaqh740e8sm/medium-algorithm.pdf?rlkey=fu38yw4mvik3zfg8b3snnskbm&dl=0"

## Load the data

In [None]:
# Start from a clean slate: remove the ALGO table if it already exists ...
cursor.query("DROP TABLE IF EXISTS ALGO").df()
# ... then load data-structures.pdf into ALGO.
cursor.query("LOAD PDF 'data-structures.pdf' INTO ALGO").df()

## Show ALGO table


In [5]:
# Select every column of every row in ALGO; as the cell's last expression the result is displayed.
cursor.query("SELECT * FROM ALGO").df()

Unnamed: 0,_row_id,name,page,paragraph,data
0,1,data-structures.pdf,1,1,Top Data Structures & Algorithms inJava That Y...
1,1,data-structures.pdf,1,2,Swatee Chand · Follow
2,1,data-structures.pdf,1,3,"Published in Edureka · 16 min read · Jul 19, 2019"
3,1,data-structures.pdf,1,4,60
4,1,data-structures.pdf,1,6,Data Structures and Algorithms in Java — Edureka
...,...,...,...,...,...
441,1,data-structures.pdf,15,42,What if you don’t like to practice 100s ofcodi...
442,1,data-structures.pdf,15,43,"5 min read · Sep 15, 2022"
443,1,data-structures.pdf,16,1,681--28
444,1,data-structures.pdf,16,2,See more recommendations


## Create the Text Summarization Function



In [None]:
# Register an EvaDB function named TextSummarizer (only if it does not exist yet),
# declared as a HuggingFace 'summarization' task using the facebook/bart-large-cnn model.
cursor.query("""
    CREATE FUNCTION IF NOT EXISTS TextSummarizer
    TYPE HuggingFace
    TASK 'summarization'
    MODEL 'facebook/bart-large-cnn'
""").df()

## Get Summaries


In [None]:
# For each ALGO row on pages before 13, return the paragraph text together with
# TextSummarizer applied to it.
# NOTE(review): the reason for the `page < 13` cutoff is not stated here — confirm it is intended.
result = cursor.query("""
    SELECT data, TextSummarizer(data)
    FROM ALGO
    WHERE page < 13
""").df()

In [8]:
# Persist the text/summary pairs to result_data.csv without the index column;
# the CSV-reading cell further down loads this file.
result.to_csv('result_data.csv', index=False)

## Import SK Packages

In [None]:
# Install Semantic Kernel pinned to 0.3.10.dev0 before the imports below use it.
!python -m pip install semantic-kernel==0.3.10.dev0
import semantic_kernel as sk

from semantic_kernel import (
    ChatPromptTemplate,
    SemanticFunctionConfig,
    PromptTemplateConfig,
)

## Save Processed Text Summarization Values

In [125]:
import csv

file_path = "result_data.csv"

# Collect the second column (the summary text) of result_data.csv for CSV rows 36..299.
# Positions are 0-based and row 0 is the header line, so the window is the same
# `counter > 35 and counter < 300` range as before.
data_second_column = []

# Explicit UTF-8 so decoding does not depend on the platform's locale default.
with open(file_path, mode='r', newline='', encoding='utf-8') as file:
    reader = csv.reader(file)
    # enumerate() advances the position on EVERY row. An increment nested inside the
    # range check would never run (0 > 35 is false), leaving the list permanently empty.
    for counter, row in enumerate(reader):
        if 35 < counter < 300 and len(row) > 1:
            data_second_column.append(row[1])

# Separate consecutive summaries with a space so sentences from different rows do not run together.
# NOTE(review): this text is embedded in the chat prompt — verify that it fits the
# model's context window alongside the configured max_tokens.
processed = ' '.join(data_second_column)

## Setup and Connect to Semantic Kernel

In [117]:
import os
from getpass import getpass

kernel = sk.Kernel()

# Credentials must not live in the notebook: take the key from the OPENAI_API_KEY
# environment variable, falling back to a hidden interactive prompt when it is unset.
# NOTE(review): a literal key was previously committed in this cell — treat it as
# compromised and revoke/rotate it.
api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

## Create My GPT and Semantic Function with Enhancement Through Prompting and Tuning, Along With Leveraging the Text Summarization Feature from EvaDB

In [None]:
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion

# Register an OpenAI chat-completion connector for "gpt-3.5-turbo" on the kernel under the
# service id "chat-gpt" (the id listed in chat_config_dict["default_services"]).
kernel.add_chat_service("chat-gpt", OpenAIChatCompletion("gpt-3.5-turbo", api_key))

In [151]:
# Prompt configuration for the chatbot's semantic function.
chat_config_dict = dict(
    schema=1,
    # Kind of prompt being configured
    type="completion",
    # Human-readable summary of the semantic function
    description="A chatbot which provides information about different data structures.",
    # Model service id(s) the function should run on
    default_services=["chat-gpt"],
    # Settings forwarded to the connector / model service
    completion=dict(
        temperature=0.3,
        top_p=1,
        max_tokens=2500,
        number_of_responses=1,
        presence_penalty=0,
        frequency_penalty=0,
    ),
    # Variables the prompt template refers to; each one defaults to an empty string
    input=dict(
        parameters=[
            dict(name=param_name, description=param_description, defaultValue="")
            for param_name, param_description in (
                ("input", "The input given by the user"),
                ("history", "Previous interactions between the user and chatbot"),
            )
        ]
    ),
)

In [152]:
# Create a fresh context on the kernel and seed it with an empty "history" entry;
# chat() appends each exchange to that entry.
context = kernel.create_new_context()
context["history"] = ""

In [153]:
from semantic_kernel import PromptTemplateConfig, SemanticFunctionConfig, PromptTemplate


def create_semantic_function_config(prompt_template, prompt_config_dict, kernel):
    """Bundle a prompt string and its settings into a ``SemanticFunctionConfig``.

    Args:
        prompt_template: Prompt text, passed to ``sk.PromptTemplate`` as ``template``.
        prompt_config_dict: Settings dict, parsed with ``PromptTemplateConfig.from_dict``.
        kernel: Kernel whose ``prompt_template_engine`` is given to the template.

    Returns:
        The ``SemanticFunctionConfig`` built from the parsed config and the template object.
    """
    template_config = PromptTemplateConfig.from_dict(prompt_config_dict)
    template = sk.PromptTemplate(
        template=prompt_template,
        prompt_config=template_config,
        template_engine=kernel.prompt_template_engine,
    )
    return SemanticFunctionConfig(template_config, template)

In [154]:
# Build the chat prompt: the collected summaries are spliced into the opening sentence,
# followed by the fixed instructions and the {{$history}} / {{$input}} placeholders.
prompt = f"Please anaylze this data to help you with your responses: {processed}." + """
      You are a chatbot that will answer the questions the user will ask.
      The user can ask about sorting algorithm, different data structures, and run time complexity.
      Always ask the user if they have any more questions or follow-up questions!
      If you do not know the answer, then politely let the user know you do not know.
      -----------

      {{$history}}
      User: {{$input}}
      ChatBot: """

# Pair the prompt with the chat settings ...
function_config = create_semantic_function_config(
    prompt_template=prompt,
    prompt_config_dict=chat_config_dict,
    kernel=kernel,
)

# ... and register the result on the kernel as SimpleChatbot.simple_chatbot.
chatbot = kernel.register_semantic_function(
    skill_name="SimpleChatbot",
    function_name="simple_chatbot",
    function_config=function_config,
)

In [155]:
async def chat(input_text, context, verbose=True):
    """Send one user message to the chatbot and record the exchange.

    Args:
        input_text: The user's message; stored under ``context["input"]``.
        context: Context object supporting item access with a ``"history"`` entry.
            It is mutated in place.
        verbose: When true (the default) the reply is printed as ``ChatBot: ...``.
            Pass ``False`` to update the history without printing.
    """
    # Save the new message in the context variables
    context["input"] = input_text

    # Process the user message and get an answer
    answer = await chatbot.invoke_async(context=context)

    # Show the response only when asked to (the flag used to be ignored)
    if verbose:
        print(f"ChatBot: {answer}")

    # Append the new interaction to the chat history
    context["history"] += f"\nUser: {input_text}\nChatBot: {answer}\n"

In [156]:
# Ask the prompted chatbot a question; chat() prints the reply and appends the
# exchange to context["history"].
await chat("What is a Binary Tree? Please provide the response in detail and in-depth.", context)

ChatBot: A binary tree is a type of data structure in computer science that is composed of nodes. Each node in a binary tree can have at most two children, referred to as the left child and the right child. The topmost node in a binary tree is called the root node.

The structure of a binary tree allows for efficient searching, insertion, and deletion operations. It is commonly used in various applications such as representing hierarchical relationships, organizing data for efficient searching, and implementing binary search trees.

In a binary tree, each node can have zero, one, or two children. The left child is always smaller than the parent node, while the right child is always greater than the parent node. This property is known as the binary search tree property and allows for efficient searching and sorting operations.

Binary trees can be classified into different types based on their structure. Some common types include:

1. Full Binary Tree: A binary tree in which every node 

## Comparing with the Base GPT-3.5 Model (No Tuning/Prompting)

In [157]:
# Baseline for comparison: a separate kernel with the same "gpt-3.5-turbo" chat service,
# queried with a bare question (no summaries, instructions, or chat history in the prompt).
new_kernel = sk.Kernel()
new_kernel.add_chat_service("chat-gpt", OpenAIChatCompletion("gpt-3.5-turbo", api_key))
# NOTE(review): `prompt` is rebound here — it held the chatbot's template string earlier
# and now refers to a semantic function.
prompt = new_kernel.create_semantic_function("What is a Binary Tree? Please provide the response in detail and in-depth. Do not use more than 2000 tokens for this answer!")
# Invoke the function with no arguments and print its reply.
print(prompt())

A binary tree is a type of tree data structure in which each node has at most two children, referred to as the left child and the right child. It is called a "binary" tree because the maximum number of children a node can have is two.

The structure of a binary tree consists of nodes connected by edges. Each node contains a value or data, and it may or may not have children. The topmost node of the tree is called the root node. If a node does not have any children, it is called a leaf node or a terminal node.

The left child of a node is positioned to the left, and the right child is positioned to the right. These children can themselves be the root of their own binary subtrees, forming a recursive structure. This recursive nature allows for efficient traversal and manipulation of the binary tree.

Binary trees have various applications in computer science and data structures. They are commonly used for efficient searching, sorting, and indexing operations. Binary search trees, a speci