## Install packages

In [None]:
!pip install --quiet "evadb[vision,notebook]"
!pip install --force-reinstall PyMuPDF
!pip install fitz
!pip install numpy

## Setup EvaDB


In [2]:
import evadb
import warnings
# Silence library warnings so the notebook output stays readable.
warnings.simplefilter("ignore")

# Connect to EvaDB and keep a cursor around; every later query goes through it.
connection = evadb.connect()
cursor = connection.cursor()

## Download the data

In [None]:
!wget -nc "https://www.dropbox.com/scl/fi/5qplwqa5vwxaqh740e8sm/medium-algorithm.pdf?rlkey=fu38yw4mvik3zfg8b3snnskbm&dl=0"
!mv "medium-algorithm.pdf?rlkey=fu38yw4mvik3zfg8b3snnskbm&dl=0" data-structures.pdf

## Load the data

In [None]:
# Start from a clean slate so re-running the cell does not fail on an existing table,
# then load the downloaded PDF into the ALGO table.
drop_statement = "DROP TABLE IF EXISTS ALGO"
load_statement = "LOAD PDF 'data-structures.pdf' INTO ALGO"

cursor.query(drop_statement).df()
cursor.query(load_statement).df()

## Show ALGO table


In [5]:
# Display the loaded table (the last expression of the cell is rendered as its output).
select_all_statement = "SELECT * FROM ALGO"
cursor.query(select_all_statement).df()

Unnamed: 0,_row_id,name,page,paragraph,data
0,1,data-structures.pdf,1,1,Top Data Structures & Algorithms inJava That Y...
1,1,data-structures.pdf,1,2,Swatee Chand · Follow
2,1,data-structures.pdf,1,3,"Published in Edureka · 16 min read · Jul 19, 2019"
3,1,data-structures.pdf,1,4,60
4,1,data-structures.pdf,1,6,Data Structures and Algorithms in Java — Edureka
...,...,...,...,...,...
441,1,data-structures.pdf,15,42,What if you don’t like to practice 100s ofcodi...
442,1,data-structures.pdf,15,43,"5 min read · Sep 15, 2022"
443,1,data-structures.pdf,16,1,681--28
444,1,data-structures.pdf,16,2,See more recommendations


## Create Text Summarization Function



In [None]:
# Register a HuggingFace summarization model as the EvaDB function `TextSummarizer`.
# IF NOT EXISTS keeps the cell safe to re-run.
create_summarizer_statement = """
    CREATE FUNCTION IF NOT EXISTS TextSummarizer
    TYPE HuggingFace
    TASK 'summarization'
    MODEL 'facebook/bart-large-cnn'
"""
cursor.query(create_summarizer_statement).df()

## Get Summaries


In [None]:
# Run the summarizer over every paragraph on pages before page 13 and keep the
# original text next to its summary.
summaries_statement = """
    SELECT data, TextSummarizer(data)
    FROM ALGO
    WHERE page < 13
"""
result = cursor.query(summaries_statement).df()

In [8]:
# Persist the summaries to disk (without the DataFrame index) for the later CSV-reading cell.
summaries_csv_path = 'result_data.csv'
result.to_csv(summaries_csv_path, index=False)

## Install and Import Semantic Kernel (SK) Packages

In [158]:
!python -m pip install semantic-kernel==0.3.10.dev0
import semantic_kernel as sk

from semantic_kernel import (
    ChatPromptTemplate,
    SemanticFunctionConfig,
    PromptTemplateConfig,
)



## Save Processed Text Summarization Values

In [159]:
import csv

file_path = "result_data.csv"

# Window of CSV rows whose summaries are used. Rows are counted from 0 (row 0 is the
# header) and both bounds are exclusive, i.e. rows 36..299 are kept.
first_row_exclusive = 35
last_row_exclusive = 300

# List to store the data from the second column (the summary column)
data_second_column = []

# Explicit UTF-8 so the file is read with the same encoding it was written with,
# independent of the platform's default locale.
with open(file_path, mode='r', newline='', encoding='utf-8') as file:
    reader = csv.reader(file)
    # enumerate() advances the row index on every row. The previous hand-rolled counter was
    # only incremented inside the range check, so it stayed at 0 and nothing was collected.
    for row_index, row in enumerate(reader):
        in_window = first_row_exclusive < row_index < last_row_exclusive
        if in_window and len(row) > 1:
            data_second_column.append(row[1])

# Concatenate all selected summaries into one string for use in the prompt.
# NOTE(review): this text is embedded in the chat prompt; confirm it fits the model's context window.
processed = ''.join(data_second_column)

## Setup and Connect to Semantic Kernel

In [160]:
import os
from getpass import getpass

kernel = sk.Kernel()

# Never hardcode credentials in a notebook: read the key from the OPENAI_API_KEY
# environment variable, and fall back to an interactive prompt that does not echo input.
# NOTE(review): a literal key was previously committed on this line; treat it as
# compromised and revoke it.
api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

## Create My GPT and Semantic Function with Enhancement Through Prompting and Tuning Along With Leveraging Text Summarization Feature from EvaDB

In [161]:
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion

# Register an OpenAI chat-completion backend on the kernel under the id "chat-gpt",
# which is the service id referenced by the prompt configuration.
chat_service = OpenAIChatCompletion("gpt-3.5-turbo", api_key)
kernel.add_chat_service("chat-gpt", chat_service)

<semantic_kernel.kernel.Kernel at 0x785d2a10c550>

In [162]:
# The parameters that will be passed to the connector and model service.
completion_settings = dict(
    temperature=0.3,
    top_p=1,
    max_tokens=2000,
    number_of_responses=1,
    presence_penalty=0,
    frequency_penalty=0,
)

# The variables used inside the prompt, as (name, description) pairs;
# each one defaults to an empty string.
prompt_variables = [
    ("input", "The input given by the user"),
    ("history", "Previous interactions between the user and chatbot"),
]
input_parameters = [
    {"name": variable_name, "description": variable_description, "defaultValue": ""}
    for variable_name, variable_description in prompt_variables
]

# Prompt configuration for the chatbot semantic function.
chat_config_dict = {
    "schema": 1,
    # The type of prompt
    "type": "completion",
    # A description of what the semantic function does
    "description": "A chatbot which provides information about different data structures.",
    # Which model service(s) to use
    "default_services": ["chat-gpt"],
    "completion": completion_settings,
    "input": {"parameters": input_parameters},
}

In [163]:
# Create a new context object from the kernel; it carries the variables used with the chatbot.
context = kernel.create_new_context()
# Start the conversation with an empty "history" variable.
context["history"] = ""

In [164]:
from semantic_kernel import PromptTemplateConfig, SemanticFunctionConfig, PromptTemplate


def create_semantic_function_config(prompt_template, prompt_config_dict, kernel):
    """Build a ``SemanticFunctionConfig`` from a prompt string and a config dict.

    Args:
        prompt_template: The prompt template text.
        prompt_config_dict: Dictionary handed to ``PromptTemplateConfig.from_dict``.
        kernel: Kernel whose ``prompt_template_engine`` is used for the template.

    Returns:
        A ``SemanticFunctionConfig`` wrapping the parsed config and the prompt template.
    """
    template_config = PromptTemplateConfig.from_dict(prompt_config_dict)
    template = sk.PromptTemplate(
        template=prompt_template,
        prompt_config=template_config,
        template_engine=kernel.prompt_template_engine,
    )
    return SemanticFunctionConfig(template_config, template)

In [165]:
# First part of the prompt: embeds the summaries collected in `processed`.
grounding_text = f"Please anaylze this data to help you with your responses: {processed}."

# Second part: persona and rules, followed by the template variables that are filled in
# from the context. This is a plain (non-f) string so the {{$...}} placeholders stay as written.
chat_instructions = """
      You are a chatbot that will answer the questions the user will ask.
      The user can ask about sorting algorithm, different data structures, and run time complexity.
      Always ask the user if they have any more questions or follow-up questions!
      If you do not know the answer, then politely let the user know you do not know.
      -----------

      {{$history}}
      User: {{$input}}
      ChatBot: """

prompt = grounding_text + chat_instructions

# Turn the prompt and its configuration into a semantic function and register it on the kernel.
function_config = create_semantic_function_config(prompt, chat_config_dict, kernel)
chatbot = kernel.register_semantic_function(
    skill_name="SimpleChatbot",
    function_name="simple_chatbot",
    function_config=function_config,
)

In [167]:
async def chat(input_text, context, verbose=True):
    """Send one user message to the chatbot and record the exchange in the context.

    Args:
        input_text: The user's message.
        context: Context object; its "input" entry is overwritten and its
            "history" entry is extended with this exchange.
        verbose: When true (the default), print the chatbot's answer. Previously this
            flag was accepted but ignored and the answer was always printed.
    """
    # Save new message in the context variables
    context["input"] = input_text

    # Process the user message and get an answer
    answer = await chatbot.invoke_async(context=context)

    # Show the response only when requested
    if verbose:
        print(f"ChatBot: {answer}")

    # Append the new interaction to the chat history
    context["history"] += f"\nUser: {input_text}\nChatBot: {answer}\n"

In [170]:
await chat("What is a list and why is it important?", context)

ChatBot: A list is a data structure that allows you to store and organize multiple elements in a specific order. It is important because it provides a way to manage and manipulate collections of data efficiently. Lists are used in various programming languages and are considered fundamental data structures.

Lists are important because they offer flexibility in terms of adding, removing, and accessing elements. They can store different types of data, such as numbers, strings, or even other lists. Lists can be modified easily, allowing for dynamic changes to the data they hold.

One of the key advantages of lists is their ability to maintain the order of elements. This means that the position of each element in the list is preserved, allowing for easy retrieval and manipulation. For example, if you have a list of names, you can easily access a specific name by its index.

Lists also provide a way to iterate over the elements, allowing you to perform operations on each item in the list. 

## Comparing with the Base GPT-3.5 Model (No Tuning/Prompting)

In [169]:
# Baseline: the same model on a fresh kernel, given only the bare question, with no
# grounding data, persona instructions or chat history.
baseline_question = "What is a list and why is it important? Please provide the response in detail and in-depth."

new_kernel = sk.Kernel()
baseline_service = OpenAIChatCompletion("gpt-3.5-turbo", api_key)
new_kernel.add_chat_service("chat-gpt", baseline_service)

prompt = new_kernel.create_semantic_function(baseline_question)
print(prompt())

A list is a data structure that stores a collection of elements or items in a specific order. It is an essential concept in computer science and programming, as it allows us to organize and manipulate data efficiently. Lists are used in various programming languages and are fundamental to solving many computational problems.

Lists are important for several reasons:

1. Storage and Organization: Lists provide a way to store and organize multiple items of data. They allow us to group related data together, making it easier to manage and access the information. For example, a list can be used to store a collection of names, numbers, or any other type of data.

2. Flexibility: Lists are dynamic data structures, meaning they can grow or shrink in size as needed. This flexibility allows us to add or remove elements from the list easily. We can append new items to the end of the list or insert them at specific positions. This dynamic nature makes lists suitable for scenarios where the number