## Imports

In [1]:
import json
import os

import pandas as pd

## Read Data

In [2]:
def read_json_file(file_path):
    """
    Load a UTF-8 encoded JSON file and hand back the decoded content.

    Failures are reported with a printed message instead of being raised,
    so callers must be prepared for a ``None`` result.

    :param file_path: Path to the JSON file.
    :return: The decoded JSON value (typically a dict or list), or None when
             the file is missing, is not valid JSON, or any other error occurs.
    """
    parsed = None
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.loads(handle.read())
    except FileNotFoundError:
        print(f"Error: The file {file_path} was not found.")
    except json.JSONDecodeError:
        print(f"Error: The file {file_path} contains invalid JSON.")
    except Exception as e:
        # Catch-all kept on purpose: any other failure is reported, not raised.
        print(f"An unexpected error occurred: {e}")
    return parsed

In [3]:
# Load the notebook summary. `data` is None if the file is missing or is not
# valid JSON, because read_json_file prints the error instead of raising.
data = read_json_file("notebooks_summary.json")

In [None]:
# Preview the first three rows.
# NOTE(review): `df` is not defined in any visible cell (presumably built from
# `data` in a cell that was removed) — confirm this survives Restart & Run All.
df.head(3)

Unnamed: 0,local_path,name,table_of_content,practice_exercises,github_link
0,downloaded_files/Lectures/01 Introduction/01 E...,01 Early Programming,,[],https://github.com/pytopia/Python-Programming/...
1,downloaded_files/Lectures/01 Introduction/02 P...,02 Programming Languages,"[The First Programming Languages, Types of Pro...",[],https://github.com/pytopia/Python-Programming/...
2,downloaded_files/Lectures/01 Introduction/03 W...,03 Why Choose Python,"[Python's Simplicity and Versatility, Strong C...",[],https://github.com/pytopia/Python-Programming/...


In [6]:
# Flatten each notebook's list of section titles into one space-separated
# string. Only list/tuple values are joined; any other value (for example a
# string produced by an earlier run of this cell) is passed through unchanged,
# so re-running the cell no longer splits joined text into single characters.
df['table_of_content'] = df['table_of_content'].apply(
    lambda toc: " ".join(toc) if isinstance(toc, (list, tuple)) else toc
)

In [8]:
# Preview the first three rows again to check the joined `table_of_content` text
df.head(3)

Unnamed: 0,local_path,name,table_of_content,practice_exercises,github_link
0,downloaded_files/Lectures/01 Introduction/01 E...,01 Early Programming,,,https://github.com/pytopia/Python-Programming/...
1,downloaded_files/Lectures/01 Introduction/02 P...,02 Programming Languages,The First Programming Languages Types of Progr...,,https://github.com/pytopia/Python-Programming/...
2,downloaded_files/Lectures/01 Introduction/03 W...,03 Why Choose Python,Python's Simplicity and Versatility Strong Com...,,https://github.com/pytopia/Python-Programming/...


## Embeddings

In [9]:
# pip install openai==0.28

In [10]:
# pip uninstall openai -y
# pip install openai==0.28

True

In [15]:
# Preview the first five rows.
# NOTE(review): the `embeddings` column shown in this cell's output is not
# created by any visible cell — the cell that computed it appears to be missing.
df.head()

Unnamed: 0,local_path,name,table_of_content,practice_exercises,github_link,embeddings
0,downloaded_files/Lectures/01 Introduction/01 E...,01 Early Programming,,,https://github.com/pytopia/Python-Programming/...,"[0.001556192641146481, -0.016987796872854233, ..."
1,downloaded_files/Lectures/01 Introduction/02 P...,02 Programming Languages,The First Programming Languages Types of Progr...,,https://github.com/pytopia/Python-Programming/...,"[0.023670831695199013, -0.009352004155516624, ..."
2,downloaded_files/Lectures/01 Introduction/03 W...,03 Why Choose Python,Python's Simplicity and Versatility Strong Com...,,https://github.com/pytopia/Python-Programming/...,"[0.016650978475809097, -0.001550941844470799, ..."
3,downloaded_files/Lectures/01 Introduction/04 P...,04 Python Features and Applications,Dynamic Typing and Automatic Memory Management...,,https://github.com/pytopia/Python-Programming/...,"[-0.0022560248617082834, 0.007630623411387205,..."
4,downloaded_files/Lectures/01 Introduction/05 H...,05 How Long Does it Take to Learn Python,Factors that Influence Learning Journey Learni...,,https://github.com/pytopia/Python-Programming/...,"[0.02089807763695717, 0.006843096110969782, 0...."


## Postgres pg-vector

In [None]:
# pip install psycopg2-binary

In [17]:
import psycopg2

In [18]:
# pip install psycopg2-binary

In [19]:
# Connection settings are read from the standard libpq environment variables
# when they are set, so real credentials do not have to be committed with the
# notebook. The fallbacks are the original local-development values.
connection = psycopg2.connect(
        dbname=os.environ.get("PGDATABASE", "postgres"),
        user=os.environ.get("PGUSER", "postgres"),
        password=os.environ.get("PGPASSWORD", "mypass"),
        host=os.environ.get("PGHOST", "localhost"),  # or the database server IP address
        port=os.environ.get("PGPORT", "5432")        # default port for PostgreSQL
    )

In [20]:
# Open a cursor on the existing connection. `cur` is left open here and is
# reused by the table-creation cell further down.
cur = connection.cursor()
# Enable the pgvector extension (IF NOT EXISTS makes this safe to re-run)
cur.execute("CREATE EXTENSION IF NOT EXISTS vector");
connection.commit()

In [21]:
from pgvector.psycopg2 import register_vector

In [22]:


# Register the pgvector type with this psycopg2 connection
register_vector(connection)

In [23]:
# Spot-check three randomly chosen rows (no random_state is passed, so the
# selection changes on every run)
df.sample(3)

Unnamed: 0,local_path,name,table_of_content,practice_exercises,github_link,embeddings
82,downloaded_files/Lectures/10 Modular Programmi...,04 The Python Module Search Path,Understanding Python's Import System The Modul...,,https://github.com/pytopia/Python-Programming/...,"[0.017396213486790657, 0.004049348179250956, 0..."
59,downloaded_files/Lectures/07 Functions/07 Lamb...,07 Lambda Functions,Understanding Lambda Functions What Are Lambda...,## <a id='toc6_'></a> ## <a id='toc6_'></a> La...,https://github.com/pytopia/Python-Programming/...,"[0.03805854916572571, 0.010886633768677711, 0...."
92,downloaded_files/Lectures/11 Advanced Topics/1...,13 Error Handling with try and Except,,,https://github.com/pytopia/Python-Programming/...,"[0.001556192641146481, -0.016987796872854233, ..."


### Create Table

In [31]:
# Create the table that stores one row per notebook: its metadata plus the
# embedding vector. IF NOT EXISTS makes the cell safe to re-run; a plain
# CREATE TABLE raises on the second run and leaves the transaction aborted.
# vector(1536) is the dimension of text-embedding-ada-002 vectors, the model
# used by get_embeddings in this notebook.
table_create_command = """
CREATE TABLE IF NOT EXISTS embeddings (
            id bigserial primary key,
            local_path text,
            name text,
            table_of_content text,
            practice_exercises text,
            github_link text,
            embedding vector(1536)
            );
            """
cur.execute(table_create_command)
# This cursor is finished with; the insert cell opens a new one.
cur.close()
connection.commit()

In [52]:
import numpy as np
from psycopg2.extras import execute_values

In [53]:
# Bulk-load every dataframe row (metadata plus embedding) into the `embeddings` table
register_vector(connection)
cur = connection.cursor()

# Metadata columns, in the same order as the INSERT column list below
insert_columns = ['local_path', 'name', 'table_of_content', 'practice_exercises', 'github_link']

# One tuple per row; the embedding goes last and is converted to a NumPy array
data_list = [
    (*metadata, np.array(vector))
    for *metadata, vector in df[insert_columns + ['embeddings']].itertuples(index=False, name=None)
]

# execute_values expands the single VALUES %s placeholder into all rows
execute_values(cur, "INSERT INTO embeddings (local_path, name, table_of_content, practice_exercises, github_link, embedding) VALUES %s", data_list)

# Persist the whole batch in one commit
connection.commit()

## User Query Similarity Search

In [24]:
import numpy as np

In [25]:
# Helper function: get the top 3 most similar documents from the database
def get_top3_similar_docs(query_embedding, connection):
    """
    Return the three rows of the `embeddings` table nearest to a query vector.

    :param query_embedding: Sequence of floats for the query; converted to a
        NumPy array before being bound as the SQL parameter.
    :param connection: Open database connection whose `cursor()` is used to
        run the query.
    :return: List of up to three tuples
        (table_of_content, name, practice_exercises, github_link),
        ordered from closest to farthest.
    """
    embedding_array = np.array(query_embedding)
    # Register the pgvector type on this connection before binding the array
    register_vector(connection)
    # The context manager closes the cursor even if the query raises, so
    # repeated calls no longer leave cursors open.
    with connection.cursor() as cur:
        # <=> is pgvector's cosine-distance operator; smallest distance first
        cur.execute("SELECT table_of_content, name, practice_exercises, github_link FROM embeddings ORDER BY embedding <=> %s LIMIT 3", (embedding_array,))
        return cur.fetchall()

In [26]:
# Helper function: get a chat completion from the OpenAI API
def get_completion_from_messages(messages, model="gpt-4o", temperature=0, max_tokens=1000):
    """
    Send a chat conversation to OpenAI and return the text of the reply.

    Calls `openai.ChatCompletion.create`, so `openai` must already be imported
    in the notebook (the install cells above pin openai==0.28).

    :param messages: List of {"role": ..., "content": ...} chat messages.
    :param model: Chat model name passed through to the API.
    :param temperature: Sampling temperature passed through to the API.
    :param max_tokens: Upper bound on reply length passed through to the API.
    :return: The "content" of the first choice's message.
    """
    request_options = dict(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    completion = openai.ChatCompletion.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.message["content"]

In [27]:
# Helper function: get the embedding vector for a text
def get_embeddings(text):
    """
    Embed a piece of text with OpenAI's text-embedding-ada-002 model.

    Newlines in `text` are replaced by spaces before the request is sent.

    :param text: The string to embed.
    :return: The 'embedding' entry of the first item in the response's 'data'.
    """
    single_line_text = text.replace("\n", " ")
    result = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=single_line_text,
    )
    first_item = result['data'][0]
    return first_item['embedding']

In [28]:
# Smoke-test retrieval: embed a sample query and fetch its three nearest rows
related_docs = get_top3_similar_docs(get_embeddings("Best Practices for Function Arguments in Python"), connection)

In [29]:
# Show the retrieved rows: (table_of_content, name, practice_exercises, github_link)
related_docs

[('Positional Arguments Keyword Arguments Default Parameters Best Practices for Function Arguments in Python Positional Arguments Keyword Arguments Default Parameters Practice Exercise: A Day at the Zoo Solution',
  '03 Argument Passing',
  '## <a id=\'toc5_\'></a> ## <a id=\'toc5_\'></a> You are planning a visit to the local zoo with a group of students. The zoo has several sections for different kinds of animals, and each section has feeding times, special shows, and educational talks. To maximize the visit, you decide to write a Python program that helps organize the day\'s activities based on the group\'s preferences. *Tasks:** 1. Write a function named `schedule_visit` that takes three parameters: `section` (the section of the zoo to visit, e.g., "Reptiles", "Birds"), `time` (the time you plan to visit that section), and `activity` with a default value of "Feeding". The function should print a message summarizing the visit plan for that section. 2. Call the `schedule_visit` functi

In [30]:
# Function to process input with retrieval of most similar documents from the database
def process_input_with_retrieval(user_input):
    """
    Answer a user question, using the most similar stored notebooks as context.

    The question is embedded, the nearest rows are fetched from the database
    through the notebook-level `connection`, and their table-of-content text
    is passed to the chat model together with the question.

    :param user_input: The user's question as plain text.
    :return: The chat model's reply text.
    """
    delimiter = "```"
    # Step 1: Get documents related to the user input from the database
    related_docs = get_top3_similar_docs(get_embeddings(user_input), connection)
    # Use however many rows came back instead of indexing [0], [1], [2], which
    # raised IndexError when the table held fewer than three rows. Only the
    # first column (table_of_content) is used, one document per line.
    retrieved_context = " \n ".join(str(doc[0]) for doc in related_docs)

    # Step 2: Get completion from OpenAI API
    # Set system message to help set appropriate tone and context for model
    system_message = """
    You are a friendly Pytopia chatbot. \
    You can answer questions about Pytopia Python Programming Course Notebook and Github Repository, its features and its use cases. \
    You respond in a concise, technically credible tone. \
    """
    # Prepare messages to pass to model
    # We use a delimiter to help the model understand where the user_input starts and ends
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"{delimiter}{user_input}{delimiter}"},
        {"role": "assistant", "content": f"Relevant Pytopia Python Programming Course Notebooks and GitHub repository information: \n {retrieved_context}"}
    ]
    return get_completion_from_messages(messages)

In [31]:
# Run the full retrieval + completion pipeline on a sample question
result = process_input_with_retrieval("approach for Function Arguments in Python")

In [32]:
from pprint import pprint

In [33]:
# print() so the newlines in the reply are rendered rather than shown as \n in a repr
print(result)

In Python, function arguments can be categorized into several types, each serving a specific purpose. Here's a concise overview of the different types of function arguments and best practices for using them:

1. **Positional Arguments**: These are the most common type of arguments. They are passed to a function in the order in which they are defined. For example:
   ```python
   def greet(name, age):
       print(f"Hello, {name}! You are {age} years old.")
   greet("Alice", 30)
   ```

2. **Keyword Arguments**: These arguments are passed to a function by explicitly specifying the parameter name, allowing you to skip the order. For example:
   ```python
   greet(age=30, name="Alice")
   ```

3. **Default Parameters**: These parameters have default values specified in the function definition. If no argument is provided for a default parameter, the default value is used. For example:
   ```python
   def greet(name, age=25):
       print(f"Hello, {name}! You are {age} years old.")
   greet