In [2]:
from llama_index.core import SimpleDirectoryReader, StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.postgres import PGVectorStore
import openai
import logging
import sys
import textwrap
#documents = SimpleDirectoryReader("/Users/jiawen/Desktop/USYD_S3/capstone_lammaindex").load_data()
#index = VectorStoreIndex.from_documents(documents)

In [9]:
import os

# Directory containing the text files to index. Set the CAPSTONE_TXT_DIR
# environment variable to run the notebook on another machine; when it is not
# set, the original local path is used so existing behaviour is unchanged.
DOCS_DIR = os.environ.get("CAPSTONE_TXT_DIR", "/Users/jiawen/Desktop/USYD_S3/capstone_txt")

# Load the contents of DOCS_DIR; `documents` is consumed by the index-building cells.
documents = SimpleDirectoryReader(DOCS_DIR).load_data()

In [4]:
import getpass
import os

# Keep the API key out of the notebook: read it from the OPENAI_API_KEY
# environment variable and only prompt (without echoing) when it is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

# Build a vector index over the documents loaded in the previous cell.
index = VectorStoreIndex.from_documents(documents)

# Request the "tree_summarize" response mode with verbose output enabled.
query_engine = index.as_query_engine(response_mode="tree_summarize", verbose=True)

# Run one question against the index and show the answer.
response = query_engine.query("What did the author do growing up?")
print(response)

The author focused on writing and programming before college. They wrote short stories and later started programming on an IBM 1401 in 9th grade using an early version of Fortran. The author then transitioned to working with microcomputers, building one from a kit and later getting a TRS-80 to further their programming skills.


In [5]:
import os.path
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# Directory where the index is persisted between runs.
PERSIST_DIR = "./storage"

if os.path.exists(PERSIST_DIR):
    # A persisted copy exists: load the index from it instead of rebuilding.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # Nothing persisted yet: read the documents, build the index, then save it.
    # NOTE(review): this rebinds `documents` (from a different directory than the
    # earlier load cell), and later cells read `documents` too — confirm intended.
    documents = SimpleDirectoryReader("/Users/jiawen/Desktop/USYD_S3/capstone_lammaindex").load_data()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

# Whichever branch ran, `index` is now available for querying.
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)

The author worked on writing and programming before college.


In [5]:
# The rest of this notebook imports from the `llama_index.core` package layout,
# where the top-level `llama_index` / `llama_index.llms` import paths and
# ServiceContext are no longer the supported way to configure an index.
# The LLM and chunking settings are therefore passed locally instead.
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.openai import OpenAI

# LLM used only by this cell's query engine (global defaults are left untouched,
# matching the original, which did not install a global service context).
llm = OpenAI(model="gpt-3.5-turbo", temperature=0, max_tokens=256)

# Chunking parameters for this index only: same chunk_size / chunk_overlap values
# that were previously given to ServiceContext.from_defaults.
text_splitter = SentenceSplitter(chunk_size=800, chunk_overlap=20)

# Build the index with the local splitter, then query it with the local LLM.
index = VectorStoreIndex.from_documents(documents, transformations=[text_splitter])
query_engine = index.as_query_engine(llm=llm, streaming=True)

# With streaming=True the response is printed as it arrives.
response = query_engine.query("What did the author do growing up?")
response.print_response_stream()

The author focused on writing and programming before college.

## Create the Database

In [7]:
import getpass
import os
from urllib.parse import quote

import psycopg2

# Keep the database password out of the notebook: read it from PGPASSWORD and
# only prompt (without echoing) when that variable is not set.
pg_password = os.environ.get("PGPASSWORD") or getpass.getpass("Postgres password: ")

# Percent-encode the password so characters such as '@' or '/' cannot break the URL.
escaped_password = quote(pg_password, safe="")

# `connection_string` is reused by the index-creation cell, so the name is kept.
connection_string = f"postgresql://postgres:{escaped_password}@localhost:5432"
conn = psycopg2.connect(connection_string)

# Autocommit is enabled before issuing DROP/CREATE DATABASE; PostgreSQL does not
# allow those statements inside a transaction block.
conn.autocommit = True

db_name = "vector_db"
with conn.cursor() as cursor:
    # WARNING: this discards any existing database named `db_name` before recreating it.
    cursor.execute(f"DROP DATABASE IF EXISTS {db_name}")
    cursor.execute(f"CREATE DATABASE {db_name}")

## Create the index

In [10]:
from sqlalchemy import make_url

# Parse the connection string so its components can be passed individually.
url = make_url(connection_string)

# Connection and table settings for the Postgres-backed vector store.
pg_params = {
    "database": db_name,
    "host": url.host,
    "password": url.password,
    "port": url.port,
    "user": url.username,
    "table_name": "paul_graham_essay",
    "embed_dim": 1536,  # openai embedding dimension
}
vector_store = PGVectorStore.from_params(**pg_params)

# Route index storage through the Postgres vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index from the loaded documents, showing progress bars while it runs.
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
    storage_context=storage_context,
)
query_engine = index.as_query_engine()


Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/22 [00:00<?, ?it/s]

In [11]:
# Ask the Postgres-backed query engine a question; the answer is printed in the next cell.
question = "What did the author do?"
response = query_engine.query(question)

In [12]:
# Wrap the answer text at 100 columns so it is readable in the cell output.
wrapped_answer = textwrap.fill(str(response), width=100)
print(wrapped_answer)

The author worked on writing and programming before college. Initially, the author wrote short
stories and later started programming on an IBM 1401 using an early version of Fortran. The author
then transitioned to working with microcomputers, where they began writing simple games, programs,
and a word processor. In college, the author initially planned to study philosophy but eventually
switched to studying AI due to a lack of interest in philosophy courses. The author was inspired to
work on AI after encountering influential works like a novel by Heinlein and a PBS documentary.
