# LLM - Schedule

### Setup

In [1]:
# %pip list | grep google-cloud-aiplatform
# %pip list | grep google-api-core
# %pip list | grep chromadb
# %pip list | grep langchain
# %pip list | grep unstructured

In [2]:
#%pip install chromadb

In [3]:
import os
import json
import shutil
import warnings
from dotenv import load_dotenv
import pandas as pd

# GCS Bucket
from google.cloud import storage
from google.api_core.exceptions import NotFound

# Langchain
from langchain.embeddings import VertexAIEmbeddings
from langchain.llms import VertexAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import GCSDirectoryLoader
from langchain.memory import ConversationBufferMemory
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

# Chroma DB as Vector Store Database
from langchain.vectorstores import Chroma

# Using Vertex AI
import vertexai
from google.cloud import aiplatform

print(f"Vertex AI SDK version: {aiplatform.__version__}")

Vertex AI SDK version: 1.59.0


In [4]:
pd.set_option('display.max_colwidth', 120)

In [5]:
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message=".*deprecated.*", category=UserWarning)

### Notebook Environment

In [6]:
load_dotenv()

PROJECT_ID = os.getenv('PROJECT_ID')
LOCATION = os.getenv('LOCATION')

vertexai.init(project=PROJECT_ID, location=LOCATION)

BUCKET = os.getenv('BUCKET')

### Create JSON File for Optimization Model Output

In [7]:
def create_bucket_if_not_exists(bucket_name, project_id, location):
    
    # Create Client
    storage_client = storage.Client(project=project_id)

    # Get Bucket Object
    bucket = storage_client.bucket(bucket_name)

    try:
        # Check if Bucket Exists
        storage_client.get_bucket(bucket_name)
        print(f"Bucket Already Exists")
    except NotFound:
        # If Not Found, Create the Bucket
        bucket.location = location  # Set the Bucket's Location
        storage_client.create_bucket(bucket, project=project_id)
        print(f"Bucket Created")

project_id = PROJECT_ID
bucket_name = BUCKET
location = LOCATION
create_bucket_if_not_exists(bucket_name, project_id, location)

Bucket Already Exists


In [8]:
def create_json(json_object, filename, project_id=PROJECT_ID, bucket_name=BUCKET):
    '''
    This Function Will Create JSON Object In
    Google Cloud Storage
    '''
    client = storage.Client(project=project_id)
    bucket = client.bucket(bucket_name)
    
    # Create Blob
    blob = bucket.blob(filename)
    
    # Upload Blob 
    blob.upload_from_string(
        data=json.dumps(json_object),
        content_type='application/json'
        )
    result = filename + ' upload complete'
    return {'response' : result}

# JSON Object
json_object = [
    {"title": "Study Math", "date": "2023-10-01", "time": "08:00-08:30"},
    {"title": "Study Biology", "date": "2023-10-01", "time": "10:40-11:10"},
    {"title": "Study Physics", "date": "2023-10-01", "time": "11:20-11:50"},
    {"title": "Study Chemistry", "date": "2023-10-01", "time": "12:00-12:30"},
    {"title": "Study Math", "date": "2023-10-02", "time": "08:00-08:30"},
    {"title": "Study Biology", "date": "2023-10-02", "time": "08:40-09:10"},
    {"title": "Study Physics", "date": "2023-10-02", "time": "09:20-09:50"},
    {"title": "Study Chemistry", "date": "2023-10-02", "time": "10:00-10:30"},
    {"title": "Study Math", "date": "2023-10-03", "time": "08:00-08:30"},
    {"title": "Study Biology", "date": "2023-10-03", "time": "08:40-09:10"},
    {"title": "Study Physics", "date": "2023-10-03", "time": "09:20-09:50"},
    {"title": "Study Chemistry", "date": "2023-10-03", "time": "10:00-10:30"},
    {"title": "Study Math", "date": "2023-10-04", "time": "08:00-08:30"},
    {"title": "Study Biology", "date": "2023-10-04", "time": "08:40-09:10"},
    {"title": "Study Physics", "date": "2023-10-04", "time": "09:20-09:50"},
    {"title": "Study Chemistry", "date": "2023-10-04", "time": "11:40-12:10"},
    {"title": "Study Math", "date": "2023-10-05", "time": "08:00-08:30"},
    {"title": "Study Biology", "date": "2023-10-05", "time": "08:40-09:10"},
    {"title": "Study Physics", "date": "2023-10-05", "time": "09:20-09:50"},
    {"title": "Study Chemistry", "date": "2023-10-05", "time": "10:00-10:30"},
    {"title": "Study Computer Science", "date": "2023-10-06", "time": "09:40-10:10"},
    {"title": "Study English", "date": "2023-10-06", "time": "10:20-10:50"},
    {"title": "Study History", "date": "2023-10-06", "time": "11:00-11:30"},
    {"title": "Study Psychology", "date": "2023-10-06", "time": "11:40-12:10"},
    {"title": "Study Math", "date": "2023-10-07", "time": "08:00-08:30"},
    {"title": "Study Biology", "date": "2023-10-07", "time": "08:40-09:10"},
    {"title": "Study Physics", "date": "2023-10-07", "time": "09:20-09:50"},
    {"title": "Study Chemistry", "date": "2023-10-07", "time": "10:00-10:30"}
]

# Insert Events Between Study Sessions
def insert_events(schedule):
    updated_schedule = []
    prev_end_time = "08:00"  # Starting time
    for event in schedule:
        start_time = event["time"].split("-")[0]
        if start_time > prev_end_time:
            updated_schedule.append({"title": "Meeting", "date": event["date"], "time": f"{prev_end_time}-{start_time}"})
        updated_schedule.append(event)
        prev_end_time = event["time"].split("-")[1]
    return updated_schedule

# Add Events Between Study Sessions
updated_schedule_data = insert_events(json_object)

# Convert Schedule Data to JSON Format
json_data = json.dumps(updated_schedule_data, indent=4)

# Write JSON Data to A File
with open("schedule_with_events.json", "w") as json_file:
    json_file.write(json_data)

# Set File Name for JSON Object
filename = 'schedule_test.json'

# Run the Function and Pass the JSON Object
print(create_json(json_object, filename))

{'response': 'schedule_test.json upload complete'}


### Load JSON File From GCS Bucket

In [9]:
def load_json_from_bucket(bucket_name, filename, project_id):

    # Create Client
    storage_client = storage.Client(project=project_id)
    
    # Get Bucket Object
    bucket = storage_client.bucket(bucket_name)
    
    # Get Blob Object
    blob = bucket.blob(filename)
    
    # Download the JSON File
    json_data = json.loads(blob.download_as_string())
    
    return json_data

project_id = PROJECT_ID
bucket_name = BUCKET

filename = "schedule_test.json"

# Load JSON file From GCS Bucket
json_data = load_json_from_bucket(bucket_name, filename, project_id)

# Print Loaded JSON Data
print(json_data)

[{'title': 'Study Math', 'date': '2023-10-01', 'time': '08:00-08:30'}, {'title': 'Study Biology', 'date': '2023-10-01', 'time': '10:40-11:10'}, {'title': 'Study Physics', 'date': '2023-10-01', 'time': '11:20-11:50'}, {'title': 'Study Chemistry', 'date': '2023-10-01', 'time': '12:00-12:30'}, {'title': 'Study Math', 'date': '2023-10-02', 'time': '08:00-08:30'}, {'title': 'Study Biology', 'date': '2023-10-02', 'time': '08:40-09:10'}, {'title': 'Study Physics', 'date': '2023-10-02', 'time': '09:20-09:50'}, {'title': 'Study Chemistry', 'date': '2023-10-02', 'time': '10:00-10:30'}, {'title': 'Study Math', 'date': '2023-10-03', 'time': '08:00-08:30'}, {'title': 'Study Biology', 'date': '2023-10-03', 'time': '08:40-09:10'}, {'title': 'Study Physics', 'date': '2023-10-03', 'time': '09:20-09:50'}, {'title': 'Study Chemistry', 'date': '2023-10-03', 'time': '10:00-10:30'}, {'title': 'Study Math', 'date': '2023-10-04', 'time': '08:00-08:30'}, {'title': 'Study Biology', 'date': '2023-10-04', 'time':

### JSON File Prep

In [10]:
# Define Text Embeddings Model
embeddings = VertexAIEmbeddings()

print(embeddings)

Model_name will become a required arg for VertexAIEmbeddings starting from Feb-01-2024. Currently the default is set to textembedding-gecko@001


project=None location='us-central1' request_parallelism=5 max_retries=6 stop=None model_name='textembedding-gecko@001' client=<vertexai.language_models.TextEmbeddingModel object at 0x7f09e9a06d40> client_preview=None temperature=0.0 max_output_tokens=128 top_p=0.95 top_k=40 credentials=None n=1 streaming=False instance={'max_batch_size': 250, 'batch_size': 250, 'min_batch_size': 5, 'min_good_batch_size': 5, 'lock': <unlocked _thread.lock object at 0x7f09e9c47900>, 'batch_size_validated': False, 'task_executor': <concurrent.futures.thread.ThreadPoolExecutor object at 0x7f09e9bfee30>, 'embeddings_task_type_supported': False} show_progress_bar=False


In [11]:
def json_to_text(data):
    """Convert a list of dictionaries to a human-readable string format."""
    text_output = ""
    for entry in data:
        event_description = f"On {entry['date']}, {entry['title']} is scheduled from {entry['time']}.\n"
        text_output += event_description
    return text_output

# Convert JSON to text
formatted_text = json_to_text(json_data)
print(formatted_text)

On 2023-10-01, Study Math is scheduled from 08:00-08:30.
On 2023-10-01, Study Biology is scheduled from 10:40-11:10.
On 2023-10-01, Study Physics is scheduled from 11:20-11:50.
On 2023-10-01, Study Chemistry is scheduled from 12:00-12:30.
On 2023-10-02, Study Math is scheduled from 08:00-08:30.
On 2023-10-02, Study Biology is scheduled from 08:40-09:10.
On 2023-10-02, Study Physics is scheduled from 09:20-09:50.
On 2023-10-02, Study Chemistry is scheduled from 10:00-10:30.
On 2023-10-03, Study Math is scheduled from 08:00-08:30.
On 2023-10-03, Study Biology is scheduled from 08:40-09:10.
On 2023-10-03, Study Physics is scheduled from 09:20-09:50.
On 2023-10-03, Study Chemistry is scheduled from 10:00-10:30.
On 2023-10-04, Study Math is scheduled from 08:00-08:30.
On 2023-10-04, Study Biology is scheduled from 08:40-09:10.
On 2023-10-04, Study Physics is scheduled from 09:20-09:50.
On 2023-10-04, Study Chemistry is scheduled from 11:40-12:10.
On 2023-10-05, Study Math is scheduled from 

In [12]:
# Assuming Each Entry Should Be Treated As A Separate Document
texts = [f"On {entry['date']}, {entry['title']} is scheduled from {entry['time']}." for entry in json_data]

In [13]:
texts

['On 2023-10-01, Study Math is scheduled from 08:00-08:30.',
 'On 2023-10-01, Study Biology is scheduled from 10:40-11:10.',
 'On 2023-10-01, Study Physics is scheduled from 11:20-11:50.',
 'On 2023-10-01, Study Chemistry is scheduled from 12:00-12:30.',
 'On 2023-10-02, Study Math is scheduled from 08:00-08:30.',
 'On 2023-10-02, Study Biology is scheduled from 08:40-09:10.',
 'On 2023-10-02, Study Physics is scheduled from 09:20-09:50.',
 'On 2023-10-02, Study Chemistry is scheduled from 10:00-10:30.',
 'On 2023-10-03, Study Math is scheduled from 08:00-08:30.',
 'On 2023-10-03, Study Biology is scheduled from 08:40-09:10.',
 'On 2023-10-03, Study Physics is scheduled from 09:20-09:50.',
 'On 2023-10-03, Study Chemistry is scheduled from 10:00-10:30.',
 'On 2023-10-04, Study Math is scheduled from 08:00-08:30.',
 'On 2023-10-04, Study Biology is scheduled from 08:40-09:10.',
 'On 2023-10-04, Study Physics is scheduled from 09:20-09:50.',
 'On 2023-10-04, Study Chemistry is scheduled 

In [14]:
vector_db = Chroma.from_texts(texts, embeddings)

In [15]:
print(f'Number of Events in Collection: {vector_db._collection.count():,}')

Number of Events in Collection: 28


In [16]:
# Expose Index to the Retriever
retriever = vector_db.as_retriever(
    search_type="similarity", search_kwargs={"k": 3} #k: Number of Documents to return, defaults to 4.
)

## LLM

In [17]:
llm = VertexAI(
    model_name="text-bison-32k",
    max_output_tokens=256,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,
)

In [18]:
template = """Use the provided JSON file, which is an output of the optimization model, to answer questions about a person's calendar. If you don't know the answer, 
just say that you don't know, don't try to make up an answer. \
Keep the answer as concise as possible. Always say "Nick: Please let me know what else I can help with you." at the end of the answer, but print this in a new line and there should be a space between the line that comes before this. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [19]:
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

In [20]:
# Uses LLM to Synthesize Results From the Search Index
# Vertex PaLM Text API for LLM
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    retriever=retriever, 
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    memory=memory
)

##### Question 1

In [21]:
question = "What events do I have on 2023-10-07?"
user_input = {"query": question}
response = qa(user_input)

print(f"Nick: {response['query']}\n")
print(f"Ed: {response['result']}\n")

Nick: What events do I have on 2023-10-07?

Ed:  On 2023-10-07, you have the following events:

- Study Math from 08:00-08:30
- Study Physics from 09:20-09:50
- Study Chemistry from 10:00-10:30

Nick: Please let me know what else I can help with you.



In [22]:
print(f"{response['chat_history']}\n")

[HumanMessage(content='What events do I have on 2023-10-07?'), AIMessage(content=' On 2023-10-07, you have the following events:\n\n- Study Math from 08:00-08:30\n- Study Physics from 09:20-09:50\n- Study Chemistry from 10:00-10:30\n\nNick: Please let me know what else I can help with you.')]



##### Question 2

In [23]:
question = "What about on 2023-10-06?"
user_input = {"query": question}
response = qa(user_input)

print(f"Nick: {response['query']}\n")
print(f"Ed: {response['result']}\n")

Nick: What about on 2023-10-06?

Ed:  On 2023-10-06, Study Computer Science is scheduled from 09:40-10:10.
On 2023-10-06, Study English is scheduled from 10:20-10:50.

Nick: Please let me know what else I can help with you.



In [24]:
print(f"{response['chat_history']}\n")

[HumanMessage(content='What events do I have on 2023-10-07?'), AIMessage(content=' On 2023-10-07, you have the following events:\n\n- Study Math from 08:00-08:30\n- Study Physics from 09:20-09:50\n- Study Chemistry from 10:00-10:30\n\nNick: Please let me know what else I can help with you.'), HumanMessage(content='What about on 2023-10-06?'), AIMessage(content=' On 2023-10-06, Study Computer Science is scheduled from 09:40-10:10.\nOn 2023-10-06, Study English is scheduled from 10:20-10:50.\n\nNick: Please let me know what else I can help with you.')]



##### Question 3

In [25]:
question = "Today is 2023-10-06. What do I have tomorrow?"
user_input = {"query": question}
response = qa(user_input)

print(f"Nick: {response['query']}\n")
print(f"Ed: {response['result']}\n")

Nick: Today is 2023-10-06. What do I have tomorrow?

Ed:  On 2023-10-07, you have Study Math from 08:00-08:30.
Nick: Please let me know what else I can help with you.



In [26]:
print(f"{response['chat_history']}\n")

[HumanMessage(content='What events do I have on 2023-10-07?'), AIMessage(content=' On 2023-10-07, you have the following events:\n\n- Study Math from 08:00-08:30\n- Study Physics from 09:20-09:50\n- Study Chemistry from 10:00-10:30\n\nNick: Please let me know what else I can help with you.'), HumanMessage(content='What about on 2023-10-06?'), AIMessage(content=' On 2023-10-06, Study Computer Science is scheduled from 09:40-10:10.\nOn 2023-10-06, Study English is scheduled from 10:20-10:50.\n\nNick: Please let me know what else I can help with you.'), HumanMessage(content='Today is 2023-10-06. What do I have tomorrow?'), AIMessage(content=' On 2023-10-07, you have Study Math from 08:00-08:30.\nNick: Please let me know what else I can help with you.')]



## Memory

In [27]:
memory.load_memory_variables({})

{'chat_history': [HumanMessage(content='What events do I have on 2023-10-07?'),
  AIMessage(content=' On 2023-10-07, you have the following events:\n\n- Study Math from 08:00-08:30\n- Study Physics from 09:20-09:50\n- Study Chemistry from 10:00-10:30\n\nNick: Please let me know what else I can help with you.'),
  HumanMessage(content='What about on 2023-10-06?'),
  AIMessage(content=' On 2023-10-06, Study Computer Science is scheduled from 09:40-10:10.\nOn 2023-10-06, Study English is scheduled from 10:20-10:50.\n\nNick: Please let me know what else I can help with you.'),
  HumanMessage(content='Today is 2023-10-06. What do I have tomorrow?'),
  AIMessage(content=' On 2023-10-07, you have Study Math from 08:00-08:30.\nNick: Please let me know what else I can help with you.')]}

In [28]:
display(pd.DataFrame(memory.load_memory_variables({})["chat_history"]))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,"(content, What events do I have on 2023-10-07?)","(additional_kwargs, {})","(response_metadata, {})","(type, human)","(name, None)","(id, None)","(example, False)",,,
1,"(content, On 2023-10-07, you have the following events:\n\n- Study Math from 08:00-08:30\n- Study Physics from 09:2...","(additional_kwargs, {})","(response_metadata, {})","(type, ai)","(name, None)","(id, None)","(example, False)","(tool_calls, [])","(invalid_tool_calls, [])","(usage_metadata, None)"
2,"(content, What about on 2023-10-06?)","(additional_kwargs, {})","(response_metadata, {})","(type, human)","(name, None)","(id, None)","(example, False)",,,
3,"(content, On 2023-10-06, Study Computer Science is scheduled from 09:40-10:10.\nOn 2023-10-06, Study English is sch...","(additional_kwargs, {})","(response_metadata, {})","(type, ai)","(name, None)","(id, None)","(example, False)","(tool_calls, [])","(invalid_tool_calls, [])","(usage_metadata, None)"
4,"(content, Today is 2023-10-06. What do I have tomorrow?)","(additional_kwargs, {})","(response_metadata, {})","(type, human)","(name, None)","(id, None)","(example, False)",,,
5,"(content, On 2023-10-07, you have Study Math from 08:00-08:30.\nNick: Please let me know what else I can help with ...","(additional_kwargs, {})","(response_metadata, {})","(type, ai)","(name, None)","(id, None)","(example, False)","(tool_calls, [])","(invalid_tool_calls, [])","(usage_metadata, None)"
