In [2]:
import streamlit as st

from langchain.document_loaders import RecursiveUrlLoader, TextLoader, JSONLoader, PyPDFLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.docstore.document import Document
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.vectorstores import Chroma
from langchain.callbacks import StreamlitCallbackHandler
from langchain.tools import tool
from langchain.tools.json.tool import JsonSpec
from langchain.agents import OpenAIFunctionsAgent, AgentExecutor, load_tools
#from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain.agents.agent_toolkits import create_retriever_tool, JsonToolkit
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (AgentTokenBufferMemory,)
from langchain.schema import SystemMessage, AIMessage, HumanMessage
from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
from langsmith import Client
from langchain_community.vectorstores.redis import Redis
from langchain_community.chat_models.openai import ChatOpenAI

import os, openai, requests, json, zeep, datetime, pandas as pd
from requests.auth import HTTPBasicAuth
from dotenv import load_dotenv, find_dotenv
from zeep.wsse.username import UsernameToken

# Load secrets and endpoints from the nearest .env file before the environment is read below.
_ = load_dotenv(find_dotenv())
openai.api_key = os.environ.get('OPENAI_API_KEY')

# Chat model used throughout the notebook (alternative kept for quick switching).
#model = "gpt-4-1106-preview"
model = "gpt-3.5-turbo"

# Shared embedding model and deterministic (temperature 0) chat model.
embeddings = OpenAIEmbeddings()
chat = ChatOpenAI(model=model, temperature=0.0)

# Workday tenant and integration-user credentials; the credentials are read from the environment.
TENANT = 'wdmarketdesk_dpt1'
WD_USER_ID = os.environ.get('WD_USER_ID')
WD_PWD = os.environ.get('WD_PWD')

# Workday custom-report (JSON) endpoints.
WD_Worker_URL = "https://impl-services1.wd12.myworkday.com/ccx/service/customreport2/wdmarketdesk_dpt1/xjin-impl/Worker_Data_2?format=json"
WD_Absence_URL = "https://impl-services1.wd12.myworkday.com/ccx/service/customreport2/wdmarketdesk_dpt1/xjin-impl/Worker_Absence_Data?format=json"
WD_COMP_URL = "https://impl-services1.wd12.myworkday.com/ccx/service/customreport2/wdmarketdesk_dpt1/xjin-impl/Worker_Comp_Data?format=json"

# Workday SOAP WSDL endpoints (only referenced by the commented-out zeep clients below).
WD_STAFFING_WSDL_URL = "https://impl-services1.wd12.myworkday.com/ccx/service/wdmarketdesk_dpt1/Staffing/v41.1?wsdl"
WD_HR_WSDL_URL = "https://impl-services1.wd12.myworkday.com/ccx/service/wdmarketdesk_dpt1/Human_Resources/v42.0?wsdl"

# Connection string for the Redis instance used by the cache cells.
REDIS_URL = os.environ.get('REDIS_URL')

# HTTP basic-auth object passed to `requests` when calling the custom reports.
basicAuth = HTTPBasicAuth(WD_USER_ID, WD_PWD)
#wd_hr_client = zeep.Client(WD_HR_WSDL_URL, wsse=UsernameToken(WD_USER_ID + '@' + TENANT, WD_PWD))
#wd_staffing_client = zeep.Client(WD_STAFFING_WSDL_URL, wsse=UsernameToken(WD_USER_ID + '@' + TENANT, WD_PWD))

  warn_deprecated(
  warn_deprecated(


### Clean-Up Indexes as Needed

In [59]:
#rds.drop_index('worker_hr', delete_documents=True)
#rds_policies.drop_index('hr_policies', delete_documents=True)

#print(chat)

True

### Redis Standard Cache

In [38]:
from langchain.cache import RedisCache
from langchain.globals import set_llm_cache
import redis

# Install an exact-match LLM response cache backed by the Redis instance at REDIS_URL.
redis_client = redis.Redis.from_url(url=REDIS_URL)
standard_cache = RedisCache(redis_=redis_client)
set_llm_cache(standard_cache)

### Clean-Up Redis Standard Cache

In [41]:
from langchain.globals import get_llm_cache
import re

# The currently installed LLM cache; this cell reads its `.redis` client attribute.
redis_cache = get_llm_cache()

# Keys whose name matches any of these patterns are kept; every other key is deleted.
patterns = [r'worker', r'redis', r'doc']  # Add your patterns here

keys = redis_cache.redis.keys()

filtered_keys = []
for key in keys:
    # Keys come back as bytes; decode once and test the text against every pattern.
    key_text = key.decode('utf-8')
    if not any(re.search(pattern, key_text) for pattern in patterns):
        filtered_keys.append(key)

print(filtered_keys)

for key in filtered_keys:
    redis_cache.redis.delete(key)


[]


### Redis Semantic Cache

In [29]:
from langchain.cache import RedisSemanticCache
from langchain.globals import set_llm_cache

# Replace the active LLM cache with a similarity-based one stored at REDIS_URL.
# score_threshold=0.001 is the value chosen by the notebook author for how close a prompt must be to count as a hit.
semantic_cache = RedisSemanticCache(
    redis_url=REDIS_URL,
    embedding=OpenAIEmbeddings(),
    score_threshold=0.001,
)
set_llm_cache(semantic_cache)

  warn_deprecated(


### Clean-Up Redis Semantic Cache Keys

In [12]:
from langchain.globals import get_llm_cache
import re

# The currently installed LLM cache; this cell reads its private `_cache_dict` mapping.
redis_cache = get_llm_cache()

# Entries whose key matches any of these patterns are kept; every other entry is dropped.
patterns = [r'worker', r'redis', r'doc']  # Add your patterns here

# Snapshot the keys so the mapping can be modified while looping.
keys = list(redis_cache._cache_dict.keys())
print(f"Keys from Semantic Cache: {keys} and type: {type(keys)}")

filtered_keys = []
for key in keys:
    if not any(re.search(pattern, key) for pattern in patterns):
        filtered_keys.append(key)

print(filtered_keys)

for key in filtered_keys:
    # Drop the index (and its documents) first, then forget the entry locally.
    cached_store = redis_cache._cache_dict[key]
    cached_store.drop_index(
        index_name=key, delete_documents=True, redis_url=REDIS_URL
    )
    redis_cache._cache_dict.pop(key)


Keys from Semantic Cache: [] and type: <class 'list'>
[]


In [60]:
def get_worker_data_raas(timeout=60):
    """Fetch the worker custom report from Workday and return the HTTP response.

    Sends a basic-auth GET request to ``WD_Worker_URL`` using the module-level
    ``basicAuth`` credentials.

    Args:
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive endpoint cannot hang the notebook indefinitely.

    Returns:
        The ``requests.Response`` object. Callers decode the payload
        themselves from ``.content``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (for example, rejected credentials).
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(WD_Worker_URL, auth=basicAuth, timeout=timeout)
    # Fail here with the real HTTP status instead of letting an error page
    # reach the JSON decoding step downstream.
    response.raise_for_status()
    return response

In [61]:
"""Initializes with all of worker data. If any information is not found, \
please use this tool as the default tool to look for the data needed. \
Do not try to get the same data more than 2 times.
"""
# NOTE(review): the bare string literal that opens this cell is an expression statement with
# no runtime effect; it reads like a tool description left over from an earlier draft.
print("Initializing with Worker data in Redis")
# get_worker_data_raas() returns the HTTP response object, so despite its name `worker_json`
# is not parsed JSON; the payload is decoded from `.content` in the next cell.
worker_json = get_worker_data_raas()
#worker_df = pd.DataFrame(worker_json)

#print(f"Worker DF: {worker_df}")

Initializing with Worker data in Redis


In [62]:
# Decode the report payload; the worker rows live under the 'Report_Entry' key.
worker_data = json.loads(worker_json.content)
worker_df = pd.DataFrame(worker_data['Report_Entry'])

print(worker_data['Report_Entry'][0])
print(len(worker_data['Report_Entry']))


def _row_to_sentence(row):
    """Render one worker row as comma-separated '<column> is <value>' pairs."""
    return ', '.join(f'{k} is {v}' for k, v in row.map(str).items())


# One descriptive string per worker. The former trailing `.transform(''.join)` was removed:
# every element is already a single string, so joining its characters returned it unchanged.
worker_content = worker_df.apply(_row_to_sentence, axis=1)
print(worker_content[0])
print(type(worker_content))

{'Employee_Type': 'Regular', 'country': 'United States of America', 'city': 'San Francisco', 'Team_Members': 'Robert Hsing; Pedro Santiago; Tammy Calhoun; Henry Lynch; Logan McNeil; Alex Garcia', 'Cost_Center': '41600 HR Services', 'Address_-_Formatted_Line_1': '42 Laurel Street', 'Employee_ID': '21001', 'Employee_Legal_Full_Name': 'Logan McNeil', 'State_ISO_Code': 'CA', 'Manager_Name': 'Joy Banks', 'businessTitle': 'Vice President, Human Resources', 'FTE': '1', 'Company_Name': 'Global Modern Services, Inc. (USA)', 'Job_Profile': 'Vice President, Human Resources', 'Age': '52', 'Time_Type': 'Full time'}
501
Employee_Type is Regular, country is United States of America, city is San Francisco, Team_Members is Robert Hsing; Pedro Santiago; Tammy Calhoun; Henry Lynch; Logan McNeil; Alex Garcia, Cost_Center is 41600 HR Services, Address_-_Formatted_Line_1 is 42 Laurel Street, Employee_ID is 21001, Employee_Legal_Full_Name is Logan McNeil, State_ISO_Code is CA, Manager_Name is Joy Banks, busi

In [63]:
# Embed every worker description and store it, with the raw report row as metadata,
# in the "worker_hr" index.
# The connection string is taken from REDIS_URL (loaded from the environment) so no credential
# is committed with the notebook; REDIS_URL must embed the password, e.g.
# redis://default:<password>@<host>:<port>.
# NOTE(review): the password previously hard-coded in this cell has been exposed and should be rotated.
rds = Redis.from_texts(
    list(worker_content),  # plain list of strings, as the `texts` argument expects
    embeddings,
    metadatas=worker_data['Report_Entry'],
    redis_url=REDIS_URL,
    index_name="worker_hr",
)

In [64]:
# Save the index schema to YAML; the "Load Existing Worker HR Data" cell passes this file
# as `schema=` when reconnecting to the "worker_hr" index.
rds.write_schema("worker_hr.yaml")

### Load New HR Policies

In [65]:
# Load the HR policies PDF. The next cell reads `.page_content` and `.metadata` from each
# entry of `pages`; the printed metadata carries the file 'source' and a 'page' number.
loader = PyPDFLoader("docs/HR_Policies.pdf")
pages = loader.load()

In [66]:
# Sanity check: show the metadata of the last ten loaded pages.
for page in pages[-10:]:
    print(page.metadata)

# Parallel lists in page order: the text of each page and its metadata dict.
policies_content = [page.page_content for page in pages]
policies_metadata = [page.metadata for page in pages]

{'source': 'docs/HR_Policies.pdf', 'page': 126}
{'source': 'docs/HR_Policies.pdf', 'page': 127}
{'source': 'docs/HR_Policies.pdf', 'page': 128}
{'source': 'docs/HR_Policies.pdf', 'page': 129}
{'source': 'docs/HR_Policies.pdf', 'page': 130}
{'source': 'docs/HR_Policies.pdf', 'page': 131}
{'source': 'docs/HR_Policies.pdf', 'page': 132}
{'source': 'docs/HR_Policies.pdf', 'page': 133}
{'source': 'docs/HR_Policies.pdf', 'page': 134}
{'source': 'docs/HR_Policies.pdf', 'page': 135}


In [67]:
# Embed every HR policy page and store it, with its page metadata, in the "hr_policies" index.
# The connection string is taken from REDIS_URL (loaded from the environment) so no credential
# is committed with the notebook; REDIS_URL must embed the password, e.g.
# redis://default:<password>@<host>:<port>.
# NOTE(review): the password previously hard-coded in this cell has been exposed and should be rotated.
rds_policies = Redis.from_texts(
    policies_content,
    embeddings,
    metadatas=policies_metadata,
    redis_url=REDIS_URL,
    index_name="hr_policies",
)

In [68]:
# Save the index schema to YAML; the "Load Existing HR Policies" cell passes this file
# as `schema=` when reconnecting to the "hr_policies" index.
rds_policies.write_schema("hr_policies.yaml")

### Load Admin Guides

In [3]:
# Load the Human Capital Management admin guide PDF. This reuses the names `loader` and
# `pages` from the HR policies section, so those earlier values are replaced here.
loader = PyPDFLoader("docs/Admin-Guide-Human-Capital-Management.pdf")
pages = loader.load()

In [4]:
# Sanity check: show the metadata of the last ten loaded pages.
for page in pages[-10:]:
    print(page.metadata)

# Parallel lists in page order: the text of each page and its metadata dict.
admin_content = [page.page_content for page in pages]
admin_metadata = [page.metadata for page in pages]

{'source': 'docs/Admin-Guide-Human-Capital-Management.pdf', 'page': 2412}
{'source': 'docs/Admin-Guide-Human-Capital-Management.pdf', 'page': 2413}
{'source': 'docs/Admin-Guide-Human-Capital-Management.pdf', 'page': 2414}
{'source': 'docs/Admin-Guide-Human-Capital-Management.pdf', 'page': 2415}
{'source': 'docs/Admin-Guide-Human-Capital-Management.pdf', 'page': 2416}
{'source': 'docs/Admin-Guide-Human-Capital-Management.pdf', 'page': 2417}
{'source': 'docs/Admin-Guide-Human-Capital-Management.pdf', 'page': 2418}
{'source': 'docs/Admin-Guide-Human-Capital-Management.pdf', 'page': 2419}
{'source': 'docs/Admin-Guide-Human-Capital-Management.pdf', 'page': 2420}
{'source': 'docs/Admin-Guide-Human-Capital-Management.pdf', 'page': 2421}


In [7]:
# Embed every admin guide page and store it, with its page metadata, in the "admin_guides" index.
# The connection string is taken from REDIS_URL (loaded from the environment) so no credential
# is committed with the notebook; REDIS_URL must embed the password, e.g.
# redis://default:<password>@<host>:<port>.
# NOTE(review): the password previously hard-coded in this cell has been exposed and should be rotated.
rds_admin_guides = Redis.from_texts(
    admin_content,
    embeddings,
    metadatas=admin_metadata,
    redis_url=REDIS_URL,
    index_name="admin_guides",
)

In [8]:
# Save the index schema to YAML; the "Load Existing Admin Guides" cell passes this file
# as `schema=` when reconnecting to the "admin_guides" index.
rds_admin_guides.write_schema("admin_guides.yaml")

### Load Existing Worker HR Data

In [69]:
# Reconnect to the existing "worker_hr" index using the schema saved in worker_hr.yaml.
# The connection string is taken from REDIS_URL (loaded from the environment) so no credential
# is committed with the notebook; REDIS_URL must embed the password, e.g.
# redis://default:<password>@<host>:<port>.
# NOTE(review): the password previously hard-coded in this cell has been exposed and should be rotated.
rds = Redis.from_existing_index(
    embeddings,
    index_name="worker_hr",
    redis_url=REDIS_URL,
    schema="worker_hr.yaml",
)

### Load Existing HR Policies

In [3]:
# Reconnect to the existing "hr_policies" index using the schema saved in hr_policies.yaml.
# The connection string is taken from REDIS_URL (loaded from the environment) so no credential
# is committed with the notebook; REDIS_URL must embed the password, e.g.
# redis://default:<password>@<host>:<port>.
# NOTE(review): the password previously hard-coded in this cell has been exposed and should be rotated.
rds_policies = Redis.from_existing_index(
    embeddings,
    index_name="hr_policies",
    redis_url=REDIS_URL,
    schema="hr_policies.yaml",
)

### Load Existing Admin Guides

In [None]:
# Reconnect to the existing "admin_guides" index using the schema saved in admin_guides.yaml.
# The connection string is taken from REDIS_URL (loaded from the environment) so no credential
# is committed with the notebook; REDIS_URL must embed the password, e.g.
# redis://default:<password>@<host>:<port>.
# NOTE(review): the password previously hard-coded in this cell has been exposed and should be rotated.
rds_admin_guides = Redis.from_existing_index(
    embeddings,
    index_name="admin_guides",
    redis_url=REDIS_URL,
    schema="admin_guides.yaml",
)

In [4]:
# Look up the three worker records closest to a cost-center phrase and list their IDs.
search_term = "41600 HR Services"
results = rds.similarity_search(query=search_term, k=3)
for hit in results:
    print(hit.metadata['Employee_ID'])


21001
21145
21519


In [5]:
# Same search term as the previous cell, now returning (document, score) pairs for up to
# five hits, with a distance threshold of 0.30 passed to the search.
results = rds.similarity_search_with_score(search_term, k=5, distance_threshold=0.30)
for doc, score in results:
    print(f"Model: {doc.metadata['Employee_ID']} --- Score: {score}")

Model: 21001 --- Score: 0.1892
Model: 21145 --- Score: 0.1893
Model: 21519 --- Score: 0.1924
Model: 21143 --- Score: 0.194
Model: 21144 --- Score: 0.1969


In [None]:
from langchain.vectorstores.redis import RedisText, RedisNum, RedisTag

# `context` accumulates the text of every hit and is consumed by the prompt cell that follows.
context = ""
query = "what is the address of Charles Bradley"
# Filter expression for one employee; currently unused because the `filter=` argument is commented out.
is_emp_id = RedisNum("Employee_ID") == 21072
results = rds.similarity_search_with_score(query, k=3) #, filter=is_emp_id if is_emp_id!=0 else None)
for doc, score in results:
    print(f"Model: {doc.metadata['Employee_ID']} --- Score: {score} --- Type: {type(doc)}")
    print(f"Content: {doc.page_content}")
    context += doc.page_content

In [None]:
# Build a retriever restricted to a single employee via a metadata filter expression.
employee_id = RedisNum("Employee_ID") == 21072
if employee_id != 0:
    employee_filter = employee_id
else:
    employee_filter = None

retriever = rds.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4, "filter": employee_filter},
)

# An empty query string is used, so the filter is the only selection criterion supplied here.
docs = retriever.get_relevant_documents("")
for doc in docs:
    print(doc.metadata['Employee_ID'])

### Search Employee Info with Citation

In [28]:
# Prompt that answers `query` from `context` and cites one of the supplied source IDs.
template_string = """
Consider the information below: \
{context}
Provide answer to the {query}. Be very specific and relevant. \
For each answer, provide source in brackets based on what is in {sources}. \
"""

prompt_template = ChatPromptTemplate.from_template(template_string)

# Comma-separated Employee_IDs of the retrieved records. This must be a plain string: the
# earlier `{', '.join(...)}` wrapped it in a set literal, so the prompt received "{'...'}".
source_ids = ', '.join(str(doc.metadata["Employee_ID"]) for doc, _score in results)
customer_messages = prompt_template.format_messages(context=context,
                                                    query=query,
                                                    sources=source_ids)

# `invoke` replaces the deprecated direct call on the chat model.
customer_response = chat.invoke(customer_messages)
print(customer_response.content)

The address of Charles Bradley is 48 Harbor Drive, Greenwich, CT, United States of America. [Employee_ID: 21072]


### Search Policies with Citation

In [34]:
# Other questions tried against the HR policies index:
#query = "what are the leaves"
#query = "how can we appeal to the grievance decision"
#query = "what are leave policies"
#query = "what are study leave policies"
query = "what are annual leave policies"

results_policies = rds_policies.similarity_search_with_score(query, k=3)

# Concatenate the text of the retrieved pages; the prompt cells below read `context`.
#print(hit[0].metadata["source"])
#print(hit[0].page_content)
context = "".join(hit[0].page_content for hit in results_policies)

In [None]:
%%time
template_string = """
Consider the information below: \
{context}
Provide answer to the {query}. Be very specific and relevant. \
For each answer, provide source in brackets. \
Do not repeat the same source information in the same line.
In the final response, always replace word KPPRA with GMS
"""

prompt_template = ChatPromptTemplate.from_template(template_string)
customer_messages = prompt_template.format_messages(context=context, 
                                                    query=query, 
                                                    sources="") #{', '.join([result[0].metadata["source"]
                                                                 #       for result in results_policies])})

customer_response = chat(customer_messages)
print(customer_response.content)

In [None]:
%%time
query_2 = "what are all the leave policies in the company"
#query_2 = "how can we appeal to the decisions made for grievance in the company"

customer_messages_2 = prompt_template.format_messages(context=context, 
                                                    query=query_2, 
                                                    sources="") #{', '.join([result[0].metadata["source"]
                                                                 #       for result in results_policies])})
breakpoint()
customer_response_2 = chat(customer_messages_2)
print(customer_response_2.content)

### Search Admin Guides

In [27]:
context = ""
# Other questions tried against the admin guide index:
#query = "how to manage flexible working arrangements"
#query = "what is the best way to manage return to work"
#query = "what is the best way to handle requesting employees to return to work instead of working from home"
#query = "how to configure security settings for the system"
query = "what are the options for different id types"

results = rds_admin_guides.similarity_search_with_score(query, k=3)
for doc, _score in results:
    print(doc.page_content)
    # Tag each excerpt with its origin. The answer prompt asks for "source including page
    # number"; without these tags the context holds only page text, so the model has to
    # guess the citation.
    source = doc.metadata.get('source', 'unknown source')
    page = doc.metadata.get('page', 'unknown')
    context += f"{doc.page_content}\n[Source: {source}, page {page}]\n"
    #context += result[0].metadata['source']
    #context += result[0].metadata['page']

 | Worker Information  | 55
2.Access the Create Authority  task.
Configure licensing authorities for countries and country regions. When you edit a person type's ID
information, except beneficiaries, you can select the authority that issued the license ID.
3.Create Identification Document Types  on page 55.
4.Edit Business Processes .
Configure business processes and their security policies in the Personal Data functional area.
•Individual business processes for different ID types:
•Edit Government IDs
•Edit ID Information
•Edit Licenses
•Edit Other IDs
•Passports and Visa Change
•Edit ID Information  business process to modify any of the 4 ID types in 1 business process event.
Related Information
Reference
Reference: ID Management  on page 57
Workday Community: Global Matrix
Workday Community: National IDs in Workday
Create Identification Document Types
Prerequisites
•Configure reasons for using IDs.
•Configure licensing authorities if you're creating a license ID type.
•Security: Set

In [28]:
%%time
template_string = """
Consider the information below: \
{context}
Provide answer to the {query}. Be very specific and relevant. \
For each answer, provide source including page number in brackets. \
Do not repeat the same source information in the same line.
"""

prompt_template = ChatPromptTemplate.from_template(template_string)
customer_messages = prompt_template.format_messages(context=context, 
                                                    query=query, 
                                                    sources="") #{', '.join([result[0].metadata["source"]
                                                                 #       for result in results_policies])})

customer_response = chat(customer_messages)
print(customer_response.content)

  warn_deprecated(


The options for different ID types include:

1. Maintain Additional Government ID Types [Steps: Create Identification Document Types]
2. Maintain Custom ID Types [Steps: Create Identification Document Types]
3. Maintain License ID Types [Steps: Create Identification Document Types]
4. Maintain Passport ID Types [Steps: Create Identification Document Types]
5. Maintain Visa ID Types [Steps: Create Identification Document Types]

(Source: Workday Community: Global Matrix, Page 55)
CPU times: total: 188 ms
Wall time: 3.04 s
