In [71]:
import streamlit as st

from langchain.document_loaders import RecursiveUrlLoader, TextLoader, JSONLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.docstore.document import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.vectorstores import Chroma
from langchain.callbacks import StreamlitCallbackHandler
from langchain.tools import tool
from langchain.tools.json.tool import JsonSpec
from langchain.agents import OpenAIFunctionsAgent, AgentExecutor, load_tools
#from langchain_experimental.tools.python.tool import PythonREPLTool
from langchain.agents.agent_toolkits import create_retriever_tool, JsonToolkit
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (AgentTokenBufferMemory,)
from langchain.chat_models import ChatOpenAI
from langchain.schema import SystemMessage, AIMessage, HumanMessage
from langchain.prompts import MessagesPlaceholder
from langsmith import Client
from langchain_community.vectorstores.redis import Redis

import os, openai, requests, json, zeep, datetime, pandas as pd
from requests.auth import HTTPBasicAuth
from dotenv import load_dotenv, find_dotenv
from zeep.wsse.username import UsernameToken

# Load variables from a local .env file (if one is found) so the os.getenv
# lookups below can see them.
_ = load_dotenv(find_dotenv())
openai.api_key = os.getenv('OPENAI_API_KEY')

# Chat model name; the alternative below is kept for quick switching.
#model = "gpt-4-1106-preview"
model = "gpt-3.5-turbo"

# Embedding client shared by the vector-store cells further down.
embeddings = OpenAIEmbeddings()

# Workday tenant and the credentials used for HTTP basic auth.
TENANT = 'wdmarketdesk_dpt1'
WD_USER_ID = os.getenv('WD_USER_ID')
WD_PWD = os.getenv('WD_PWD')

# Workday endpoints, assembled from a common service root and the tenant name.
_WD_SERVICE_ROOT = "https://impl-services1.wd12.myworkday.com/ccx/service"
_WD_REPORT_ROOT = f"{_WD_SERVICE_ROOT}/customreport2/{TENANT}/xjin-impl"
WD_Worker_URL = f"{_WD_REPORT_ROOT}/Worker_Data_2?format=json"
WD_Absence_URL = f"{_WD_REPORT_ROOT}/Worker_Absence_Data?format=json"
WD_COMP_URL = f"{_WD_REPORT_ROOT}/Worker_Comp_Data?format=json"
WD_STAFFING_WSDL_URL = f"{_WD_SERVICE_ROOT}/{TENANT}/Staffing/v41.1?wsdl"
WD_HR_WSDL_URL = f"{_WD_SERVICE_ROOT}/{TENANT}/Human_Resources/v42.0?wsdl"

# Basic-auth credentials object passed to requests when calling the report URLs.
basicAuth = HTTPBasicAuth(WD_USER_ID, WD_PWD)
# SOAP clients are currently disabled:
#wd_hr_client = zeep.Client(WD_HR_WSDL_URL, wsse=UsernameToken(WD_USER_ID + '@' + TENANT, WD_PWD)) 
#wd_staffing_client = zeep.Client(WD_STAFFING_WSDL_URL, wsse=UsernameToken(WD_USER_ID + '@' + TENANT, WD_PWD))

In [131]:
#rds.drop_index('worker_hr', delete_documents=True)

True

In [132]:
def get_worker_data_raas(timeout=60):
    """Fetch the worker report at ``WD_Worker_URL`` and return the raw HTTP response.

    The request is authenticated with the module-level ``basicAuth`` object.
    The response body is not decoded here; callers read ``.content`` from the
    returned object themselves.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The ``requests.Response`` for the report request.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection problems or a timeout.
    """
    response = requests.get(WD_Worker_URL, auth=basicAuth, timeout=timeout)
    # Fail here, with the HTTP status in the message, rather than later when a
    # caller tries to read report rows out of an error body.
    response.raise_for_status()
    return response

In [133]:
"""Initializes with all of worker data. If any information is not found, \
please use this tool as the default tool to look for the data needed. \
Do not try to get the same data more than 2 times.
"""
# Announce the load, then fetch the worker report once for the cells below.
print("Initializing with Worker data in Redis")
# NOTE: despite its name, worker_json holds the response object returned by
# get_worker_data_raas(), not parsed JSON; the next cell decodes
# worker_json.content and builds the DataFrame from its 'Report_Entry' rows.
worker_json = get_worker_data_raas()

Initializing with Worker data in Redis


In [134]:
# Decode the JSON body of the report response and tabulate its rows.
worker_data = json.loads(worker_json.content)
worker_df = pd.DataFrame(worker_data['Report_Entry'])

# Peek at the first raw record and the total number of records.
print(worker_data['Report_Entry'][0])
print(len(worker_data['Report_Entry']))


def _row_to_sentence(row):
    """Render one DataFrame row as 'column is value' pairs joined by ', '."""
    return ', '.join([f'{column} is {value}' for column, value in row.map(str).items()])


# One text per worker; this Series is what gets embedded into the vector index.
worker_content = worker_df.apply(_row_to_sentence, axis=1).transform(''.join)
print(worker_content[0])
print(type(worker_content))

{'Employee_Type': 'Regular', 'country': 'United States of America', 'city': 'San Francisco', 'Team_Members': 'Robert Hsing; Pedro Santiago; Tammy Calhoun; Henry Lynch; Logan McNeil; Alex Garcia', 'Cost_Center': '41600 HR Services', 'Address_-_Formatted_Line_1': '42 Laurel Street', 'Employee_ID': '21001', 'Employee_Legal_Full_Name': 'Logan McNeil', 'State_ISO_Code': 'CA', 'Manager_Name': 'Joy Banks', 'businessTitle': 'Vice President, Human Resources', 'FTE': '1', 'Company_Name': 'Global Modern Services, Inc. (USA)', 'Job_Profile': 'Vice President, Human Resources', 'Age': '52', 'Time_Type': 'Full time'}
488
Employee_Type is Regular, country is United States of America, city is San Francisco, Team_Members is Robert Hsing; Pedro Santiago; Tammy Calhoun; Henry Lynch; Logan McNeil; Alex Garcia, Cost_Center is 41600 HR Services, Address_-_Formatted_Line_1 is 42 Laurel Street, Employee_ID is 21001, Employee_Legal_Full_Name is Logan McNeil, State_ISO_Code is CA, Manager_Name is Joy Banks, busi

In [135]:
# Build the "worker_hr" Redis vector index: one embedded text per worker, with
# the matching raw report row attached as metadata.
#
# Connection secrets are read from the environment (or the .env file loaded at
# the top of the notebook) so they are never stored in the notebook itself.
# NOTE(review): a Redis password was previously committed in this cell and
# should be rotated.
redis_password = os.getenv("REDIS_PASSWORD")
if not redis_password:
    raise RuntimeError(
        "REDIS_PASSWORD is not set; add it to the environment or the .env file"
    )

rds = Redis.from_texts(
    list(worker_content),  # from_texts expects a plain list of strings
    embeddings,
    metadatas=worker_data['Report_Entry'],
    # REDIS_URL may override the default endpoint.
    redis_url=os.getenv(
        "REDIS_URL",
        "redis://redis-10042.c280.us-central1-2.gce.cloud.redislabs.com:10042",
    ),
    password=redis_password,
    index_name="worker_hr"
)

In [136]:
# Write the schema of the index held by `rds` to worker_hr.yaml; the
# "Load Existing Worker HR Data" cell passes this same file as `schema=`.
rds.write_schema("worker_hr.yaml")

### Load Existing Worker HR Data

In [141]:
# Reconnect to the already-populated "worker_hr" index using the schema file
# written when the index was created.
#
# Connection secrets are read from the environment (or the .env file loaded at
# the top of the notebook) so they are never stored in the notebook itself.
# NOTE(review): a Redis password was previously committed in this cell and
# should be rotated.
redis_password = os.getenv("REDIS_PASSWORD")
if not redis_password:
    raise RuntimeError(
        "REDIS_PASSWORD is not set; add it to the environment or the .env file"
    )

rds = Redis.from_existing_index(
    embeddings,
    index_name="worker_hr",
    # REDIS_URL may override the default endpoint.
    redis_url=os.getenv(
        "REDIS_URL",
        "redis://redis-10042.c280.us-central1-2.gce.cloud.redislabs.com:10042",
    ),
    password=redis_password,
    schema="worker_hr.yaml",
)

In [137]:
# Plain similarity lookup against the worker index: ask for the three closest
# documents and show which employee each one belongs to.
search_term = "41600 HR Services"
results = rds.similarity_search(search_term, k=3)
for hit in results:
    hit_metadata = hit.metadata
    print(hit_metadata['Employee_ID'])


21001
21145
21519


In [125]:
# Same search term, but each hit now comes back as a (document, score) pair.
# distance_threshold=0.30 is passed to limit the matches (presumably a maximum
# vector distance; confirm against the LangChain Redis documentation).
results = rds.similarity_search_with_score(search_term, k=5, distance_threshold=0.30)
for doc, score in results:
    print(f"Model: {doc.metadata['Employee_ID']} --- Score: {score}")

Model: 21001 --- Score: 0.1894
Model: 21145 --- Score: 0.1908
Model: 21519 --- Score: 0.1913
Model: 21148 --- Score: 0.198
Model: 21143 --- Score: 0.2006


In [126]:
from langchain.vectorstores.redis import RedisText, RedisNum, RedisTag

# Look up a single worker through a metadata filter rather than text similarity.
query = "what is the employee id of Alexooo Man"  # NOTE(review): not used by the search below
is_emp_id = RedisNum("Employee_ID") == 21072
# The comparison above yields a filter-expression object, which never compares
# equal to 0, so a `!= 0` guard around it would always pick the filter. Pass the
# filter directly instead.
results = rds.similarity_search_with_score("", k=3, filter=is_emp_id)
for doc, score in results:
    print(f"Model: {doc.metadata['Employee_ID']} --- Score: {score}")

Model: 21072 --- Score: 0.2104


In [142]:
# Wrap the index as a retriever restricted by an Employee_ID == 21072 metadata filter.
employee_id = RedisNum("Employee_ID") == 21072
# The comparison above yields a filter-expression object, which never compares
# equal to 0, so a `!= 0` guard around it would always pick the filter. Pass the
# filter directly instead.
retriever = rds.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4, "filter": employee_id},
)
docs = retriever.get_relevant_documents("")
for doc in docs:
    print(doc.metadata['Employee_ID'])

21072
