In [35]:
# Installations
#!pip install python-dotenv
#!pip install langchain
#!pip install langchain_openai
#!pip install langchain-core
#!pip install langchain_text_splitters
#!pip install langchain_community
#!pip install -qU langchain-community faiss-cpu
#!pip install decouple


[notice] A new release of pip is available: 23.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
# Imports
import os
import json
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
from time import sleep
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage

In [2]:
# Load in environment variables
load_dotenv(override=True)

llm = AzureChatOpenAI(
    azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT'],
    api_key=os.environ['AZURE_OPENAI_APIKEY'],
    deployment_name=os.environ['AZURE_OPENAI_DEPLOYMENT_NAME'],
    model_name=os.environ['AZURE_OPENAI_MODEL_NAME'],
    api_version=os.environ['AZURE_OPENAI_API_VERSION'],
    temperature=0
)

In [3]:
env = f"""
{os.environ['AZURE_OPENAI_ENDPOINT']}
{os.environ['AZURE_OPENAI_APIKEY']}
{os.environ['AZURE_OPENAI_DEPLOYMENT_NAME']}
{os.environ['AZURE_OPENAI_MODEL_NAME']}
{os.environ['AZURE_OPENAI_API_VERSION']}
"""
print(env)


https://roydon-fyp-model-1.openai.azure.com/
98285108c5a842ae9e80b2092cfab2d8
FYP-azureopenai-gpt35-usnorth
gpt-35-turbo
2024-06-01



# Testing out prompting

In [40]:
# Instruction prompt
name = "Roydon"
persona = f"""You are an assistant whom will faciliate the conversation between a mute and a normal person. The mute persons name is {name} and the normal person is indicated as other person."""
task = """ You should be generating 3 responses which the mute person could choose from and the responses generated should follow the context of the conversation. 
        The topic should be interpreted from the conversation.
        If no topic could be interpreted, provide default responses that a person would start with such as greetings. 
        The responses should be what a person would say and should not include actions in a third person view. Your persona would be from the perspective of the mute person.
        In the case the responses are not chosen, the mute person could type their own response. Do take note of this response and continue the conversation from the response selected or typed out by the mute person.
        Ensure the responses generated will allow the conversation to flow smoothly."""
context = ""
condition = """It must be in english. An example of the 3 generated response would be in the format of 1 single string "Response 1: what you generated Response 2: what you generated Response 3: what you generated" all in one line."""

# Construct message object
instruction = f"{persona} {task} {condition}"
messages = [SystemMessage(content=instruction)]


response = llm.invoke("Say something in 10 words")

## OUTPUT
print("Print as Object: \n",response)
print("="*40)
print("Print as JSON: \n",response.to_json())
print("="*40)
print("Print content only: \n",response.content)



Print as Object: 
 content='Life is short, make the most of every moment given.' response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 13, 'total_tokens': 25}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': 'fp_e49e4201a9', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'protected_material_code': {'filtered': False, 'detected': False}, 'protected_material_text': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}} 

In [41]:
# Prompt Engineered

#print(instruction)
query = input("Enter your message: ")

while query != "exit":
    print(f"Human : {query}\n")
    usermsg = HumanMessage(content=query)
    messages.append(usermsg)

    response = llm.invoke(messages)
    sleep(1)
    print(f"AI    : {response.content}\n")
    messages.append(response)

    ## re-prompt
    query = input("Enter your message (enter exit to end): ")

print("Conversation ended.")


Human : hi how have you been

AI    : Response 1: I've been good, thanks for asking. Response 2: Not too bad, just keeping busy. Response 3: I've been doing well, how about you?

Human : hi



KeyboardInterrupt: 

In [42]:
# Print Chatlog
print("Final Conversation Log")
print(messages)

Final Conversation Log
[SystemMessage(content='You are an assistant whom will faciliate the conversation between a mute and a normal person. The mute persons name is Roydon and the normal person is indicated as other person.  You should be generating 3 responses which the mute person could choose from and the responses generated should follow the context of the conversation. \n        The topic should be interpreted from the conversation.\n        If no topic could be interpreted, provide default responses that a person would start with such as greetings. \n        The responses should be what a person would say and should not include actions in a third person view. Your persona would be from the perspective of the mute person.\n        In the case the responses are not chosen, the mute person could type their own response. Do take note of this response and continue the conversation from the response selected or typed out by the mute person.\n        Ensure the responses generated will

# Generating json file mock data

In [1]:
import json

In [8]:
# Sample declaration: The person is Roydon, with a personality of optimistic. The topic is on football and he is an Arsenal Fan 
"The person is , with a personality of . The topic is on ."

# Instruction prompt
persona = """You would be assisting in creating mock data to be used"""
task = """ I am trying to create mock data to showcase a person's personality through the conversations that he has. I will provide you with the topic and the persons personality. Generate replies with another person based on that."""
condition = """You would be generating a conversation of at least 40 different back and forth replies for the person Roydon.
For example, 1 reply between 2 people, Xavier and Roydon would be:
Roydon says something, then Xavier replies. Each reply would count as 1. """
#context = f"Person name is {name}, with a personality of {personality}. The topic is on {topic}"

# Construct message object
instruction = f"{persona} {task} {condition}"
messages = [SystemMessage(content=instruction)]

query = input("Enter your message: ")

print(f"Human : {query}\n")
usermsg = HumanMessage(content=query)
messages.append(usermsg)

response = llm.invoke(messages)

# Parsing the conversation
lines = response.content.split('\n')
responses = {}
response_index = 1  # To number each response pair

# Iterate over lines in pairs
for i in range(0, len(lines), 2):
    response_key = f"Response {response_index}"
    responses[response_key] = {}

    # Process first speaker
    if i < len(lines) and ": " in lines[i]:
        speaker, message = lines[i].split(": ", 1)
        responses[response_key][speaker] = message.strip()

    # Process second speaker, if exists
    if i + 1 < len(lines) and ": " in lines[i + 1]:
        speaker, message = lines[i + 1].split(": ", 1)
        responses[response_key][speaker] = message.strip()

    response_index += 1

with open('C:\\Roydon\\Github\\FYP_Application\\MuteCompanion\\backend\mockdata\\pet2.json', 'a') as f:
    json.dump(responses, f, indent=4)
    f.write("\n") 

Human : The person is Roydon, with a mood of excited . The topic is on what Roydon plans to do with his new golden retriever dog. Make up 3 new things Roydon plans to do..



# Using an embedding model

In [3]:
from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [4]:
documents = []
for filename in os.listdir('C:\\Roydon\\Github\\FYP_Application\\MuteCompanion\\backend\mockdata\\'):
    if filename.endswith(".json"):
        with open(f'C:\\Roydon\\Github\\FYP_Application\\MuteCompanion\\backend\mockdata\\{filename}') as f:
            data = json.load(f)
            for response_label, conversation in data.items():
                doc_content = json.dumps(conversation)
                doc_metadata = {"label": response_label, "source": filename}
                documents.append(Document(page_content=doc_content, metadata=doc_metadata))

print(len(documents))

61


In [5]:
documents[0].page_content

'{"Roydon": "Hey there! Can\'t wait for the new football season to start, hoping for a great one for Arsenal!", "John": "Hey Roydon! Yeah, it\'s always exciting to see how your team will perform. Optimistic as always, I see!"}'

In [6]:
def parse_json_to_dict(json_string):
    return json.loads(json_string)

def convert_dict_to_text(conversation_dict):
    text = ""
    for speaker, message in conversation_dict.items():
        text += f"{speaker}: {message}\n\n"
    return text.strip()

In [10]:
test = convert_dict_to_text(parse_json_to_dict(documents[0].page_content))
print(test)

Roydon: Hey there! Can't wait for the new football season to start, hoping for a great one for Arsenal!

John: Hey Roydon! Yeah, it's always exciting to see how your team will perform. Optimistic as always, I see!


In [9]:
# Set up the text splitter
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

# Process and chunk each document
chunked_documents = []
for document in documents:
    conversation_dict = parse_json_to_dict(document.page_content)
    conversation_text = convert_dict_to_text(conversation_dict)
    chunks = splitter.split_text(conversation_text)
    chunked_documents.extend(chunks)

# Optionally, print out some chunked documents to verify
for doc in chunked_documents[:5]:  # Print first 5 chunked documents
    print("Chunked Document:", doc)

print(len(chunked_documents))

Chunked Document: Roydon: Hey there! Can't wait for the new football season to start, hoping for a great one for Arsenal!

John: Hey Roydon! Yeah, it's always exciting to see how your team will perform. Optimistic as always, I see!
Chunked Document: Roydon: Absolutely! Gotta stay positive, right? What team do you support?

John: I'm a Manchester United fan, so we might have some friendly rivalry this season!
Chunked Document: Roydon: Haha, definitely! Looking forward to some intense matches between our teams. Who do you think will be Arsenal's key player this season?

John: I have a feeling Aubameyang will continue to shine for Arsenal. His goals are always a game-changer!
Chunked Document: Roydon: I couldn't agree more! Aubameyang is a true asset to the team. Do you think Arsenal will make it to the top four this season?

John: It's definitely possible with the right strategy and teamwork. What are your thoughts on Arsenal's new signings?
Chunked Document: Roydon: I'm optimistic about

In [7]:
from langchain_openai.embeddings import AzureOpenAIEmbeddings
embeddings = AzureOpenAIEmbeddings(azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT'], 
                                   api_key=os.environ['AZURE_OPENAI_APIKEY'], 
                                   model=os.environ['TEXT_EMBEDDING_MODEL_NAME'],
                                   azure_deployment=os.environ['TEXT_EMBEDDING_DEPLOYMENT_NAME'])

embeddings

AzureOpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x00000204D3BE8D50>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x00000204D3C2AC50>, model='text-embedding-ada-002', dimensions=None, deployment='FYP-azureopenai-text-embedding-centralnorthus', openai_api_version='2023-05-15', openai_api_base=None, openai_api_type='azure', openai_proxy='', embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=2048, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True, azure_endpoint='https://roydon-fyp-model-1.openai.azure.com/', azure_ad_token=None, azure_ad_token_provider=None, validate_base_

In [5]:
for doc in documents:
    doc.metadata['file_name'] = doc.metadata['source']
    print(doc)

page_content='{"Roydon": "Hey there! Can't wait for the new football season to start, hoping for a great one for Arsenal!", "John": "Hey Roydon! Yeah, it's always exciting to see how your team will perform. Optimistic as always, I see!"}' metadata={'label': 'Response 1', 'source': 'football.json', 'file_name': 'football.json'}
page_content='{"Roydon": "Absolutely! Gotta stay positive, right? What team do you support?", "John": "I'm a Manchester United fan, so we might have some friendly rivalry this season!"}' metadata={'label': 'Response 2', 'source': 'football.json', 'file_name': 'football.json'}
page_content='{"Roydon": "Haha, definitely! Looking forward to some intense matches between our teams. Who do you think will be Arsenal's key player this season?", "John": "I have a feeling Aubameyang will continue to shine for Arsenal. His goals are always a game-changer!"}' metadata={'label': 'Response 3', 'source': 'football.json', 'file_name': 'football.json'}
page_content='{"Roydon": "I

In [8]:
# Local vector store
from langchain_community.vectorstores import FAISS

faiss_vectorstore_v3 = FAISS.from_documents(documents, embeddings)

faiss_vectorstore_v3

<langchain_community.vectorstores.faiss.FAISS at 0x204d3901150>

In [39]:
# Local vector store
from langchain_community.vectorstores import FAISS

faiss_vectorstore_text = FAISS.from_texts(chunked_documents, embeddings)

faiss_vectorstore_text

<langchain_community.vectorstores.faiss.FAISS at 0x2534ee60850>

In [9]:
## Saving Vector Store
faiss_vectorstore_v3.save_local("C:\\Roydon\\Github\\FYP_Application\\MuteCompanion\\backend\\vector_store\\vectorstores\\faiss_vs_v3")

## Saving Vector Store Text (Testing)
#faiss_vectorstore_text.save_local("C:\\Roydon\\Github\\FYP_Application\\MuteCompanion\\backend\\vector_store\\vectorstores\\faiss_vs_text")

# Load Vector Store & Context Retrieval

In [42]:
from langchain_community.vectorstores import FAISS
from langchain_openai.embeddings import AzureOpenAIEmbeddings

embeddings = AzureOpenAIEmbeddings(azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT'], 
                                   api_key=os.environ['AZURE_OPENAI_APIKEY'], 
                                   model=os.environ['TEXT_EMBEDDING_MODEL_NAME'],
                                   azure_deployment=os.environ['TEXT_EMBEDDING_DEPLOYMENT_NAME'])

loaded_faiss_vs = FAISS.load_local("C:\\Roydon\\Github\\FYP_Application\\MuteCompanion\\backend\\vector_store\\vectorstores\\faiss_vs", embeddings=embeddings, allow_dangerous_deserialization=True)
loaded_faiss_vs_text = FAISS.load_local("C:\\Roydon\\Github\\FYP_Application\\MuteCompanion\\backend\\vector_store\\vectorstores\\faiss_vs_text", embeddings=embeddings, allow_dangerous_deserialization=True)


In [43]:
query = "How have you been Roydon, what are you up to?"
context = loaded_faiss_vs.similarity_search(query, k=5)

for con in context:
    print("=" * 30, "\n", con.metadata)
    print(con.page_content)

 {'label': 'Response 1', 'source': 'football2.json'}
{"Roydon": "Hey there! Did you catch the Arsenal game last night? What a thrilling match!", "John": "Hey Roydon! Yes, I watched it. Arsenal played really well, didn't they?"}
 {'label': 'Response 1', 'source': 'football.json'}
{"Roydon": "Hey there! Can't wait for the new football season to start, hoping for a great one for Arsenal!", "John": "Hey Roydon! Yeah, it's always exciting to see how your team will perform. Optimistic as always, I see!"}
 {'label': 'Response 1', 'source': 'pet.json'}
{"Roydon": "Guess what, I just got a new pet dog!", "Jacob": "That's awesome! What breed is it?"}
 {'label': 'Response 9', 'source': 'travel.json'}
{"Roydon": "Thanks, Dory. I appreciate you being there for me, even when I'm at my angriest."}
 {'label': 'Response 2', 'source': 'football.json'}
{"Roydon": "Absolutely! Gotta stay positive, right? What team do you support?", "John": "I'm a Manchester United fan, so we might have some friendly rival

In [45]:
query = "How have you been Roydon, what are you up to?"
context = loaded_faiss_vs_text.similarity_search(query, k=5)

for con in context:
    print("=" * 30, "\n")
    print(con.page_content)


Roydon: Hey there! Did you catch the Arsenal game last night? What a thrilling match!

John: Hey Roydon! Yes, I watched it. Arsenal played really well, didn't they?

Roydon: Hey there! Can't wait for the new football season to start, hoping for a great one for Arsenal!

John: Hey Roydon! Yeah, it's always exciting to see how your team will perform. Optimistic as always, I see!

Roydon: Guess what, I just got a new pet dog!

Jacob: That's awesome! What breed is it?

Roydon: Hey Yas, I'm still fuming about my terrible experience in Thailand, but you know what? I'm super excited about my upcoming trip to Japan!

Yas: Oh no, what happened in Thailand? But that's great to hear about Japan! What are you looking forward to the most?

Roydon: Thanks, Dory. I appreciate you being there for me, even when I'm at my angriest.


# Testing the RAG System Query

In [8]:
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
from langchain_core.prompts import PromptTemplate

from langchain_community.vectorstores import FAISS
from langchain_openai.embeddings import AzureOpenAIEmbeddings

# PROMPT Engineering
name = "Roydon"
other_person = "Jacob"
persona = f"""You are an assistant whom will faciliate the conversation between a mute and a normal person. The mute persons name is {name} and the normal person is indicated as {other_person}."""
task = """ You should be generating 3 responses which the mute person could choose from and the responses generated should follow the context of the conversation. 
        The topic should be interpreted from the conversation.
        If no topic could be interpreted, provide default responses that a person would start with such as greetings. 
        The responses should be what a person would say and should not include actions in a third person view. Your persona would be from the perspective of the mute person.
        In the case the responses are not chosen, the mute person could type their own response. Do take note of this response and continue the conversation from the response selected or typed out by the mute person.
        Ensure the responses generated will allow the conversation to flow smoothly."""
condition = """It must be in english. An example of the 3 generated response would be in the format of 1 single string "Response 1: what you generated Response 2: what you generated Response 3: what you generated" all in one line."""

# Construct message object
instruction = f"{persona} {task} {condition}"
messages = [SystemMessage(content=instruction)]

## Retrieve and consolidate context based on query
query = "How have you been Roydon?"
loaded_faiss_vs = FAISS.load_local("C:\\Roydon\\Github\\FYP_Application\\MuteCompanion\\backend\\vector_store\\vectorstores\\faiss_vs", embeddings=embeddings, allow_dangerous_deserialization=True)
context = loaded_faiss_vs.similarity_search(query, k=3)
contexts = ""

for con in context:
    contexts += con.page_content

context_query = f"""
    Please generate the responses for {name} based on the following context provided below.\n
    Context: {contexts}
    Query: {query}
"""

## Append the context and user query to chatlog
messages.append(HumanMessage(content=context_query))

answer = llm.invoke(messages)
print(answer.content)


NameError: name 'embeddings' is not defined

In [16]:
!pip uninstall decouple
#!pip install python-decouple

^C


In [15]:
import openai
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_openai.embeddings import AzureOpenAIEmbeddings
from decouple import config

embeddings = AzureOpenAIEmbeddings(azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT'], 
                                   api_key=os.environ['AZURE_OPENAI_APIKEY'], 
                                   model=os.environ['TEXT_EMBEDDING_MODEL_NAME'],
                                   azure_deployment=os.environ['TEXT_EMBEDDING_DEPLOYMENT_NAME'])

# Environment variables
openai.organisation = config("OPEN_AI_ORG")
openai.api_key = config("OPEN_AI_KEY")

# PROMPT Engineering
name = "Roydon"
other_person = "Jacob"
persona = f"""You are an assistant whom will faciliate the conversation between a mute and a normal person. The mute persons name is {name} and the normal person is indicated as {other_person}."""
task = """ You should be generating 3 responses which the mute person could choose from and the responses generated should follow the context of the conversation. 
        The topic should be interpreted from the conversation.
        If no topic could be interpreted, provide default responses that a person would start with such as greetings. 
        The responses should be what a person would say and should not include actions in a third person view. Your persona would be from the perspective of the mute person.
        In the case the responses are not chosen, the mute person could type their own response. Do take note of this response and continue the conversation from the response selected or typed out by the mute person.
        Ensure the responses generated will allow the conversation to flow smoothly."""
condition = """It must be in english. An example of the 3 generated response would be in the format of 1 single string "Response 1: what you generated Response 2: what you generated Response 3: what you generated" all in one line."""

# Construct message object
instruction = f"{persona} {task} {condition}"
messages = [SystemMessage(content=instruction)]

## Retrieve and consolidate context based on query
query = "How have you been Roydon?"
loaded_faiss_vs = FAISS.load_local("C:\\Roydon\\Github\\FYP_Application\\MuteCompanion\\backend\\vector_store\\vectorstores\\faiss_vs", embeddings=embeddings, allow_dangerous_deserialization=True)
context = loaded_faiss_vs.similarity_search(query, k=3)
contexts = ""

for con in context:
    contexts += con.page_content

context_query = f"""
    Please generate the responses for {name} based on the following context provided below.\n
    Context: {contexts}
    Query: {query}
"""

## Append the context and user query to chatlog
messages.append(HumanMessage(content=context_query))

raw_response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages = messages 
)

response_choices = raw_response.choices[0].message.content

print(response_choices)

ImportError: cannot import name 'config' from 'decouple' (c:\Users\roydo\AppData\Local\Programs\Python\Python311\Lib\site-packages\decouple\__init__.py)

# Adding and deleting from vector store

In [1]:
from dotenv import load_dotenv
from langchain_openai.embeddings import AzureOpenAIEmbeddings

from langchain_community.vectorstores import FAISS
import os
from uuid import uuid4

from langchain_core.documents import Document

In [2]:
# Load in environment variables
load_dotenv(override=True)

embeddings = AzureOpenAIEmbeddings(azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT'], 
                                   api_key=os.environ['AZURE_OPENAI_APIKEY'], 
                                   model=os.environ['TEXT_EMBEDDING_MODEL_NAME'],
                                   azure_deployment=os.environ['TEXT_EMBEDDING_DEPLOYMENT_NAME'])


In [3]:
loaded_faiss_vs = FAISS.load_local("C:\\Roydon\\Github\\FYP_Application\\MuteCompanion\\backend\\vector_store\\vectorstores\\faiss_vs", embeddings=embeddings, allow_dangerous_deserialization=True)

In [23]:
vectorized_response = """{"John": "testing out", "Roydon": "testing testing"}"""
response_label = "Response 1"
filename = "current_conversation.json"
doc_metadata = {"label": response_label, "source": filename,  'file_name': filename}

response_document = Document(page_content=vectorized_response, metadata=doc_metadata)

documents = [response_document]
ids = [uuid4()]

loaded_faiss_vs.add_documents(documents=documents, ids=ids)

[UUID('257c5c24-764e-4535-bc5d-f2ff0e616dc8')]

In [7]:
# Query directly
results = loaded_faiss_vs.similarity_search(
    "",
    k=1,
    filter={"source": "current_conversation.json"},
)

print(results)

[]


In [9]:
# Delete the document:
loaded_faiss_vs.delete(ids=['e2846e7a-6e9c-4beb-9a2c-259d7eb41fb6'])

ValueError: Some specified ids do not exist in the current store. Ids not found: {'e2846e7a-6e9c-4beb-9a2c-259d7eb41fb6'}

: 

In [26]:
# Query directly
results = loaded_faiss_vs.similarity_search(
    """{"John": "testing out", "Roydon": "testing testing"}""",
    k=1,
    filter={"source": filename},
)

print(results)

[]
