# Generative AI - Prompt Agent

In [218]:
import os
from dotenv import load_dotenv
import openai
from langchain_openai import ChatOpenAI
# JSON loader
from langchain_community.document_loaders import JSONLoader
import json
from pathlib import Path
# Vector DB
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
# prompt
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate
# summarization
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.chains import LLMChain, MapReduceChain, load_summarize_chain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain_community.document_loaders import TextLoader

# 1. Static variable

In [219]:
vectorDB_declare = "faiss_vector_declare_index_db"
vectorDB_handle = "faiss_vector_handle_index_db"
vectorDB_exif_keyword = "faiss_vector_exif_keyword_index_db"
code_purpose_directory = "code_purpose"
code_purpose_file = code_purpose_directory+"//code_purpose.txt"

# 2. Function

In [220]:
# Setup model
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = api_key
llm = ChatOpenAI(model="gpt-4-turbo",temperature=0)
# Setup embedding
embeddings = OpenAIEmbeddings()
# loading database
db_connect = FAISS.load_local(vectorDB_declare, embeddings,allow_dangerous_deserialization=True)
print(db_connect.index.ntotal) 
#print(db_connect.index_to_docstore_id)

64495


In [221]:
# Function create directory
def create_directory(directory_path):
    if not os.path.exists(directory_path):
        os.makedirs(directory_path)
        print(f"Directory '{directory_path}' created successfully.")
    else:
        print(f"Directory '{directory_path}' already exists.")
# Funtion write string to text file
def write_string_to_file(string, file_path):
    with open(file_path, 'w') as file:
        file.write(string)
    print(f"String written to '{file_path}' successfully.")
# Function delete file
def delete_file(file_path):
    if os.path.exists(file_path):
        os.remove(file_path)
        print(f"File '{file_path}' deleted successfully.")
    else:
        print(f"File '{file_path}' does not exist.")

# 3. Main

In [222]:
# Create directory
create_directory(code_purpose_directory)

Directory 'code_purpose' already exists.


In [223]:
# Create retriever
retriever = db_connect.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7f856e884f40>)

In [224]:
query = """
"final e[] array = { new e(\"NewSubfileType\", 254, 4), new e(\"SubfileType\", 255, 4), new e(\"ImageWidth\", 256, 3, 4), new e(\"ImageLength\", 257, 3, 4), new e(\"BitsPerSample\", 258, 3), new e(\"Compression\", 259, 3), new e(\"PhotometricInterpretation\", 262, 3), new e(\"ImageDescription\", 270, 2), new e(\"Make\", 271, 2), new e(\"Model\", 272, 2), new e(\"StripOffsets\", 273, 3, 4), new e(\"Orientation\", 274, 3), new e(\"SamplesPerPixel\", 277, 3), new e(\"RowsPerStrip\", 278, 3, 4), new e(\"StripByteCounts\", 279, 3, 4), new e(\"XResolution\", 282, 5), new e(\"YResolution\", 283, 5), new e(\"PlanarConfiguration\", 284, 3), new e(\"ResolutionUnit\", 296, 3), new e(\"TransferFunction\", 301, 3), new e(\"Software\", 305, 2), new e(\"DateTime\", 306, 2), new e(\"Artist\", 315, 2), new e(\"WhitePoint\", 318, 5), new e(\"PrimaryChromaticities\", 319, 5), new e(\"SubIFDPointer\", 330, 4), new e(\"JPEGInterchangeFormat\", 513, 4), new e(\"JPEGInterchangeFormatLength\", 514, 4), new e(\"YCbCrCoefficients\", 529, 5), new e(\"YCbCrSubSampling\", 530, 3), new e(\"YCbCrPositioning\", 531, 3), new e(\"ReferenceBlackWhite\", 532, 5), new e(\"Copyright\", 33432, 2), new e(\"ExifIFDPointer\", 34665, 4), new e(\"GPSInfoIFDPointer\", 34853, 4), new e(\"SensorTopBorder\", 4, 4), new e(\"SensorLeftBorder\", 5, 4), new e(\"SensorBottomBorder\", 6, 4), new e(\"SensorRightBorder\", 7, 4), new e(\"ISO\", 23, 3), new e(\"JpgFromRaw\", 46, 7), new e(\"Xmp\", 700, 1) };",
"""
docs = retriever.invoke(query)
print(docs)

[Document(page_content='final e[] array = { new e("NewSubfileType", 254, 4), new e("SubfileType", 255, 4), new e("ImageWidth", 256, 3, 4), new e("ImageLength", 257, 3, 4), new e("BitsPerSample", 258, 3), new e("Compression", 259, 3), new e("PhotometricInterpretation", 262, 3), new e("ImageDescription", 270, 2), new e("Make", 271, 2), new e("Model", 272, 2), new e("StripOffsets", 273, 3, 4), new e("Orientation", 274, 3), new e("SamplesPerPixel", 277, 3), new e("RowsPerStrip", 278, 3, 4), new e("StripByteCounts", 279, 3, 4), new e("XResolution", 282, 5), new e("YResolution", 283, 5), new e("PlanarConfiguration", 284, 3), new e("ResolutionUnit", 296, 3), new e("TransferFunction", 301, 3), new e("Software", 305, 2), new e("DateTime", 306, 2), new e("Artist", 315, 2), new e("WhitePoint", 318, 5), new e("PrimaryChromaticities", 319, 5), new e("SubIFDPointer", 330, 4), new e("JPEGInterchangeFormat", 513, 4), new e("JPEGInterchangeFormatLength", 514, 4), new e("YCbCrCoefficients", 529, 5), new

In [225]:
# First, we need a prompt that we can pass into an LLM to generate this search query
prompt = ChatPromptTemplate.from_messages([
    MessagesPlaceholder(variable_name="chat_history"),
    ("user","{input}"),
    ("user","Given the above programming code block, generate a search query to look up information relevant to the conversation")
])
print(prompt)

input_variables=['chat_history', 'input'] input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]} messages=[MessagesPlaceholder(variable_name='chat_history'), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='Given the above programming code block, generate a search query to look up information relevant to the conversation'))]


In [226]:
prompt = ChatPromptTemplate.from_messages(
    [
        ("placeholder", "{chat_history}"),
        ("user", "{input}"),
        (
            "user",
            "Given the above conversation, generate a search query to look up to get information relevant to the conversation",
        ),
    ]
)

retriever_chain = create_history_aware_retriever(llm, retriever, prompt)

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user's questions based on the below context:\n\n{context}",
        ),
        ("placeholder", "{chat_history}"),
        ("user", "{input}"),
    ]
)
document_chain = create_stuff_documents_chain(llm, prompt)

qa = create_retrieval_chain(retriever_chain, document_chain)

In [227]:
question = f"""
What is Exif metadata in this list (Make, Model, Software, GPS, Datetime) included in this code block below? \n
{docs}
"""
#print(question)
result = qa.invoke({"input": question})
code_purpose = result["answer"]
print(code_purpose)
#print(type(code_purpose))
write_string_to_file(code_purpose, code_purpose_file)

The Exif metadata included in the code block you provided are as follows:

1. **Make** - Tagged with the identifier `271` and a type of `2`, which typically represents ASCII strings. This tag stores the manufacturer of the camera that captured the image.

2. **Model** - Tagged with the identifier `272` and a type of `2`. This tag stores the model number or name of the camera that captured the image.

3. **Software** - Tagged with the identifier `305` and a type of `2`. This tag stores the software or firmware used to generate the image.

4. **DateTime** - Tagged with the identifier `306` and a type of `2`. This tag stores the date and time when the image was created.

5. **GPSInfoIFDPointer** - Tagged with the identifier `34853` and a type of `4`. This is a pointer to a specific IFD (Image File Directory) that contains GPS information related to the image. This tag itself does not store GPS data but points to where it can be found.

These tags are part of the Exif (Exchangeable Image F

In [228]:
# Summary
prompt_template = """
You are provided with a paragraph listing the types of EXIF metadata in the code block.
----------
{text}
----------
Question: Examine the paragraph and summarize which types of EXIF metadata the above paragraph relates to in the inclusion list (Make, Model, Software, GPS, Datetime) if EXIF metadata exists. 
Please only respond with the EXIF metadata name in this list (Make, Model, Software, GPS, Datetime).
EXIF metadata name:
"""
print(prompt_template)


You are provided with a paragraph listing the types of EXIF metadata in the code block.
----------
{text}
----------
Question: Examine the paragraph and summarize which types of EXIF metadata the above paragraph relates to in the inclusion list (Make, Model, Software, GPS, Datetime) if EXIF metadata exists. 
Please only respond with the EXIF metadata name in this list (Make, Model, Software, GPS, Datetime).
EXIF metadata name:



In [229]:
prompt = PromptTemplate.from_template(prompt_template)
prompt

PromptTemplate(input_variables=['text'], template='\nYou are provided with a paragraph listing the types of EXIF metadata in the code block.\n----------\n{text}\n----------\nQuestion: Examine the paragraph and summarize which types of EXIF metadata the above paragraph relates to in the inclusion list (Make, Model, Software, GPS, Datetime) if EXIF metadata exists. \nPlease only respond with the EXIF metadata name in this list (Make, Model, Software, GPS, Datetime).\nEXIF metadata name:\n')

In [230]:
prompt = PromptTemplate(template=prompt_template,input_variables=["text"])
stuff_chain = load_summarize_chain(llm,
                                   chain_type="stuff",
                                   prompt=prompt,
                                   verbose = True
                                  )
print(stuff_chain.llm_chain.prompt.template)


You are provided with a paragraph listing the types of EXIF metadata in the code block.
----------
{text}
----------
Question: Examine the paragraph and summarize which types of EXIF metadata the above paragraph relates to in the inclusion list (Make, Model, Software, GPS, Datetime) if EXIF metadata exists. 
Please only respond with the EXIF metadata name in this list (Make, Model, Software, GPS, Datetime).
EXIF metadata name:



In [231]:
loader = TextLoader(code_purpose_file)
code_purpose_docs = loader.load()
print(code_purpose_docs)
output_summary = stuff_chain.invoke(code_purpose_docs)
output_summary

[Document(page_content='The Exif metadata included in the code block you provided are as follows:\n\n1. **Make** - Tagged with the identifier `271` and a type of `2`, which typically represents ASCII strings. This tag stores the manufacturer of the camera that captured the image.\n\n2. **Model** - Tagged with the identifier `272` and a type of `2`. This tag stores the model number or name of the camera that captured the image.\n\n3. **Software** - Tagged with the identifier `305` and a type of `2`. This tag stores the software or firmware used to generate the image.\n\n4. **DateTime** - Tagged with the identifier `306` and a type of `2`. This tag stores the date and time when the image was created.\n\n5. **GPSInfoIFDPointer** - Tagged with the identifier `34853` and a type of `4`. This is a pointer to a specific IFD (Image File Directory) that contains GPS information related to the image. This tag itself does not store GPS data but points to where it can be found.\n\nThese tags are pa

{'input_documents': [Document(page_content='The Exif metadata included in the code block you provided are as follows:\n\n1. **Make** - Tagged with the identifier `271` and a type of `2`, which typically represents ASCII strings. This tag stores the manufacturer of the camera that captured the image.\n\n2. **Model** - Tagged with the identifier `272` and a type of `2`. This tag stores the model number or name of the camera that captured the image.\n\n3. **Software** - Tagged with the identifier `305` and a type of `2`. This tag stores the software or firmware used to generate the image.\n\n4. **DateTime** - Tagged with the identifier `306` and a type of `2`. This tag stores the date and time when the image was created.\n\n5. **GPSInfoIFDPointer** - Tagged with the identifier `34853` and a type of `4`. This is a pointer to a specific IFD (Image File Directory) that contains GPS information related to the image. This tag itself does not store GPS data but points to where it can be found.\

In [232]:
delete_file(code_purpose_file)

File 'code_purpose//code_purpose.txt' deleted successfully.
