In [1]:
import uuid
import os
import json
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryStore
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.schema.document import Document
from datasets import Dataset
from langchain_community.llms import Ollama
from langchain.llms import Ollama
import sys



In [2]:
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

In [3]:
def read_txtelements(output_folder, filename, delimiter):
    with open(os.path.join(output_folder, filename), "r") as f:
        return [s.strip() for s in f.read().split(delimiter) if s.strip()]

def read_jsonelements(output_folder, filename):
    with open(os.path.join(output_folder, filename), "r") as json_file:
        return json.load(json_file)

In [4]:
def add_data(image_text_summaries, image_text_elements, image_summaries, retriever): 
    # Add texts
    # if text_summaries:
    #     doc_ids = [str(uuid.uuid4()) for _ in text_elements]
    #     summary_texts = [
    #         Document(page_content=s, metadata={id_key: doc_ids[i]})
    #         for i, s in enumerate(text_summaries)]
    #     retriever.vectorstore.add_documents(summary_texts)
    #     retriever.docstore.mset(list(zip(doc_ids, text_elements)))

    # Add image texts
    if image_text_summaries:
        doc_ids = [str(uuid.uuid4()) for _ in image_text_elements]
        summary_texts = [
            Document(page_content=s, metadata={id_key: doc_ids[i]})
            for i, s in enumerate(image_text_summaries)]
        retriever.vectorstore.add_documents(summary_texts)
        retriever.docstore.mset(list(zip(doc_ids, image_text_elements)))

    # Add tables
    # if table_summaries:
    #     table_ids = [str(uuid.uuid4()) for _ in table_elements]
    #     summary_tables = [
    #         Document(page_content=s, metadata={id_key: table_ids[i]})
    #         for i, s in enumerate(table_summaries)]
    #     retriever.vectorstore.add_documents(summary_tables)
    #     retriever.docstore.mset(list(zip(table_ids, table_elements)))

    # Add images
    if image_summaries:
        img_ids = [str(uuid.uuid4()) for _ in image_summaries]
        summary_img = [
            Document(page_content=s, metadata={id_key: img_ids[i]})
            for i, s in enumerate(image_summaries)]
        retriever.vectorstore.add_documents(summary_img)
        retriever.docstore.mset(list(zip(img_ids, image_summaries)))  
    return retriever

In [5]:
def process_subfolder(subfolder_path, retriever):
    print(f"Processing subfolder: {subfolder_path}")
    delimiter = "~~~"
    # text_elements = read_elements(output_folder, "text_elements.txt")
    # table_elements = read_elements(output_folder, "table_elements.txt")
    # text_summaries = read_elements(output_folder, "text_summaries.txt", delimiter)
    # table_summaries = read_elements(output_folder, "table_summaries.txt", delimiter)
    image_text_elements = read_jsonelements(subfolder_path, "imagestexts.json")
    image_text_elements = [i['text'] for i in image_text_elements]
    image_text_summaries = read_txtelements(subfolder_path, "imagestexts_summary.txt", delimiter)
    image_summaries = read_txtelements(subfolder_path, "image_summaries.txt", delimiter)

    retriever = add_data(image_text_summaries, image_text_elements, image_summaries, retriever)
    return retriever

In [6]:
vectorstore = Chroma(collection_name="summaries", embedding_function = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"))
store = InMemoryStore()  
id_key = "doc_id"
retriever = MultiVectorRetriever(vectorstore=vectorstore, docstore=store, id_key=id_key,)
retriever.search_kwargs['k'] = 4

In [7]:
output_folder = "/home/vqa/RAG/10_manuals_256_summaries"

In [8]:
for item in os.listdir(output_folder):
    item_path = os.path.join(output_folder, item)
    if os.path.isdir(item_path):
        # Now item_path is a subdirectory within output_folder
        # For each subdirectory, perform the operations you need
        process_subfolder(item_path, retriever)

Processing subfolder: /home/vqa/RAG/10_manuals_256_summaries/samsung_cell phone accessories manuals_de30be54-38b5-46f4-8dc4-a2376a6e571d_extracted
Processing subfolder: /home/vqa/RAG/10_manuals_256_summaries/bose_headphones manuals_9917ef89-897c-6524-2502-2b0a91ec7d62_extracted
Processing subfolder: /home/vqa/RAG/10_manuals_256_summaries/owg_en_wms_soundlink_adapterkit_extracted
Processing subfolder: /home/vqa/RAG/10_manuals_256_summaries/bose_headphones_manuals_6d12e20d-cf8d-4337-b073-53d7e85e4163_extracted
Processing subfolder: /home/vqa/RAG/10_manuals_256_summaries/samsung_video gaming accessories manuals_f0ee75e7-18ff-4260-ac5c-3db2ec0f8fd4_extracted
Processing subfolder: /home/vqa/RAG/10_manuals_256_summaries/sony_laptop_manuals_a02cf092-3538-4646-ab93-8cae84a07ad2_extracted
Processing subfolder: /home/vqa/RAG/10_manuals_256_summaries/og_wave-bma_en_extracted
Processing subfolder: /home/vqa/RAG/10_manuals_256_summaries/samsung_audio_box_eo-sb330_um_sea_rev.1.0_140728_screen_extrac

In [9]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# from langchain_core.output_parsers import OutputParser
from langchain_core.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

In [10]:
# Prompt template
template = """Answer the question based only on the following context, which can include text and tables:
{context}
Question: {question}
Answer:
"""

In [11]:
# prompt = PromptTemplate.from_template(template)
prompt = ChatPromptTemplate.from_template(template)

In [12]:
model = Ollama(model="llama2:7b-chat")
# model = Ollama(model="llama2", verbose=True)

In [13]:
# RAG pipeline
chain = (
    {"context": retriever, 
     "question": RunnablePassthrough(),}
    | prompt
    | model
    | StrOutputParser())

In [14]:
from datasets import Dataset
import pandas as pd
import json
from tabulate import tabulate

In [15]:

data = []
with open('10_manuals.jsonl', 'r') as f:
# with open('20_manuals.jsonl', 'r') as f:
    for line in f:

        json_data = json.loads(line)
        id = json_data['id'][:-6]
        qa_data = json_data['qa_data']
        for i in qa_data:
            question = i['question']['text']
            answer = i['answer']['text']
        data.append({'id': id, 'question' : question, 'ground_truth': answer})

df = pd.DataFrame(data)
df = pd.DataFrame(data, columns = ['id', 'question', 'ground_truth'])
print(tabulate(df, headers='keys', tablefmt='psql'))

n = len(pd.unique(df['id']))
print("No.of.unique values :", n)

+-----+-------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|     | id                                                                            | question                                                                                                                        | ground_truth                                                                                                                                                                                                                                                                      

In [16]:
import pandas as pd

unique_values = (pd.unique(df['id']))
print(unique_values)
unique_values_df = pd.DataFrame({'unique_values': unique_values})
merged_df = pd.merge(df, unique_values_df, how='inner', left_on='id', right_on='unique_values')
df = merged_df.groupby('unique_values').head(10)



['bose_headphones manuals_9917ef89-897c-6524-2502-2b0a91ec7d62'
 'bose_headphones_manuals_6d12e20d-cf8d-4337-b073-53d7e85e4163'
 'dell_cell phone manuals_4686e2e1-87a4-4f6a-bf20-61c646c11bb9'
 'og_wave-bma_en' 'owg_en_wms_soundlink_adapterkit'
 'samsung_audio_box_eo-sb330_um_sea_rev.1.0_140728_screen'
 'samsung_cell phone accessories manuals_de30be54-38b5-46f4-8dc4-a2376a6e571d'
 'samsung_vacuum cleaner manuals_8cb9360e-cafe-4c53-9d35-ef193667e586'
 'samsung_video gaming accessories manuals_f0ee75e7-18ff-4260-ac5c-3db2ec0f8fd4'
 'sony_laptop_manuals_a02cf092-3538-4646-ab93-8cae84a07ad2']


In [17]:
df.drop(columns=['unique_values'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['unique_values'], inplace=True)


In [18]:
# print(len(retriever.get_relevant_documents("Which battery does the CD player use?")))

In [28]:
# answer = chain.invoke("")
# print(answer)

Based on the context provided, my answer to your question is:

The image is a close-up of a word written in bold text.


In [20]:
# answer = chain.invoke("For the Dell cellphone, what does a WARNING indicate?")
# print(answer)




In [21]:
# answer = chain.invoke("For the Dell cellphone, what is the function of the Camera button?")
# print(answer)


Based on the provided context, the function of the Camera button on the Dell cellphone can be inferred as follows:

1. The image shows two buttons located at the bottom part of the device: one closer to the left side and another near the center-right area. Since the button closer to the left side is labeled "Camera," it suggests that this button might control the phone's camera functionality.
2. The presence of a keypad on the cellphone suggests that it may be an older model, possibly from the early 2000s or late 1990s. In those times, the keypad was a common design feature for mobile phones, and the Camera button could have been used to activate the camera function.
3. The image also shows several wires connected to various components on the phone, indicating that charging or data transfer may be possible through these connections. However, given the age of the device, it is more likely that the wires are for charging purposes only.
4. Finally, the tiny person visible in the image may

In [22]:
# answer = chain.invoke("For the Dell cellphone, what should I do before using the smartphone?")
# print(answer)


Before using your Dell Mini 3i smartphone, you should follow the instructions provided in the "Setting Up Your Mini 3i Smartphone" document. This will help you properly set up and configure your device for optimal performance and safety. Specifically, you should:

1. Read the user manual carefully to understand the features and capabilities of your smartphone.
2. Ensure that your smartphone is compatible with your service provider's network.
3. Activate your smartphone by following the instructions provided by your service provider.
4. Set up your smartphone's security features, such as a password or fingerprint recognition, to protect your device and personal data.
5. Familiarize yourself with the smartphone's physical layout and button functions to avoid accidental calls or text messages.

By following these steps, you can ensure a smooth and safe transition into using your Dell Mini 3i smartphone.


In [23]:
# answer = chain.invoke("For the Dell cellphone, what is the function of the icons?")
# print(answer)


Based on the context provided, it seems that the image depicts a smartphone with various options and settings visible on its screen. The image includes several labeled numbers, which may indicate measurements or specifications related to the phone's design or dimensions. Additionally, there are two smaller figures visible at the top-left corner of the image, which might represent more options or settings related to the app being used or accessed.

As for the function of the icons on the screen, it is likely that they are part of the interface or illustrations that accompany the user experience. The icons may represent different features or functions of the phone, such as a camera, internet browser, or music player. Alternatively, the icons could be used to indicate different settings or preferences related to the app being used. Without more information or context, it is difficult to provide a definitive answer to this question.


In [29]:
answer = chain.invoke("For the Samsung vacuum cleaner, can I plug in my vacuum cleaner directly?")
print(answer)


For safety reasons, it is important to only use the adapter provided by Samsung for this product. Plugging in any other adapter may cause damage or electric shock, which could result in injury or even death. Additionally, connecting multiple controllers to a single adapter can lead to abnormal heat generation or other issues, so it's crucial to follow the recommended usage guidelines to avoid any risks. To ensure your safety and the proper functioning of your vacuum cleaner, always use only the standard adapter provided by Samsung.


In [30]:
answer = chain.invoke("For the Samsung vacuum cleaner, what should I do if the Mini Turbo Brush is clogged or debris is wrapped around the brush roll?")
print(answer)


For the Samsung vacuum cleaner, there are a few steps you can take to remove debris from the Mini Turbo Brush when it becomes clogged or wrapped around the brush roll:

1. Turn off the vacuum cleaner and unplug it: Before attempting to clean the Mini Turbo Brush, make sure the vacuum cleaner is turned off and unplugged from the wall outlet. This will prevent any accidental start-ups or electrical shocks.
2. Remove any loose debris: Use your hands or a soft brush to gently remove any loose debris or hair that may be wrapped around the brush roll. Avoid using force or scrubbing too hard, as this can damage the brush or the vacuum cleaner.
3. Use compressed air: If there are small debris particles stuck inside the Mini Turbo Brush, try using a can of compressed air to blow them out. Hold the can upside down and spray the debris away from the brush roll.
4. Use a soft-bristled brush: For more stubborn debris or hair that is wrapped around the brush roll, use a soft-bristled brush (like a m

In [31]:
answer = chain.invoke("For the Samsung vacuum cleaner, where should I install the Combination Tool?")
print(answer)

The Combination Tool for the Samsung vacuum cleaner can be installed in various locations depending on your cleaning needs and preferences. Here are some options to consider:

1. Extension Wand: The Combination Tool can be attached to the extension wand of the vacuum cleaner, providing you with more reach and flexibility when cleaning hard-to-reach areas such as stairs, ceilings, and corners.
2. Dusting Brush: You can install the Combination Tool on the dusting brush of the vacuum cleaner for quick and easy cleaning of upholstery, curtains, and other delicate surfaces.
3. Floor Cleaner Tool: Attach the Combination Tool to the floor cleaner tool of the vacuum cleaner for deeper cleaning of carpets, rugs, and hardwood floors. This tool is particularly useful for removing dirt and debris from tight spaces and crevices.
4. Handheld Vacuum: If you want a more portable cleaning solution, you can detach the Combination Tool from the vacuum cleaner and use it as a handheld vacuum for cleaning 

In [32]:
answer = chain.invoke("For the Samsung vacuum cleaner, what should I do after removing a screw? ")
print(answer)


After removing a screw for a Samsung vacuum cleaner, there are several steps you should take to ensure proper maintenance and safety:

1. Check the manual: Before attempting any repairs or maintenance on your vacuum cleaner, consult the user manual provided by Samsung. The manual will outline the recommended procedures for servicing your specific model of vacuum cleaner.
2. Unplug the vacuum: Always unplug your vacuum cleaner from the wall outlet before performing any maintenance or repairs. This will help prevent accidental start-up or electrical shock.
3. Remove any attachments: If you are planning to remove any attachments, such as crevice tools or upholstery brushes, do so carefully and according to the manufacturer's instructions.
4. Inspect for damage: Use a flashlight to inspect the internal components of your vacuum cleaner for any signs of damage or wear. Check for loose or damaged parts, such as belts, filters, or motor brushes.
5. Replace worn parts: If you find any worn or 

In [33]:
answer = chain.invoke("For the Sony laptop, how can I adjust the 3D settings?")
print(answer)


To adjust the 3D settings on your Sony laptop, follow these steps:

1. Click on the Start menu and select Settings.
2. In the Settings window, click on the "Display" option.
3. In the Display window, click on the "3D" tab.
4. Under the "3D" tab, you will see options to adjust the 3D settings. You can choose the type of 3D content you want to play, such as Blu-ray 3D discs or 3D photos.
5. Adjust the brightness and contrast of the 3D image to your liking.
6. If you are using a 3D display, you can adjust the 3D depth level to enhance the 3D experience.
7. Once you have made your adjustments, click "Apply" to save your changes.

Note: The exact steps may vary depending on the Sony laptop model and its specific features. Refer to the user manual or online support resources for more detailed instructions.


In [34]:
answer = chain.invoke("For the Sony laptop, I like suring the internet on the computer tilting my head. If I do the same thing while viewing 3D images, what would happen?")
print(answer)


When viewing 3D images on your Sony laptop, it's important to follow the instructions in the manuals that came with your computer to avoid any discomfort or eye strain. Some people may experience eye strain, fatigue, or nausea while watching 3D video images or using stereoscopic 3D-related software.

To minimize the risk of discomfort, Sony recommends that all viewers take regular breaks while watching 3D video images or using stereoscopic 3D-related software. The length and frequency of necessary breaks will vary from person to person, so you should decide what works best for you. If you experience any discomfort, stop watching the 3D video images or using stereoscopic 3D-related software until the discomfort ends; consult a doctor if necessary.

It's also important to position yourself correctly while viewing 3D content. Sony suggests that you sit at a comfortable distance from the screen, ideally between 1.5 and 2.5 times the screen's diagonal measurement. You should also ensure tha

In [35]:
answer = chain.invoke("For the Sony laptop, how can I view 3D photos or videos?")
print(answer)


To view 3D photos or videos on your Sony laptop, follow these steps:

1. Ensure that your laptop has the latest drivers and software updates installed.
2. Connect your laptop to a 3D-enabled display device (such as a 3D TV or monitor) using an HDMI cable or wireless connection.
3. Open the Photos or Videos application on your laptop.
4. Locate the 3D content you wish to view (e.g., 3D photos or videos).
5. Click on the "Play" button to start playing the content.
6. Adjust the 3D settings in the display device's settings menu according to your preference. Some common options include adjusting the 3D depth, brightness, and contrast.
7. Enjoy your 3D content!

Note: If you don't have a 3D-enabled display device, you can also view 3D content on your laptop using built-in stereoscopic 3D capabilities. To enable this feature, go to the Display settings in your laptop's settings menu and select "Stereoscopic 3D" or a similar option.


In [36]:
answer = chain.invoke("For the Sony laptop, why I cannot find the 3D button and the 3D indicator on my computer when I display 3D images on 3D TV?")
print(answer)


The 3D function on the Sony laptop is designed to work with compatible 3D content and 3D-capable TVs. If you are not seeing the 3D button or indicator on your computer when displaying 3D images on a 3D TV, there could be several reasons why:

1. Compatibility issues: Make sure that the 3D content you are trying to play is compatible with the Sony laptop's 3D function. Some content may not be supported or may require additional software or hardware configurations.
2. 3D TV compatibility: Ensure that your 3D TV is compatible with the Sony laptop's 3D signal. Check the TV's manual or manufacturer's website for information on its 3D capabilities and connectivity options.
3. HDMI connection: Make sure that the Sony laptop is connected to the 3D TV via an HDMI cable. If the connection is not proper, you may not see the 3D button or indicator on the computer.
4. Display settings: Check your computer's display settings to ensure that it is set to display 3D content correctly. You may need to a

In [24]:
# MAKE THE DATEFRAME INTO A LIST OF TUPLES

qa_list = [(row['question'], row['ground_truth']) for index, row in df.iterrows()]
print(qa_list)



In [25]:

import os

# Update with your API URL if using a hosted instance of Langsmith.
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
# os.environ["LANGCHAIN_API_KEY"] = "ls__9478a4fed44b477bbd2a4040c50cc935"  # Cissis
os.environ["LANGCHAIN_API_KEY"] = "ls__ed021d06cdab4f0b837aa6b4bf174513"  # Cissis 2
# os.environ["LANGCHAIN_API_KEY"] = "ls__dfec5f97d1de407f93106c572de7ca06" # Martas
project_name = "summary_rag"  # Update with your project name

In [26]:
from langsmith import Client

client = Client()

In [27]:
import uuid

dataset_name = f"Retrieval QA Questions {str(uuid.uuid4())}"
dataset = client.create_dataset(dataset_name=dataset_name)
for q, a in qa_list:
    client.create_example(
        inputs={"question": q}, outputs={"answer": a}, dataset_id = dataset.id
    )

KeyboardInterrupt: 

In [None]:
print(qa_list)

In [None]:
from langchain.smith import RunEvalConfig
from langchain.evaluation import EvaluatorType

eval_config = RunEvalConfig(
    evaluators=[
        RunEvalConfig.LabeledCriteria("relevance"),
        RunEvalConfig.LabeledCriteria("coherence"), 
        "cot_qa"
        ],
    eval_llm = ChatOllama(model="llama2"),
)

In [None]:
gen_ans = []
questions = []

In [None]:
def predict(inputs: dict):
    gen_an = chain.invoke(inputs["question"])
    questions.append(inputs["question"])
    gen_ans.append(gen_an)
    return gen_an

In [None]:
print(gen_ans)

In [None]:
_ = await client.arun_on_dataset(
    dataset_name=dataset_name,
    llm_or_chain_factory= predict, #lambda: chain,
    evaluation=eval_config,
)


__________________

In [None]:
print(len(gen_ans))

In [None]:
print(len(questions))

In [None]:
questions

In [None]:
filtered_df = df[df['question'].isin(questions)]

# Reorder the rows based on the order of questions in 'qs'
ordered_df = filtered_df.set_index('question').reindex(questions).reset_index()

ordered_df

In [None]:
ground_truth = ordered_df['ground_truth'].tolist()
ground_truths = [[value] for value in ground_truth]
print(ground_truths)

In [None]:
from bert_score import score

# Example texts
generated = gen_ans

# Calculate BERTScore
P, R, F1 = score(generated, ground_truths, lang='en', verbose=True)

# Print scores
print(f"Precision: {P.mean()}")
print(f"Recall: {R.mean()}")
print(f"F1 Score: {F1.mean()}")