In [14]:
import os
from dotenv import load_dotenv

# Pull settings from a local .env file into the process environment so that
# no credential has to be written into the notebook itself.
load_dotenv()

# Each lookup yields None when the corresponding variable is not set.
openai_key = os.environ.get("OPENAI_API_KEY")
activeloop_key = os.environ.get("ACTIVELOOP_TOKEN")


In [160]:
import random

from langchain.chains import RetrievalQA
# ChatOpenAI is instantiated at the bottom of this cell, so it has to be imported here;
# relying on the import in a later cell only works when cells are run out of order.
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAIChat
from langchain.vectorstores.deeplake import DeepLake
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from tqdm import tqdm

# Deterministic (temperature=0) chat model.
# NOTE: `llm` is assigned again in the question-generation cell with model "gpt-3.5-turbo";
# cells that pass `llm=llm` see whichever assignment executed last.
llm = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0)

In [16]:
# Embedding function handed to the DeepLake vector store created in the next cell.
# Constructed with no arguments — presumably the API key is picked up from the environment
# populated by load_dotenv(); TODO confirm.
openai_embeddings = OpenAIEmbeddings()

In [18]:
# Alternative (disabled): create a DeepLake instance and add the documents in one step.
# dataset_path = "hub://101010/text_embedding"
# db = DeepLake.from_documents(documents, dataset_path=dataset_path, embedding=OpenAIEmbeddings())

# Vector store used by every later cell of the notebook.
db = DeepLake(
    # hub://<org_id>/<dataset>; org_id stands for your username or organization from activeloop
    dataset_path="hub://siddartha10/manufacturing_CSI",
    embedding=openai_embeddings,
    token=activeloop_key,
    runtime={"tensor_db": True},
    read_only=False,
    # overwrite=True,  # use the overwrite flag if you want to overwrite the full dataset
)

Your Deep Lake dataset has been successfully created!




In [19]:
# Local PDF manuals to ingest; replace 'file_paths' with the paths to your own files.
file_paths = [
    "manual's.pdf",
    "Operation and Maintenance Manual.pdf",
    "Operations Manual.pdf",
]

# Collects the documents produced from every manual, in the order the files are listed.
pages = []
for manual_path in file_paths:
    pages.extend(PyPDFLoader(manual_path).load_and_split())

 45%|████▍     | 134/300 [35:15<43:40, 15.79s/it]
 26%|██▌       | 77/300 [22:09<1:04:10, 17.27s/it]


In [20]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Upper bound, in characters, for the text of a single entry added to the vector store.
chunk_size = 4096
docs_new = []

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
)

for doc in pages:
    if len(doc.page_content) < chunk_size:
        # Already short enough: keep the loaded document object as it is.
        docs_new.append(doc)
    else:
        # Hand the page's metadata to the splitter so the pieces of an over-long page keep
        # it; without `metadatas` the pieces are created with empty metadata.
        split_docs = text_splitter.create_documents(
            [doc.page_content], metadatas=[doc.metadata]
        )
        docs_new.extend(split_docs)

In [21]:
# Embed the prepared chunks and add them to the Deep Lake dataset.
# NOTE(review): despite the name, `docs` presumably receives the ids of the inserted entries
# rather than documents — TODO confirm. It is reassigned a few cells later before being read.
# Re-running this cell presumably inserts the same chunks again — verify before re-executing.
docs = db.add_documents(docs_new)

Creating 197 embeddings in 1 batches of size 197:: 100%|██████████| 1/1 [00:07<00:00,  7.68s/it]

Dataset(path='hub://siddartha10/manufacturing_CSI', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype       shape      dtype  compression
  -------    -------     -------    -------  ------- 
   text       text      (197, 1)      str     None   
 metadata     json      (197, 1)      str     None   
 embedding  embedding  (197, 1536)  float32   None   
    id        text      (197, 1)      str     None   





In [22]:
from typing import List

from langchain.chains.openai_functions import (
    create_structured_output_chain,
)
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import HumanMessage, SystemMessage
from pydantic import BaseModel, Field

In [23]:
# Read the stored chunk texts and their ids back from the dataset, if they exist
# (optional: the documents can also be ingested instead).
dataset = db.vectorstore.dataset
docs = dataset.text.data(fetch_chunks=True, aslist=True)["value"]
ids = dataset.id.data(fetch_chunks=True, aslist=True)["value"]

In [24]:
# If we pass in a model explicitly, we need to make sure it supports the OpenAI function-calling API.
# NOTE: this rebinds `llm`, which the import cell near the top of the notebook assigned with
# model "gpt-3.5-turbo-1106"; every later use of `llm` sees whichever assignment ran last.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)


class Questions(BaseModel):
    """Identifying information about the manufacturing system."""

    # Output schema passed to create_structured_output_chain; generate_queries reads the
    # `.question` attribute of each chain result.
    # NOTE(review): the docstring above and the Field description below are presumably sent
    # to the model as part of the function schema — treat them as prompt text; TODO confirm.
    question: str = Field(..., description="Questions about manufacturing system")


prompt_msgs = [
    SystemMessage(
        # Adjacent string literals are joined at compile time, so the two sentences are
        # separated by exactly one space. A backslash continuation inside a single literal
        # would embed the next source line's indentation in the prompt text.
        content=(
            "You are a world class expert for generating questions based on provided context. "
            "You make sure the question can be answered by the text."
        )
    ),
    # `{input}` is filled with the passage the question must be generated from.
    HumanMessagePromptTemplate.from_template(
        "Use the given text to generate a question from the following input: {input}"
    ),
    HumanMessage(content="Tips: Make sure to answer in the correct format"),
]
prompt = ChatPromptTemplate(messages=prompt_msgs)
# verbose=True echoes the formatted prompt on each run; the bulk-generation cell further
# down rebuilds the chain with verbose=False.
chain = create_structured_output_chain(Questions, llm, prompt, verbose=True)

# Sample passage used to try the chain on a single input before generating questions in bulk.
text = """
UWM CSI Vial Filling Connected Smart Manufacturing (CSM) System The UWM CSI Vial Filling CSM  System is an intelligent manufacturing system using the latest Industry 4.0 connected advanced manufacturing equipment and techniques to produce  vials filled with varying product using a variety of filling methods and capturing process data that can be used for data analysis and system optimization.  The  Vial Filling CSM System is a platform that university faculty and students will use for both education and research to  further the advancement of a connected enterprise.   The CAM components are  integrate d seamlessly within a fully  integrated architecture and connected  enterprise using cutting -edge smart -data devices at all layers.     The process overview detailed in this section describes in general how the Vial Filling Connected Smart Manufacturing system functions as a complete system processing components.  Operational steps and slight variances in the process may differ from what is described here depending on the configuration parameters or  using the stations  in a dry cycle mode .  The Vial Filling CSM utilizes localized system configuration setting within the machine as well as process data requirements and parameters to determine how  to process the product and which stations and inspections are required to complete the production job.  When running under MES Production Center control, the process requirements and parameters are received  from the MES system as to where  and how to process and inspect the vials produced by the system .    """
# NOTE: despite the plural name this holds the result of one chain run (presumably a single
# Questions instance); `questions` is rebound to a list by the bulk-generation cell below.
questions = chain.run(input=text)
print(questions)



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: You are a world class expert for generating questions based on provided context.                 You make sure the question can be answered by the text.
Human: Use the given text to generate a question from the following input: 
UWM CSI Vial Filling Connected Smart Manufacturing (CSM) System The UWM CSI Vial Filling CSM  System is an intelligent manufacturing system using the latest Industry 4.0 connected advanced manufacturing equipment and techniques to produce  vials filled with varying product using a variety of filling methods and capturing process data that can be used for data analysis and system optimization.  The  Vial Filling CSM System is a platform that university faculty and students will use for both education and research to  further the advancement of a connected enterprise.   The CAM components are  integrate d seamlessly within a fully  integrated architecture and connected  enter

In [26]:
def generate_queries(docs: List[str], ids: List[str], n: int = 100):
    """Generate ``n`` synthetic questions, each labelled with the id of its source text.

    Every iteration draws one position at random (with replacement, so a text may be used
    more than once), runs the module-level ``chain`` on the text at that position and records
    the ``.question`` attribute of the result together with the id at the same position.

    Sampling uses the module-level ``random`` functions; call ``random.seed(...)`` beforehand
    when a repeatable question set is needed.

    Args:
        docs: Texts to draw from.
        ids: Identifiers aligned index-for-index with ``docs``.
        n: Number of questions to generate.

    Returns:
        A tuple ``(questions, relevances)`` of equal length. ``questions`` holds the generated
        questions and ``relevances[i]`` is ``[(source_id, 1)]`` for ``questions[i]``.

    Raises:
        ValueError: If ``docs`` and ``ids`` differ in length, or if questions are requested
            from an empty ``docs``.
    """
    # Fail fast with a clear message instead of an IndexError part-way through a long run.
    if len(docs) != len(ids):
        raise ValueError(
            f"docs and ids must have the same length, got {len(docs)} and {len(ids)}"
        )
    if n > 0 and len(docs) == 0:
        raise ValueError("cannot generate questions from an empty docs list")

    questions = []
    relevances = []
    # The context manager closes the progress bar even when a chain call raises.
    with tqdm(total=n) as pbar:
        while len(questions) < n:
            # 1. randomly draw a piece of text and its relevance id
            r = random.randrange(len(docs))
            text, label = docs[r], ids[r]

            # 2. generate a query and pair it with the id of the text it came from
            questions.append(chain.run(input=text).question)
            relevances.append([(label, 1)])
            pbar.update(1)
            if len(questions) % 10 == 0:
                print(f"q: {len(questions)}")
    return questions[:n], relevances[:n]


# Rebuild the chain with verbose=False so the generation calls below do not each echo their
# formatted prompt, then build the labelled question set (500 questions).
chain = create_structured_output_chain(Questions, llm, prompt, verbose=False)
questions, relevances = generate_queries(docs, ids, n=500)



q: 10




q: 20




q: 30




q: 40




q: 50




q: 60




q: 70




q: 80




q: 90




q: 100




q: 110




q: 120




q: 130




q: 140




q: 150




q: 160




q: 170




q: 180




q: 190




q: 200




q: 210




q: 220




q: 230




q: 240




q: 250




q: 260




q: 270




q: 280




q: 290




q: 300




q: 310




q: 320




q: 330




q: 340




q: 350




q: 360




q: 370




q: 380




q: 390




q: 400




q: 410




q: 420




q: 430




q: 440




q: 450




q: 460




q: 470




q: 480




q: 490


100%|██████████| 500/500 [46:07<00:00,  5.53s/it]

q: 500





In [27]:
# The first TRAIN_SIZE generated questions are used for training; the rest are held out.
TRAIN_SIZE = 300
train_questions, test_questions = questions[:TRAIN_SIZE], questions[TRAIN_SIZE:]
train_relevances, test_relevances = relevances[:TRAIN_SIZE], relevances[TRAIN_SIZE:]

In [28]:
# Sanity check: sizes of the training and held-out question sets.
print(len(train_questions), len(test_questions))

300 200


In [29]:
# Start a Deep Memory training job from the training questions and their relevance labels.
# The call's result is kept as `job_id` (the captured output reports a "Job ID" on start).
job_id = db.vectorstore.deep_memory.train(
    queries=train_questions,
    relevance=train_relevances,
)

Starting DeepMemory training job
Your Deep Lake dataset has been successfully created!




Preparing training data for deepmemory:


Creating 300 embeddings in 1 batches of size 300:: 100%|██████████| 1/1 [00:06<00:00,  6.07s/it]


DeepMemory training job started. Job ID: 656f8339ea392f219a119b0b


In [43]:
# Check on the training job started by the training cell. Using `job_id` instead of a pasted
# literal keeps this cell pointing at the current job when the notebook is re-executed.
db.vectorstore.deep_memory.status(job_id)

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/siddartha10/manufacturing_CSI
--------------------------------------------------------------
|                  656f8339ea392f219a119b0b                  |
--------------------------------------------------------------
| status                     | completed                     |
--------------------------------------------------------------
| progress                   | eta: 0.9 seconds              |
|                            | recall@10: 96.67% (+15.00%)   |
--------------------------------------------------------------
| results                    | recall@10: 96.67% (+15.00%)   |
--------------------------------------------------------------




In [44]:
# Score retrieval on the held-out questions; the printed report lists recall@k both without
# and with Deep Memory.
recall = db.vectorstore.deep_memory.evaluate(
    queries=test_questions,
    relevance=test_relevances,
)

Embedding queries took 2.03 seconds
---- Evaluating without Deep Memory ---- 
Recall@1:	  60.0%
Recall@3:	  72.5%
Recall@5:	  80.5%
Recall@10:	  88.0%
Recall@50:	  99.5%
Recall@100:	  99.5%
---- Evaluating with Deep Memory ---- 
Recall@1:	  76.5%
Recall@3:	  82.0%
Recall@5:	  88.5%
Recall@10:	  92.5%
Recall@50:	  99.0%
Recall@100:	  100.0%


In [80]:
# Retriever over the vector store with Deep Memory switched on and k set to 10.
retriver = db.as_retriever()
retriver.search_kwargs.update({"deep_memory": True, "k": 10})

# "stuff" chain: answer the query from the retrieved chunks.
query = "what are the different types of stations?"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
print(qa.run(query))

Based on the provided context, the different types of stations mentioned in the CSM Operations Manual are:

1. Station 1: Load station
2. Station 2: Dry Fill station
3. Station 3: Wet Fill station
4. Station 4: Stoppering station
5. Station 5: Wet Fill station
6. Station 6: Stoppering station
7. Station 7: Inspection station
8. Station 8: Labeling station
9. Station 9: Unload station

These stations are part of the vial filling system and each station has its own specific operation and functionality.


In [109]:
# Recreate the retriever (Deep Memory on, k set to 10); the following cells reuse it.
retriver = db.as_retriever()
retriver.search_kwargs.update({"deep_memory": True, "k": 10})

query = "what is a magne motion?"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
qa.run(query)

'MagneMotion is an independent cart transport system that utilizes linear motor technology and magnetic carriers to transport items, such as vials, throughout a specific area or system. It consists of nodes and paths that allow the transport carriers to move and stop at different process stations or bypass paths. The MagneMotion system is controlled by a node controller and can be manipulated and monitored through HMI screens.'

In [110]:
# NOTE(review): a fresh RetrievalQA chain is built in every cell and no memory object is
# passed, so this instruction presumably does not carry over to later questions — TODO confirm.
query = "it is not CSM please refer to it as CSI from next time?"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
qa.run(query)

'I apologize for the mistake. I will refer to it as CSI from now on. Thank you for pointing that out.'

In [111]:
# Procedural question: how to power on the test bed.
query = "How to turn the test bed on?"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
qa.run(query)

"The CSM Operations Manual does not provide specific instructions on how to turn the test bed on. It mainly focuses on the HMI screens and operations. It is recommended to refer to the manufacturer's documentation or contact the technical support for the test bed to get the specific instructions on how to turn it on."

In [112]:
# Second attempt at the CSM -> CSI correction.
# NOTE(review): the chain is rebuilt here without any memory argument, so earlier exchanges
# are presumably not visible to the model — TODO confirm.
query = "Again you are it as CSM no it is CSI?"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
qa.run(query)

'I apologize for the confusion. The correct acronym is CSI, not CSM.'

In [113]:
# Safety question answered from the retrieved manual chunks.
query = "What safety measures are in place to protect workers?"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
qa.run(query)

'The CSM Operations Manual outlines several safety measures to protect workers. Here are some of the key safety precautions:\n\n1. Follow Safety Instructions: Workers are instructed to carefully read all safety messages in the manual and on the machine safety signs. They are also advised to learn how to operate the machine and use controls properly.\n\n2. Safety Precautions: Workers are reminded to always operate the machine with all guards in place and all safety devices functioning properly. They should avoid wearing loose-fitting clothing and jewelry while working near moving components. They should also keep the work area clean and free of obstacles that could cause them to slip or trip.\n\n3. Lockout/Tagout (LOTO): Workers are required to ensure that the correct LOTO is in place before performing any cleaning, repair, or maintenance routines on the machine. This is to prevent the unexpected startup of the machine and protect workers from hazardous energy.\n\n4. Hazardous Energy Pr

In [120]:
# Station-specific question: the Dry Fill station.
query = "Explain me about the Dry Fill station"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
qa.run(query)

"I'm sorry, but I couldn't find any information about the Dry Fill station in the provided context."

In [122]:
# Station-specific question: the Wet Fill station.
query = "what is wet Fill station"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
qa.run(query)

"A wet fill station is a part of a system or process that is used to dispense specified quantities of fluids into a container, such as a vial. It typically consists of one or more nozzles that dispense the fluids sequentially or simultaneously, depending on the configuration of the station. The liquid fill parameters and resulting data are passed between the system's programmable logic controller (PLC) and the fluid delivery system's PLC. The station receives process parameters from the system's part tracking and traceability logic and dispenses the appropriate liquids in the appropriate order into the container. After completion, the station updates the system's part tracking and traceability database and sends the container to the next processing station."

In [123]:
# Same topic as the earlier Dry Fill question, phrased differently.
query = "what is dry fill stations?"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
print(qa.run(query))

Based on the provided context, there is no mention of "dry fill stations" in the given information. Therefore, it is not possible to provide a specific answer to your question.


In [129]:
# Operating-mode question: Dry Cycle mode.
query = "What is Dry cycle mode?"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
print(qa.run(query))

Dry Cycle Mode is a mode of operation in which the system emulates an auto sequence without any parts present. It is primarily used for maintenance or troubleshooting purposes. In Dry Cycle Mode, the system ignores part presence indicators and allows the Auto Sequence to function as if parts were present. Each time the system is started in Dry Cycle Mode, the Dry Cycle count is reset.


In [130]:
# HMI question: the robot screens.
query = "Explain me about Robot screens?"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
print(qa.run(query))

Robot screens are a part of the HMI (Human Machine Interface) system used to view and control the functions of a robot. These screens allow operators to manipulate the robot's functions, I/O (Input/Output), and parameters. They also provide manual control of the robot, including operations such as pick, place, and go to operations, as well as control of the robot's gripper open and close functions.

The robot screens display all I/O and interface signals, such as digital I/O, Universal I/O, and Group I/O, to aid in maintenance and troubleshooting efforts. It is important to note that actions that may cause damage to the equipment or product should be interlocked through the PLC (Programmable Logic Controller) program or not allowed to be selected via the HMI. Modification of the HMI operator through "visibility" options is the preferred method.

The specific robot screens and their uses may vary depending on the type of robot being used. For example, the LR-Mate Robot Screen may includ

In [134]:
# Status-indicator question: meaning of red flashing lights.
query = "What does it mean by red flashing lights?"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
print(qa.run(query))

According to the CSM Operations Manual, red flashing lights indicate that the system is faulted.


In [135]:
# Process question: how vials are inspected for quality.
query = "How does the factory test vials for quality?"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriver,
)
print(qa.run(query))

The factory tests vials for quality through a vial inspection station and a vision inspection station. 

The vial inspection station inspects the vials for damage or cleanliness by rotating the vial 360 degrees across three different sections of the vial. It also scans and associates the vial's 2D barcode data to the scheduled production job. If a vial passes the inspection, it is loaded onto the MagneMotion transport system. If it fails the inspection, it is rejected to the reject parts tray.

The vision inspection station verifies the contents of each vial it inspects. For liquid fill vials, the camera program evaluates the vial and returns RGB values of the inspection area. The resultant data is compared against process parameters to determine if the vial has the correct color. For


In [136]:
query = "explain me about CSI test bed?"
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo",temperature=0), chain_type="stuff", retriever=retriver
)
output = qa.run(query)
# Put each sentence on its own line. Replacing ". " with ".\n" keeps the full stop that
# split('. ') followed by '\n'.join() strips from every sentence except the last.
formatted_output = output.replace('. ', '.\n')
print(formatted_output)

The CSI test bed is a system used for testing and demonstrating various manufacturing processes and technologies
It consists of different components and stations that simulate different stages of a manufacturing process
The test bed is designed to be transported and assembled at different locations for testing purposes.

The test bed includes features such as the MagneMotion Independent Cart Transport System, which uses linear motor technology and magnetic carriers to transport vials throughout the test bed
It also includes stations like the Vision Inspection Station, which is used to verify the contents of each vial, and the Inspect/Repair Station, where products can be staged for inspection and possible repair.

The test bed is equipped with safety systems, including emergency stop buttons, area scanners, light curtains, and door interlocks, to ensure the safety of personnel working with the system
It also has a dedicated Allen Bradley L8 series GuardLogix programmable safety control

In [161]:
# Uses the module-level `llm` rather than constructing a new ChatOpenAI inline; the answer
# is stored in `output` and printed by the next cell.
query = "Explain different types of methods that prevent carts from crashing?"
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriver,
)
output = qa.run(query)


In [162]:
# Show the answer stored in `output` by the previous cell.
print(output)

The MagneMotion Independent Cart Transport system is equipped with different modes to prevent carts from crashing. These modes include:

1. Manual Mode: In this mode, system operators manually control the movement of the carts, following a regimented procedure to avoid damage and spills occurrences. The system is designed to take automatic measures to protect itself from moving into objects, such as crashing, while the operators are maneuvering the carts.

2. Semi-Automatic Mode: In this mode, kinetic energy is present, and a series of system operators perform the task that the rail robot would perform in a safe and secure manner. Operators stationed throughout the system physically pick and place vials from trays to vehicles and vice-versa without using the rail robot. The system is designed to require pushbutton or HMI stations in specific areas.

3. Sequential Step Mode: In this mode, kinetic energy is present, and system operators are capable of taking a single vial from its source

In [171]:
# Off-topic probe — presumably checks that the chain declines questions the manuals
# do not cover.
query = "How to factorial of a number?"
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriver
)
output = qa.run(query)
# Put each sentence on its own line. Replacing ". " with ".\n" keeps the full stop that
# split('. ') followed by '\n'.join() strips from every sentence except the last.
formatted_output = output.replace('. ', '.\n')
print(formatted_output)


I don't know.


In [174]:
query = "Routers explain?"
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriver
)
output = qa.run(query)
# Put each sentence on its own line. Replacing ". " with ".\n" keeps the full stop that
# split('. ') followed by '\n'.join() strips from every sentence except the last.
formatted_output = output.replace('. ', '.\n')
print(formatted_output)


I don't have enough information to answer your question.


In [178]:
query = "what is CSI?"
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriver
)
output = qa.run(query)
# Put each sentence on its own line. Replacing ". " with ".\n" keeps the full stop that
# split('. ') followed by '\n'.join() strips from every sentence except the last.
formatted_output = output.replace('. ', '.\n')
print(formatted_output)


CSI stands for Connected Systems Institute
It is a platform at the University of Wisconsin - Milwaukee that uses the latest Industry 4.0 connected advanced manufacturing equipment and techniques to produce vials filled with varying products
The platform is used for education and research to further the advancement of a connected enterprise.


In [179]:
query = "How do we track the vials?"
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriver
)
output = qa.run(query)
# Put each sentence on its own line. Replacing ". " with ".\n" keeps the full stop that
# split('. ') followed by '\n'.join() strips from every sentence except the last.
formatted_output = output.replace('. ', '.\n')
print(formatted_output)


The vials are tracked using a barcode/QR label system
Each vial is labeled with a barcode or QR code before use in the VFC
The label printer supplied with the VFC ensures full track-and-trace and serialization capability throughout the system
The vials are tied to a specific MagneMotion puck, and robotic manipulation during vial loading compensates for inconsistent label placement and varying vial orientation within each tray
A Teledyne Dalsa Imaging system is used for this application.


In [181]:
# Off-topic probe — presumably checks that the chain declines questions the manuals
# do not cover.
query = "explain me about newton's 2nd law?"
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriver
)
output = qa.run(query)
# Put each sentence on its own line. Replacing ". " with ".\n" keeps the full stop that
# split('. ') followed by '\n'.join() strips from every sentence except the last.
formatted_output = output.replace('. ', '.\n')
print(formatted_output)

I don't know.
