In [2]:
# %%
#!pip install sentence-transformers
import os
from tqdm import tqdm
import chromadb
from chromadb.config import Settings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Ingestion: split every PDF under Papers/ into chunks and store them in the
# persistent Chroma collection "agua". Files already listed in done.txt are
# skipped, so the cell can be re-run after an interruption.
PAPERS_DIR = 'Papers'
DONE_FILE = 'done.txt'

persistent_client = chromadb.PersistentClient()
collection = persistent_client.get_or_create_collection("agua")
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# os.listdir returns entries in arbitrary order; sort so runs are deterministic.
pdf = sorted(
    os.path.join(PAPERS_DIR, name)
    for name in os.listdir(PAPERS_DIR)
    if name.endswith(".pdf")
)

v = Chroma(client=persistent_client,
           collection_name="agua",
           embedding_function=embedding_function)

# Read the list of already-processed files once, instead of re-reading the
# file on every iteration; a set gives constant-time membership tests.
done = set()
if os.path.exists(DONE_FILE):
    with open(DONE_FILE, 'r') as f:
        done = set(f.read().split('\n'))

# The splitter configuration does not depend on the file, so build it once.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

for file_path in tqdm(pdf):
    if file_path in done:
        continue

    loader = PyPDFLoader(file_path)
    document = loader.load()
    documents = text_splitter.split_documents(document)

    # Replace the loader-provided metadata with a fixed schema; only the
    # title (the file path) differs between papers.
    for d in documents:
        d.metadata = {
            "title": file_path,
            "source": "pdf",
            "author": "unknown",
            "year": "unknown",
        }

    # A PDF without extractable text yields no chunks. Skip the insert in that
    # case (an empty batch is presumably rejected by the vector store --
    # TODO confirm) but still record the file so it is not retried.
    if documents:
        v.add_documents(
            documents=documents
        )

    # Record the file only after its chunks were stored, so a crash mid-file
    # causes the file to be retried on the next run.
    with open(DONE_FILE, 'a') as f:
        f.write(file_path+'\n')
    done.add(file_path)
    

100%|██████████| 74/74 [04:52<00:00,  3.95s/it]


In [6]:
# Retrieve the 20 stored chunks most similar to the query text.
results = v.similarity_search("Technologies for water treatment", k=20)

In [7]:
# Show the retrieved documents (the last expression of a cell is rendered as its output).
results

[Document(metadata={'author': 'unknown', 'source': 'pdf', 'title': 'Papers/Vikesland - Nanosensors for water quality monitoring.pdf', 'year': 'unknown'}, page_content='residual disinfectant is added immediately prior to water leaving the \nplant to minimize pathogen concentrations. Within the distribution system, treated water can be subject to intrusion by waste-, surface- or ground-water and its quality can be degraded due to the leach\n-\ning of contaminants (for example, lead, copper, polymers) from plumbing materials or by bacterial re-growth\n39. Water treatment \nplants, water towers and water taps are readily visible components of centralized treatment systems; however, their relative importance \nas sampling locations can be dwarfed by the large volume and spa\n-\ntial diversity of the drinking water distribution system and the pipe \nnetworks within buildings (Fig. 2). This hidden infrastructure com -\nprises a highly complex engineered ecosystem whose characteristics are aff

In [10]:
from langchain_ollama import ChatOllama


# Chat model accessed through langchain_ollama's ChatOllama.
llm = ChatOllama(model="llama3.1")

# Alternative: use OpenAI instead of Ollama.
#   pip install langchain-openai
#   import getpass
#   from langchain_openai import ChatOpenAI  # gpt-3.5-turbo is a chat model
#   if "OPENAI_API_KEY" not in os.environ:
#       os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")
#   llm = ChatOpenAI(model="gpt-3.5-turbo")

question = "Technologies for water treatment"

# Build the context from the text of each retrieved chunk, labelled with the
# paper it came from. Interpolating the list itself would inject the Python
# repr of the Document objects (metadata dicts, escaped newlines) into the
# prompt instead of readable text.
# NOTE(review): 20 chunks of up to 1000 characters each may exceed the context
# window the model is served with -- confirm, or lower k in the search cell.
context = "\n\n".join(
    f"[{doc.metadata.get('title', 'unknown')}]\n{doc.page_content}"
    for doc in results
)

messages = [
    (
        "system",
        "Answer the user's question using the context below.\n\n"
        f"Context:\n{context}",
    ),
    # Explicit role: a parenthesised string alone is just a string, not a
    # (role, content) tuple.
    ("human", question),
]
llm.invoke(messages)

AIMessage(content='There are several technologies used for water treatment, which can be categorized into physical, chemical, and biological methods. Here are some of the most common ones:\n\n**Physical Methods:**\n\n1. **Filtration**: Removing suspended particles and contaminants from water using filters made of materials like sand, gravel, or membranes.\n2. **Coagulation and Flocculation**: Adding chemicals to remove dirt and other suspended particles by causing them to clump together and settle out of the water.\n3. **Sedimentation**: Allowing suspended particles to settle out of the water under gravity.\n4. **Centrifugation**: Using a spinning motion to separate liquids and solids.\n\n**Chemical Methods:**\n\n1. **Disinfection**: Killing bacteria, viruses, and other microorganisms using chlorine or other disinfectants.\n2. **Chlorination**: Adding chlorine gas or hypochlorite solution to water to remove pathogens and odors.\n3. **Ozonation**: Using ozone gas to kill bacteria and ot

There are several technologies used for water treatment, which can be categorized into physical, chemical, and biological methods. Here are some of the most common ones:
**Physical Methods:**
1. **Filtration**: Removing suspended particles and contaminants from water using filters made of materials like sand, gravel, or membranes.
2. **Coagulation and Flocculation**: Adding chemicals to remove dirt and other suspended particles by causing them to clump together and settle out of the water.
3. **Sedimentation**: Allowing suspended particles to settle out of the water under gravity.
4. **Centrifugation**: Using a spinning motion to separate liquids and solids.
**Chemical Methods:**
1. **Disinfection**: Killing bacteria, viruses, and other microorganisms using chlorine or other disinfectants.
2. **Chlorination**: Adding chlorine gas or hypochlorite solution to water to remove pathogens and odors.
3. **Ozonation**: Using ozone gas to kill bacteria and other microorganisms.
4. **Activated Carbon Filtration**: Using activated carbon to remove impurities, taste, and odor from water.
**Biological Methods:**
1. **Biological Nutrient Removal (BNR)**: Using bacteria and other microorganisms to break down organic matter in wastewater.
2. **Aerobic Treatment**: Providing oxygen for microorganisms to decompose organic matter in wastewater.
3. **Anaerobic Treatment**: Decomposing organic matter without the presence of oxygen.
4. **Biological Filtration**: Using a living filter medium, such as plants or bacteria, to remove impurities from water.
**Advanced Technologies:**
1. **Reverse Osmosis (RO)**: Forcing water through a semi-permeable membrane to remove dissolved solids and other impurities.
2. **Ultrafiltration (UF)**: Using membranes with smaller pores than RO to remove suspended particles and other impurities.
3. **Nanofiltration (NF)**: Using membranes with even smaller pores than UF to remove dissolved solids and other impurities.
4. **Electrocoagulation**: Using an electric current to remove contaminants from water by causing them to coagulate and settle out.
5. **Advanced Oxidation Processes (AOPs)**: Using oxidants like ozone, chlorine dioxide, or hydrogen peroxide to break down organic matter and other impurities.
6. **Ion Exchange Systems**: Exchanging ions in the water with ions from a resin to remove dissolved solids and other impurities.
7. **Membrane Distillation**: Using heat to evaporate water from a membrane, which is then condensed back into liquid form.
**Emerging Technologies:**
1. **Graphene-based Membranes**: Using graphene-based membranes to remove contaminants from water with high efficiency.
2. **Biochar-based Filtration**: Using biochar (a type of charcoal) to remove impurities and pollutants from water.
3. **Algal-Based Systems**: Using algae to remove nutrients and other pollutants from wastewater.
These are just some of the technologies used for water treatment. The choice of technology depends on the specific needs and requirements of the water being treated, as well as the cost and efficiency considerations.