# Install dependencies in the terminal
pip install transformers
pip install langchain, langchain-huggingface, langchain-community
pip install faiss-cpu

In [29]:
# Import dependencies
from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
    )
from langchain.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import AsyncHtmlLoader
from langchain_community.document_transformers import BeautifulSoupTransformer
from langchain.chains import RetrievalQA

In [21]:
# Load LLM
model_id = "unsloth/Llama-3.2-1B-Instruct"
# microsoft/Phi-4-mini-instruct
# google/gemma-3-1b-it
# meta-llama/Llama-3.2-1B-Instruct
# unsloth/Llama-3.2-1B-Instruct

# LLM
llm = HuggingFacePipeline.from_model_id(
    model_id=model_id,
    task="text-generation",
    pipeline_kwargs=dict(
        max_new_tokens=512,
        do_sample=False,
        repetition_penalty=1.03,
    ),
)


# Chat
chat = ChatHuggingFace(llm=llm, verbose=True)

Device set to use cuda:0


In [22]:
llm.invoke("What is SmartLab?")



"What is SmartLab? SmartLab is a company that specializes in developing and manufacturing high-quality, innovative laboratory equipment for the scientific community. Their products are designed to make scientific research more efficient, accurate, and cost-effective.\n\nSmartLab's products include a wide range of laboratory instruments, such as microscopes, spectrometers, and centrifuges, as well as software solutions for data analysis and visualization. Their equipment is known for its high precision, reliability, and ease of use, making it accessible to researchers from various disciplines, including biology, chemistry, physics, and more.\n\nSome of the key features of SmartLab's products include:\n\n* High-resolution imaging and spectroscopy\n* Advanced data analysis and visualization tools\n* Compact and portable designs\n* Easy-to-use interfaces and software\n* Compatibility with a wide range of operating systems\n\nOverall, SmartLab is a leading provider of laboratory equipment a

In [23]:
messages = [
    ("system", "You are a helpful assistant."),
    ("human", "What is SmartLab?"),
]

chat.invoke(messages)



AIMessage(content="<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 25 Mar 2025\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is SmartLab?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nSmartLab is a popular online platform that allows users to conduct experiments and learn about various scientific concepts in a hands-on, interactive way. It's often used in educational settings, particularly for middle school and high school students.\n\nWith SmartLab, users can explore different experiments, such as chemistry, physics, biology, and more, by creating and conducting their own experiments using virtual labs. The platform provides a user-friendly interface, step-by-step instructions, and real-time feedback, making it an engaging and effective way to learn about scientific concepts.\n\nSome of the features of SmartLab include:\n\n1. Virtual labs: Users can create and cond

In [24]:
# Load documents from webpage
urls = ["https://www.metropolia.fi/fi/tutkimus-kehitys-ja-innovaatiot/yhteistyoalustat/smart-lab"]
loader_html = AsyncHtmlLoader(urls)
docs_html = loader_html.load()

bs_transformer = BeautifulSoupTransformer()
docs_transformed = bs_transformer.transform_documents(docs_html, tags_to_extract=["div"])

# Split it into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs_transformed_split = text_splitter.split_documents(docs_transformed)


print(len(docs_transformed_split))
print(len(docs_transformed[0].page_content))

Fetching pages: 100%|##########| 1/1 [00:00<00:00,  4.03it/s]

18
15864





In [25]:
# create the open-source embedding function
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# load it into FAISS
html_db = FAISS.from_documents(docs_transformed_split, embedding_function)

In [26]:
# Retriever

retriever_html = html_db.as_retriever()

qa_web = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever_html,    
    verbose=True
)

In [27]:
# Retrieval QA 
from time import time
def qa_rag(qa, query):
    print(f"Query: {query}\n")
    time_1 = time()
    result = qa.run(query)
    time_2 = time()
    print(f"Inference time: {round(time_2-time_1, 3)} sec.")
    print("\nResult:", result)

In [28]:
query = "Who are involved in the project SmartLab?"
qa_rag(qa_web, query)

Query: Who are involved in the project SmartLab?



[1m> Entering new RetrievalQA chain...[0m





[1m> Finished chain.[0m
Inference time: 1.232 sec.

Result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

(/sites/default/files/2021-12/sl_049_nettikoko.jpg) SmartLab-keittiö   (/sites/default/files/2021-12/sl_108_nettikoko.jpg)                (/sites/default/files/2021-12/sl_108_nettikoko.jpg) SmartLab-paneeli   (/sites/default/files/2021-12/sl_015_nettikoko.jpg)                (/sites/default/files/2021-12/sl_015_nettikoko.jpg) SmartLab sähkökaappi   (/sites/default/files/2021-12/sl_059_nettikoko.jpg)                (/sites/default/files/2021-12/sl_059_nettikoko.jpg) Valokatkaisimia SmartLabissa   (/sites/default/files/2021-12/sl_060_nettikoko.jpg)                (/sites/default/files/2021-12/sl_060_nettikoko.jpg) SmartLab valaistus Ota yhteyttä Teknologiapäällikkö Harri Hahkala p. 040 082 8636 harri.hahkala [at] metropolia.fi (harri[dot]hahkala[at]metropolia[dot]f

In [None]:
# Clear RAM cache
#del pipe

# Clear CUDA cache
import torch
torch.cuda.empty_cache()