In [63]:
!pip install langchain langchain-core
!pip install --upgrade langchain langchain-core langchain-community

Defaulting to user installation because normal site-packages is not writeable


In [58]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import faiss
import os
import pickle
from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain
import langchain
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

In [1]:
!pip install langchain-groq

Defaulting to user installation because normal site-packages is not writeable
Collecting langchain-core<2.0.0,>=1.0.0 (from langchain-groq)
  Using cached langchain_core-1.0.4-py3-none-any.whl.metadata (3.5 kB)
Using cached langchain_core-1.0.4-py3-none-any.whl (471 kB)
Installing collected packages: langchain-core
  Attempting uninstall: langchain-core
    Found existing installation: langchain-core 0.3.79
    Uninstalling langchain-core-0.3.79:
      Successfully uninstalled langchain-core-0.3.79
Successfully installed langchain-core-1.0.4


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain 0.3.19 requires langchain-core<1.0.0,>=0.3.35, but you have langchain-core 1.0.4 which is incompatible.
langchain-classic 1.0.0 requires langchain-text-splitters<2.0.0,>=1.0.0, but you have langchain-text-splitters 0.3.11 which is incompatible.
langchain-openai 0.3.6 requires langchain-core<1.0.0,>=0.3.35, but you have langchain-core 1.0.4 which is incompatible.
langchain-unstructured 0.1.6 requires langchain-core<0.4.0,>=0.3.6, but you have langchain-core 1.0.4 which is incompatible.


In [3]:


# Chat model used by the question-answering chain.
# The API key is read from the GROQ_API_KEY environment variable so that no
# credential is stored in the notebook; a missing variable raises KeyError
# here instead of failing later on the first request.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0,
    max_tokens=500,
    groq_api_key=os.environ["GROQ_API_KEY"],
)

In [6]:
# Source articles to index; the list is handed to the URL loader below.
urls = [
    "https://calgary.citynews.ca/2025/11/10/calgary-chamber-of-commerce-champagne/",
    "https://www.castanet.net/news/Canada/583308/Finance-minister-says-critical-minerals-refining-is-the-name-of-the-game-",
]

# Loader configured with the URLs above.
loader = UnstructuredURLLoader(urls=urls)

# Load the documents; the trailing expression displays how many were returned
# (the recorded output for these two URLs is 2).
data = loader.load()
len(data)

2

In [31]:
# Break the loaded documents into small chunks (chunk_size=200, no overlap
# between neighbouring chunks) ready for embedding.
r_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=0)

# `chunks` holds the split documents; the last line displays how many there are.
chunks = r_splitter.split_documents(data)
len(chunks)

51

In [32]:

# Pull the raw text out of every chunk; the last line displays the container type.
chunk_texts = [doc.page_content for doc in chunks]
type(chunk_texts)

list

In [50]:


# Load the "all-mpnet-base-v2" sentence-transformers model and encode the text
# of every chunk.
# NOTE(review): `vectors` is not referenced again in the visible cells — the
# FAISS store below is built via `FAISS.from_documents(..., embedding=embeddings)`
# rather than from these vectors. Confirm whether this step is still needed.
encoder_model = SentenceTransformer("all-mpnet-base-v2")
vectors = encoder_model.encode(chunk_texts)

# Create a custom embeddings class for LangChain compatibility
class SentenceTransformerEmbeddings(HuggingFaceEmbeddings):
    """HuggingFaceEmbeddings subclass whose constructor defaults to "all-mpnet-base-v2".

    The class adds no behaviour of its own: it only supplies a default
    ``model_name`` and forwards it to the parent constructor.
    """

    def __init__(self, model_name: str = "all-mpnet-base-v2") -> None:
        """Initialise the parent embeddings class with ``model_name``.

        Args:
            model_name: Name of the embedding model passed through to
                ``HuggingFaceEmbeddings``; defaults to "all-mpnet-base-v2".
        """
        super().__init__(model_name=model_name)

# Build the FAISS vector store from the document chunks, using `embeddings`
# to embed them.
embeddings = SentenceTransformerEmbeddings()
vectorstore = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)

# Persist the store to a local folder so it can be reloaded later.
file_path = "faiss_vectorstore"
vectorstore.save_local(file_path)

# Reload the persisted store. Fail loudly if it is missing: silently skipping
# this step would leave `retriever` and `chain` undefined (or stale from an
# earlier run of the kernel) and only surface as a confusing error in the
# query cell.
if not os.path.exists(file_path):
    raise FileNotFoundError(f"FAISS vector store not found at {file_path!r}")

# SECURITY: `allow_dangerous_deserialization=True` permits loading pickled
# data, which can execute arbitrary code. That is acceptable here only because
# the folder was written by this notebook just above — never enable it for
# index files obtained from an untrusted source.
vectorstore_loaded = FAISS.load_local(
    file_path,
    embeddings,
    allow_dangerous_deserialization=True,
)

# Retriever over the reloaded store, used by the chain to fetch chunks.
retriever = vectorstore_loaded.as_retriever()

# Question-answering chain that returns an answer together with its sources.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=retriever,
)

In [62]:
# Imported here so this cell is self-contained; `set_debug` is the supported
# way to toggle LangChain's global debug output (assigning `langchain.debug`
# directly is the legacy mechanism).
from langchain.globals import set_debug

question = "Canada’s Minister of Finance give what budget to mineral industry?"

# Print verbose debug output for the chain run.
set_debug(True)

# `invoke` replaces the deprecated direct call `chain(...)`;
# `return_only_outputs=True` keeps the input question out of the result, so
# the cell displays only the chain's outputs (answer and sources).
chain.invoke({"question": question}, return_only_outputs=True)


{'answer': 'The Canadian Minister of Finance gave a budget that includes a $2-billion “critical minerals sovereign fund” to the mineral industry.\n\n',
 'sources': 'https://calgary.citynews.ca/2025/11/10/calgary-chamber-of-commerce-champagne/'}