In [None]:
# Install/upgrade the LangChain text-splitter, community-integration and LangGraph packages
# into the kernel's environment (quiet output).
%pip install --quiet --upgrade langchain-text-splitters langchain-community langgraph

In [None]:
%pip install langchain llama-index


In [13]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

In [None]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Only keep post title, headers, and content from the full HTML.
bs4_strainer = bs4.SoupStrainer(class_=("post-title", "post-header", "post-content"))
# The strainer is handed to the loader via `bs_kwargs` as the `parse_only` filter.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": bs4_strainer},
)
# Fetches the page over the network; `docs` is reused by the cells below.
docs = loader.load()

# A single URL is requested; fail fast if anything other than one document came back.
assert len(docs) == 1
print(f"Total characters: {len(docs[0].page_content)}")

In [None]:
# Preview the first 500 characters of the loaded document's text.
print(docs[0].page_content[:500])



In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded document into overlapping chunks; `all_splits` is what later
# cells add to the vector store.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # track index in original document
)
all_splits = text_splitter.split_documents(docs)

# Report how many chunks the splitter produced.
print(f"Split blog post into {len(all_splits)} sub-documents.")

In [None]:
# Quietly install/upgrade langchain-core (the in-memory vector store used below is
# imported from `langchain_core.vectorstores`).
%pip install -qU langchain-core



In [19]:
import os
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

# Configure Ollama LLM
# Both clients below point at an Ollama server on localhost:11434, which must be
# running with the named models available.
ollama_llm = Ollama(
    # Alternative model that was tried here: "llama3.2:latest"
    model="mistral:7b",
    base_url="http://localhost:11434",
    temperature=0.1
)

# Configure embedding model
ollama_embedding = OllamaEmbedding(
    model_name="nomic-embed-text:latest",
    base_url="http://localhost:11434",
    # NOTE(review): extra option forwarded to Ollama; confirm it has any effect for an
    # embedding model.
    ollama_additional_kwargs={"mirostat": 0}
)

# Register both as LlamaIndex's global defaults.
Settings.llm = ollama_llm
Settings.embed_model = ollama_embedding

In [None]:
import json
from typing import Any, List, Optional

import requests
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.base import Embeddings
from langchain.llms.base import BaseLLM
from langchain.vectorstores import FAISS


# Step 1: Custom LLM Wrapper for Ollama
class OllamaLLM(BaseLLM):
    """LangChain LLM that sends prompts to an Ollama server's ``/api/generate`` endpoint.

    Attributes:
        model_name: Name of the Ollama model to run (e.g. ``"llama2:latest"``).
        base_url: Root URL of the Ollama server.
        temperature: Sampling temperature sent with every request.
    """

    # ``BaseLLM`` is a pydantic model, so configuration must be declared as fields;
    # assigning undeclared attributes inside ``__init__`` is rejected.
    model_name: str
    base_url: str = "http://localhost:11434"
    temperature: float = 0.1

    def __init__(
        self,
        model_name: str,
        base_url: str = "http://localhost:11434",
        temperature: float = 0.1,
        **kwargs: Any,
    ):
        """Create the wrapper; positional and keyword construction both work.

        Args:
            model_name: Name of the Ollama model to run.
            base_url: Root URL of the Ollama server.
            temperature: Sampling temperature sent with every request.
            **kwargs: Any further ``BaseLLM`` options (callbacks, tags, ...).
        """
        # Delegate to pydantic so the fields are validated and properly initialised.
        super().__init__(
            model_name=model_name,
            base_url=base_url,
            temperature=temperature,
            **kwargs,
        )

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Generate a completion for a single prompt.

        Args:
            prompt: Text to send to the model.
            stop: Optional stop sequences.

        Returns:
            The generated text.

        Raises:
            Exception: If the server answers with a non-200 status code.
        """
        # Sampling parameters go under "options" in Ollama's generate API.
        options = {"temperature": self.temperature}
        if stop:
            options["stop"] = stop
        payload = {
            "model": self.model_name,
            "prompt": prompt,
            # Without this the server streams newline-delimited JSON chunks,
            # which `response.json()` cannot parse.
            "stream": False,
            "options": options,
        }
        response = requests.post(f"{self.base_url}/api/generate", json=payload)
        if response.status_code != 200:
            raise Exception(f"Ollama API error: {response.text}")
        # The generated text is returned under the "response" key.
        return response.json()["response"]

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Any = None,
        **kwargs: Any,
    ) -> "LLMResult":
        """Generate completions for several prompts.

        Args:
            prompts: Prompts to complete, one request per prompt.
            stop: Optional stop sequences applied to every prompt.
            run_manager: Callback manager supplied by LangChain (unused).
            **kwargs: Extra call-time options supplied by LangChain (unused).

        Returns:
            An ``LLMResult`` holding one ``Generation`` per prompt, which is the
            structure ``BaseLLM`` expects from ``_generate``.
        """
        # Imported locally so this cell does not depend on other import cells.
        from langchain_core.outputs import Generation, LLMResult

        generations = [
            [Generation(text=self._call(prompt, stop=stop))] for prompt in prompts
        ]
        return LLMResult(generations=generations)

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for this LLM implementation."""
        return "ollama"

# Step 2: Custom Embedding Class for Ollama
# NOTE(review): this class has the same name as the LlamaIndex `OllamaEmbedding`
# imported in an earlier cell and shadows it once this cell has run. Objects created
# before this point (e.g. `ollama_embedding`) are still LlamaIndex instances.
class OllamaEmbedding(Embeddings):
    """LangChain ``Embeddings`` implementation backed by Ollama's ``/api/embed`` endpoint."""

    def __init__(self, model_name: str, base_url: str = "http://localhost:11434"):
        """Store the connection settings.

        Args:
            model_name: Name of the Ollama embedding model.
            base_url: Root URL of the Ollama server.
        """
        self.model_name = model_name
        self.base_url = base_url

    def embed_documents(self, texts):
        """Embed several texts and return one vector per text, in input order."""
        texts = list(texts)
        if not texts:
            # Nothing to embed; avoid a pointless round trip to the server.
            return []
        # The endpoint accepts a list, so all texts go out in a single request.
        return self._request_embeddings(texts)

    def embed_query(self, text):
        """Embed a single query string and return its vector."""
        return self._embed(text)

    def _embed(self, text):
        """Embed one text and return its vector."""
        return self._request_embeddings([text])[0]

    def _request_embeddings(self, inputs):
        """POST ``inputs`` to the server and return the list of embedding vectors.

        Raises:
            Exception: If the server answers with a non-200 status code.
        """
        # `/api/embed` expects the text under "input" and replies with "embeddings"
        # (a list of vectors); "prompt"/"embedding" belong to the older
        # `/api/embeddings` endpoint.
        payload = {"model": self.model_name, "input": inputs}
        response = requests.post(f"{self.base_url}/api/embed", json=payload)
        if response.status_code != 200:
            raise Exception(f"Ollama API error: {response.text}")
        return response.json()["embeddings"]

In [None]:
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import InMemoryVectorStore


class _LlamaIndexEmbeddingsAdapter(Embeddings):
    """Expose a LlamaIndex embedding model through LangChain's ``Embeddings`` interface.

    ``InMemoryVectorStore`` calls ``embed_documents`` / ``embed_query``, which the
    LlamaIndex embedding configured earlier does not provide.
    """

    def __init__(self, llama_index_embedding):
        """Wrap ``llama_index_embedding`` (a LlamaIndex embedding model instance)."""
        self._inner = llama_index_embedding

    def embed_documents(self, texts):
        """Embed several texts via LlamaIndex's batch API."""
        return self._inner.get_text_embedding_batch(list(texts))

    def embed_query(self, text):
        """Embed a single query string via LlamaIndex's query API."""
        return self._inner.get_query_embedding(text)


# Use the embedding object directly when it already speaks LangChain's interface;
# otherwise (the LlamaIndex object configured above) go through the adapter.
vector_store = InMemoryVectorStore(
    ollama_embedding
    if hasattr(ollama_embedding, "embed_documents")
    else _LlamaIndexEmbeddingsAdapter(ollama_embedding)
)

In [None]:
# Embed and store every chunk; `add_documents` returns the ids it assigned.
document_ids = vector_store.add_documents(documents=all_splits)

# Show the first three ids as a quick sanity check.
print(document_ids[:3])

In [None]:
%pip install chromadb



In [None]:
from sentence_transformers import SentenceTransformer
from langchain.embeddings.base import Embeddings

# Step 1: Custom Embedding Class Using SentenceTransformer
class SentenceTransformerEmbedding(Embeddings):
    """LangChain ``Embeddings`` wrapper around a ``SentenceTransformer`` model."""

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        """Load the sentence-transformers model identified by ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def _encode_to_list(self, payload):
        """Encode ``payload`` to a tensor and convert it to nested Python lists."""
        encoded = self.model.encode(payload, convert_to_tensor=True)
        return encoded.tolist()

    def embed_documents(self, texts):
        """Return the embeddings for ``texts`` as Python lists."""
        return self._encode_to_list(texts)

    def embed_query(self, text):
        """Return the embedding for a single ``text`` as a Python list."""
        return self._encode_to_list(text)

from langchain.vectorstores import Chroma

def create_vectorstore(documents, embedding_model):
    """Build and return a Chroma vector store from ``documents``.

    Args:
        documents: Documents to index.
        embedding_model: Embedding object passed to ``Chroma.from_documents``.

    Returns:
        The ``Chroma`` store created by ``Chroma.from_documents``.
    """
    return Chroma.from_documents(documents, embedding_model)

# Step 3: Main Program
# End-to-end run: load a text file, index it in Chroma with sentence-transformer
# embeddings, then answer one question through a QA chain backed by Ollama.
# NOTE(review): `load_documents` and `setup_qa_chain` are not defined in the visible
# part of this notebook - confirm they are defined in an earlier cell, otherwise this
# cell fails with NameError.
# NOTE(review): inside a notebook kernel `__name__` is normally "__main__", so this
# guard does not prevent the block from running when the cell is executed.
if __name__ == "__main__":
    # Path to the text file
    file_path = "../0-Data/paul_graham_short.txt"  # Replace with the actual file path
    
    # Step 1: Load the text file
    print("Loading documents...")
    documents = load_documents(file_path)
    
    # Step 2: Create embeddings and a vectorstore
    print("Creating vectorstore...")
    embedding_model = SentenceTransformerEmbedding()
    vectorstore = create_vectorstore(documents, embedding_model)
    
    # Step 3: Set up the QA chain with Ollama
    print("Setting up QA chain...")
    # NOTE(review): this rebinds `ollama_llm`, which an earlier cell assigned to the
    # LlamaIndex `Ollama` client; later cells will see this LangChain wrapper instead.
    ollama_llm = OllamaLLM(model_name="llama2:latest", base_url="http://localhost:11434", temperature=0.1)
    qa_chain = setup_qa_chain(vectorstore, ollama_llm)
    
    # Step 4: Query the chain
    query = "What does Paul Graham say about startups?"
    print("Querying the system...")
    response = qa_chain.run(query)
    
    # Print the response
    print("Query Response:")
    print(response)
