In [None]:
pip install --quiet -U langchain-community langchain-ollama langchain-core tiktoken langchain-nomic "nomic[local]"
pip install scikit-learn "nomic[local]" pdfminer.six

In [33]:
import os, getpass


def _set_env(env_var):
    os.environ[env_var] = os.getenv(env_var)

_set_env("TAVILY_API_KEY")
_set_env("LANGCHAIN_API_KEY")
_set_env("LANGCHAIN_TRACING_V2")
_set_env("LANGCHAIN_ENDPOINT")
_set_env("LANGCHAIN_PROJECT")

os.environ["TOKENIZERS_PARALLELISM"] = "true"

from langchain_community.tools.tavily_search import TavilySearchResults
web_search_tool = TavilySearchResults(k=3)


In [34]:
from langchain_ollama import ChatOllama
local_llm = 'llama3.2:3b-instruct-fp16'

llm = ChatOllama(model=local_llm, temperature=0.0)
llm_json_mode = ChatOllama(model=local_llm, temperature=0.0, format="json")

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "{input}"),
        ("user", "{input}"),
    ]
)

Please run both the command locally

ollama pull llama3.2:3b-instruct-fp16
ollama run llama3.2:3b-instruct-fp16

In [36]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PDFMinerLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_nomic.embeddings import NomicEmbeddings

loader = PDFMinerLoader("data/Algomasterio_System_Design_Interview_Handbook.pdf")
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(documents)

embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")

vector_store = SKLearnVectorStore.from_documents(docs, embeddings)

retriever = vector_store.as_retriever(k=3)

query = "What is system design elements?"

docs = vector_store.similarity_search(query)
print(docs[0].page_content)


SYSTEM DESIGN 
INTERVIEW HANDBOOK

ASHISH PRATAP SINGH

AlgoMaster.io

75 pages guide to ace your next
System Design Interview

Table of Contents

Fundamentals

1. Scalability

2. Availability

3. Latency vs Throughput

4. CAP Theorem

5. Load Balancers

6. Databases

7. CDN

8. Message Queues

9. Rate Limiting

10. Database Indexes

11. Caching

12. Consistent Hashing

13. Database Sharding

14. Consensus Algorithms

6

7

8

9

10

11

12

13

14

15

16

18

19

20

15. Proxy Servers

16. Heartbeats

17. Checksums

18. Service Discovery

19. Bloom Filters

20. Gossip Protocol

Trade-offs

1. Vertical vs Horizontal Scaling

2. Strong vs Eventual Consistency

3. Stateful vs Stateless Design

4. Read vs Write Through Cache

5. SQL vs NoSQL

6. REST vs RPC

21

22

23

24

25

26

28

29

30

31

33

35

7. Synchronous vs Asynchronous

8. Batch vs Stream Processing

9. Long Polling vs WebSockets

10. Normalization vs Denormalization

11. TCP vs UDP

Architectural Patterns

1. Client-

In [None]:
retriever.invoke("What is the best way to prepare for a system design interview?")

In [42]:
documents = retriever.invoke(""" 
                             Question: What is the Table of Contents covered in this book? Example Scalability """)

for i in range(len(documents)):
    if i >= 8:
        doc = documents[i].page_content
        print(documents[i].page_content)
        documents = retriever.invoke(""" Please provide the details from the book for the following question:
                             {doc} """)
    




SYSTEM DESIGN 
INTERVIEW HANDBOOK

ASHISH PRATAP SINGH

AlgoMaster.io

75 pages guide to ace your next
System Design Interview

Table of Contents

Fundamentals

1. Scalability

2. Availability

3. Latency vs Throughput

4. CAP Theorem

5. Load Balancers

6. Databases

7. CDN

8. Message Queues

9. Rate Limiting

10. Database Indexes

11. Caching

12. Consistent Hashing

13. Database Sharding

14. Consensus Algorithms

6

7

8

9

10

11

12

13

14

15

16

18

19

20

15. Proxy Servers

16. Heartbeats

17. Checksums

18. Service Discovery

19. Bloom Filters

20. Gossip Protocol

Trade-offs

1. Vertical vs Horizontal Scaling

2. Strong vs Eventual Consistency

3. Stateful vs Stateless Design

4. Read vs Write Through Cache

5. SQL vs NoSQL

6. REST vs RPC

21

22

23

24

25

26

28

29

30

31

33

35

7. Synchronous vs Asynchronous

8. Batch vs Stream Processing

9. Long Polling vs WebSockets

10. Normalization vs Denormalization

11. TCP vs UDP

Architectural Patterns

1. Client-

In [43]:
tavily_search = TavilySearchResults(k=3)
tavily_search.invoke("Scalability?")

[{'url': 'https://www.techtarget.com/searchdatacenter/definition/scalability',
  'content': 'What is scalability? In information technology, scalability (frequently spelled scaleability) has two uses: for a computer application to function with change in size and volume, and to take advantage of rescaling.. Continual application function with size and volume change. The ability of a computer application or product, hardware or software, to continue to function well when it, or its ...'},
 {'url': 'https://www.g2.com/glossary/scalability',
  'content': 'Scalability is the ability to handle changing workloads without compromising performance. Learn how scalability applies to organizations, systems, processes, and software, and see examples of scalable solutions.'},
 {'url': 'https://www.techopedia.com/definition/9269/scalability',
  'content': 'Scalability is the ability of a system to handle more data, workloads, and users without sacrificing performance or functionality. Learn about di

In [None]:
retriever.invoke("What are the best practices for designing scalable systems?")

In [None]:

retriever.invoke("What are the best practices for designing distributed systems?")

In [None]:
###Router

import json
from langchain_core.messages import HumanMessage, SystemMessage

#Prompt
router_instructions = """ You are an expert at routing a user question to Vectorstore to web search.
The vector store contains a handbook on system design interview.
The web search contains the most recent information on the internet.
You will be given a user query and you need to decide which tool to use.
You can either choose to search the web or search the vector store.
You can only choose one of the options.
If you can answer the user question, return "I can answer that!"
If not, you need to find the most relevant information in the vector store or decide to search the web.
return the json with the following schema:
{
    "choice": "vector_store" or "web_search",
    "reason": "reason for choosing the tool"
    "search_query": "search query to use"
    "search_k": "number of results to return"
}
"""

question = [HumanMessage(content="What is the best way to prepare for a system design interview?")]
test_vector_store = llm_json_mode.invoke([SystemMessage(content=router_instructions)]+ question)
json.loads(test_vector_store.content)