In [2]:
import os
from dotenv import load_dotenv
load_dotenv()


True

In [3]:

# Environment variables this notebook relies on (normally supplied via .env).
ENV_VAR_NAMES = (
    "GOOGLE_API_KEY",
    "TAVILY_API_KEY",
    "GROQ_API_KEY",
    "LANGSMITH_API_KEY",
    "LANGSMITH_PROJECT",
    "LANGSMITH_ENDPOINT",
    "LANGSMITH_TRACING",
)


def export_env_vars(names, environ=os.environ):
    """Read the given variables from ``environ`` and re-export the ones that are set.

    Assigning ``None`` into ``os.environ`` raises ``TypeError``, so variables
    that are not defined are skipped (and reported with a warning) instead of
    being written back.

    Args:
        names: Iterable of environment variable names to look up.
        environ: Mapping to read from and write to; defaults to ``os.environ``.

    Returns:
        Dict mapping every requested name to its value, or ``None`` when unset.
    """
    import warnings

    values = {name: environ.get(name) for name in names}
    missing = [name for name, value in values.items() if value is None]

    for name, value in values.items():
        if value is not None:
            environ[name] = value

    if missing:
        warnings.warn(
            "Environment variables not set: " + ", ".join(missing),
            stacklevel=2,
        )
    return values


_env_values = export_env_vars(ENV_VAR_NAMES)

# Keep the individual module-level names available (each is None when unset).
GOOGLE_API_KEY = _env_values["GOOGLE_API_KEY"]
TAVILY_API_KEY = _env_values["TAVILY_API_KEY"]
GROQ_API_KEY = _env_values["GROQ_API_KEY"]
LANGSMITH_API_KEY = _env_values["LANGSMITH_API_KEY"]
LANGSMITH_PROJECT = _env_values["LANGSMITH_PROJECT"]
LANGSMITH_ENDPOINT = _env_values["LANGSMITH_ENDPOINT"]
LANGSMITH_TRACING = _env_values["LANGSMITH_TRACING"]


In [4]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Import from langchain_core (as the other cells of this notebook do) rather
# than the deprecated top-level `langchain` re-export.
from langchain_core.prompts import PromptTemplate

# Prompt text with a single `{skill}` placeholder.
template = 'Hi! I am learning {skill}. Can you suggest me top 5 things to learn?\n'

# from_template derives the input variables from the placeholders in the text,
# so they do not have to be listed by hand.
prompt = PromptTemplate.from_template(template)



In [6]:
prompt

PromptTemplate(input_variables=['skill'], input_types={}, partial_variables={}, template='Hi! I am learning {skill}. Can you suggest me top 5 things to learn?\n')

In [7]:
from langchain_core.output_parsers import StrOutputParser
parser = StrOutputParser()

## LCEL

In [8]:
chain = prompt | llm | parser

In [9]:
print(chain.invoke({'skill':'Big Data'}))

To effectively learn Big Data, focus on these 5 key areas:

1. **Programming Languages (Python or Java):**  Big Data processing relies heavily on programming. Python is preferred for its ease of use, extensive libraries (like Pandas, NumPy, Scikit-learn for data manipulation and analysis), and strong community support. Java is also crucial, especially for working with Hadoop and Spark ecosystems.  Choose one to start with and become proficient; you can always add the other later.

2. **Hadoop Ecosystem:**  Understanding the fundamentals of Hadoop (HDFS for storage and MapReduce for processing) is essential. While newer tools like Spark are gaining popularity, Hadoop remains a cornerstone of many Big Data architectures.  Focus on concepts like data partitioning, data replication, and the MapReduce paradigm.  Don't get bogged down in the minutiae of Hadoop's underlying implementation unless you plan on becoming a Hadoop administrator.

3. **Apache Spark:** Spark is a fast, in-memory data

In [10]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough , RunnableLambda

In [11]:
chain = RunnablePassthrough()

In [12]:
chain.invoke("Hi")

'Hi'

In [13]:
def string_upper(str):
    """Return the upper-cased version of the given text.

    Note: the parameter name shadows the built-in ``str`` inside this
    function; it is kept unchanged so keyword callers keep working.
    """
    uppercased = str.upper()
    return uppercased

In [14]:
chain = RunnablePassthrough() | RunnableLambda(string_upper)

In [15]:
chain.invoke("abcd")

'ABCD'

In [16]:
chain = RunnableParallel({'x':RunnablePassthrough(),'y':RunnablePassthrough()})

In [17]:
chain.invoke("Hello")

{'x': 'Hello', 'y': 'Hello'}

In [18]:
chain.invoke({'Youtube': 'abcd','Blog': "abc blog"})

{'x': {'Youtube': 'abcd', 'Blog': 'abc blog'},
 'y': {'Youtube': 'abcd', 'Blog': 'abc blog'}}

In [19]:
# A lambda is an anonymous function in Python; this one returns the 'Blog' entry
# of the mapping it is given.
func = lambda x: x['Blog']
func({'Youtube': 'abcd','Blog': "abc blog"})

'abc blog'

In [20]:
def fetch_website(input: dict):
    """Look up the ``'Website'`` entry of a mapping.

    Returns the stored value, or the string ``'Not found'`` when the key is
    absent.
    """
    return input.get('Website', 'Not found')

In [21]:
# Both branches receive the same input dict:
#   'x'    -> passed through unchanged, then handed to fetch_website
#   'Blog' -> a plain lambda that picks the 'Blog' entry
chain = RunnableParallel(
    {
        'x': RunnablePassthrough() | RunnableLambda(fetch_website),
        'Blog': lambda x: x['Blog'],
    }
)
chain.invoke({'Youtube': 'abcd', 'Blog': "abc blog"})

{'x': 'Not found', 'Blog': 'abc blog'}

## RAG

In [22]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# Embedding model used further down to index the chunks in the vector store.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

### Reading the text file from the source location

# Alternative (disabled): load every .txt file under ./source instead of a
# single file. DirectoryLoader is not imported in the cells shown here.
# loader = DirectoryLoader('./source', glob="./*.txt", loader_cls=TextLoader)
# docs = loader.load()
loader = TextLoader('./source.txt')
docs = loader.load()

### Creating chunks using RecursiveCharacterTextSplitter

# Sizes are measured with length_function (len), i.e. in characters:
# chunks of up to 50 characters with a 10-character overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=50,
    chunk_overlap=10,
    length_function=len
)
new_docs = text_splitter.split_documents(documents=docs)
# Plain-text view of the chunks; not referenced by the later cells shown here.
doc_strings = [doc.page_content for doc in new_docs]

### BGE Embeddings

# NOTE: the triple-quoted block below is a bare string expression, not executed
# code. It keeps an alternative HuggingFace BGE embedding setup for reference.
'''from langchain.embeddings import HuggingFaceBgeEmbeddings

model_name = "BAAI/bge-base-en-v1.5"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
'''

### Creating the retriever from the vector DB

# Index the chunks with the embedding model defined at the top of this cell.
db = Chroma.from_documents(new_docs, embeddings)
# Request the 4 closest chunks per query. If the index holds fewer elements,
# Chroma reduces the number of requested results and logs a notice.
retriever = db.as_retriever(search_kwargs={"k": 4})

In [23]:
# RAG prompt: the model is told to answer from the supplied {context} only.
# This rebinds `template` and `prompt` for the retrieval chain.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = PromptTemplate.from_template(template)


In [24]:
def format_docs(docs):
    """Join the text of retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute
            (presumably what the retriever returns; verify against the
            retriever in use).

    Returns:
        The ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Without the formatting step, the prompt's {context} slot receives the raw
# list returned by the retriever and renders its repr (metadata included)
# instead of the chunk text.
retrieval_chain = (
    RunnableParallel(
        {
            "context": retriever | RunnableLambda(format_docs),
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [25]:
question ="What is my name"

In [26]:
result = await retrieval_chain.ainvoke(question)
print(result)


Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


Sparsh


In [27]:
# Rebuild the "top 5 things to learn" prompt and pipe it straight into the
# model; there is no output parser at the end of this chain.
template = 'Hi! I am learning {skill}. Can you suggest me top 5 things to learn?\n'
prompt = PromptTemplate.from_template(template)
chain = prompt | llm

In [30]:
# Print the answer piece by piece as it is streamed. No newline is written
# between pieces, so flush explicitly to make each one appear right away
# instead of waiting in the stdout buffer.
for s in chain.stream({'skill':'Data Structure and Algorithms'}):
    print(s.content, end='', flush=True)

For someone starting to learn Data Structures and Algorithms (DSA), focusing on these 5 foundational areas will provide the strongest base:

1. **Big O Notation:** This is crucial.  Understanding Big O notation allows you to analyze the efficiency of your algorithms in terms of time and space complexity.  Don't just memorize the notations; understand *why* an algorithm has a particular time complexity (e.g., O(n), O(log n), O(n^2)). This will inform your choices when designing algorithms.

2. **Arrays and Linked Lists:** These are fundamental data structures.  Mastering their implementations (including variations like doubly linked lists, circular linked lists) and understanding their strengths and weaknesses (e.g., array access vs. linked list insertion/deletion) is essential.

3. **Searching and Sorting Algorithms:**  Learn at least one algorithm from each category:
    * **Searching:** Linear search and Binary search (understand when each is appropriate).
    * **Sorting:**  Bubble 