<a href="https://colab.research.google.com/github/rabbitmetrics/langchain-13-min/blob/main/notebooks/langchain-13-min.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Imports
from langchain import PromptTemplate
from langchain.chains import LLMChain, SimpleSequentialChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

from factory import Models

In [None]:
# Create an LLM
# `Models` is the project-local factory imported from `factory`; `models` and
# `llm` are reused by later cells, so their names must stay as they are.
models = Models()
llm = models.llama()
# One-off prompt to check the model responds. As the final expression of the
# cell, its return value is what the notebook echoes.
llm("explain large language models in one sentence")

In [None]:
# Prompt the LLM through a reusable PromptTemplate

# Template text with a single placeholder, {concept}, filled in by format().
template = """
You are an expert data scientist with an expertise in building deep learning models. 
Explain the concept of {concept} in a couple of lines
"""
prompt = PromptTemplate(template=template, input_variables=["concept"])

# Render the template for one concrete concept and send the text to the LLM.
llm(prompt.format(concept="autoencoder"))

In [None]:
# Wrap prompts in chains, then compose them so that the output of one chain
# becomes the input of the next.

# First chain: the expert-style prompt defined in the previous cell.
chain = LLMChain(llm=llm, prompt=prompt)
# Capture and print the result. Only the final expression of a cell is echoed
# by the notebook, so a bare `chain.run(...)` here would discard its output.
concept_description = chain.run("autoencoder")
print(concept_description)

# Second chain: rewrite a concept description for a five-year-old.
second_prompt = PromptTemplate(
    input_variables=["ml_concept"],
    template="Turn the concept description of {ml_concept} and explain it to me like I'm five in 500 words",
)
chain_two = LLMChain(llm=llm, prompt=second_prompt)
# Exercise the second chain on the real description produced above rather
# than on a placeholder string, and show what it returns.
print(chain_two.run(concept_description))

# Sequential chain: first chain's output is second chain's input.
# `explanation` is consumed by the text-splitting cell further down.
overall_chain = SimpleSequentialChain(chains=[chain, chain_two], verbose=True)
explanation = overall_chain.run("autoencoder")

In [None]:
# Split the explanation produced above into overlapping document chunks
# (chunk_size and chunk_overlap are passed straight to the splitter).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
documents = text_splitter.create_documents([explanation])

# Show the last chunk to inspect the document structure
documents[-1]

In [None]:
# Reference: https://python.langchain.com/docs/modules/data_connection/vectorstores/
embeddings = models.llama_embeddings()

# Single query string shared by the embedding demo and the similarity search.
query = "What is magical about an autoencoder?"

# LangChain embeddings offer two base methods: embed_documents and embed_query.
# Keep the results and report their sizes; bare calls in the middle of a cell
# would compute the vectors and then throw them away unseen.
texts = [doc.page_content for doc in documents]
document_vectors = embeddings.embed_documents(texts)
query_vector = embeddings.embed_query(query)
print(
    f"Embedded {len(document_vectors)} chunks; "
    f"query vector has {len(query_vector)} dimensions"
)

# There are many ways to create a vector store.
# Here we use the Chroma library to create a vector store in memory.
# NOTE: Chroma.from_documents failed here because the chunks carry an empty
# metadata dict. Give every chunk a non-empty metadata entry (keeping any
# metadata it already has) and pass texts and metadata explicitly instead.
metadatas = [
    {**doc.metadata, "chunk_index": index}
    for index, doc in enumerate(documents)
]
db = Chroma.from_texts(texts, embedding=embeddings, metadatas=metadatas)

# Do a simple vector similarity search
result = db.similarity_search(query)

print(result)