# References
- https://python.langchain.com/docs/modules/chains/popular/summarize.html

In [2]:
import sys
sys.version_info


sys.version_info(major=3, minor=10, micro=12, releaselevel='final', serial=0)

In [None]:
!pip3 install langchain==0.0.242
!pip3 install chromadb==0.4.2

In [3]:
# Constants
SCRAPE_URL = "https://medium.com/@symmetrics_hr/the-m%CC%B6o%CC%B6n%CC%B6k%CC%B6-immigrant-who-s%CC%B6o%CC%B6l%CC%B6d%CC%B6-bought-his-ferrari-e7be20c4d891"
DEFAULT_QUESTION = "What is H1b?"
SAMPLE_PDF_DOCUMENT = "Tech_Hubs_NOFO.pdf"

In [26]:
# Document loader
from langchain.document_loaders import WebBaseLoader
loader = WebBaseLoader(SCRAPE_URL)
data = loader.load()

# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)
# Since FAISS requires splits from text and not Lang Document type, we do the below
all_splits = text_splitter.split_text(text=data[0].page_content)
all_splits

['The MĚ¶oĚ¶nĚ¶kĚ¶ Immigrant Who SĚ¶oĚ¶lĚ¶dĚ¶ Bought His Ferrari | by SparkIQ Tech | MediumThe MĚ¶oĚ¶nĚ¶kĚ¶ Immigrant Who SĚ¶oĚ¶lĚ¶dĚ¶ Bought His FerrariSometimes the last mile action you take in your journey decides whether you fulfil your American Dream !SparkIQ TechÂ·Follow7 min readÂ·Jan 29--ListenShareBackgroundThe Monk Who Sold His Ferrari tells the extraordinary story of Julian Mantle, a lawyer forced to confront the spiritual crisis of his out-of-balance (lavish and rich) life, and the',
 'subsequent wisdom that he gains on a life-changing odyssey that enables him to create a life of passion, purpose and peace.While spirituality is in its place and a very personal realisation, for most of us material things (some are small, some medium and some large) is what drives us to live this life (and ushers hope too, unless you are one of those naturally born spiritual ones). If you are relatively younger, chances are you have dreams of acquiring a certain thing in your life. For',
 'au

## MapReduce chain type

In [12]:
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.llms import GPT4All

llm = GPT4All(model="/home/ubuntu/Downloads/orca-mini-3b.ggmlv3.q4_0.bin",max_tokens=2048)
chain = load_summarize_chain(llm=llm, chain_type="map_reduce")

docs = [Document(page_content=t) for t in all_splits[:3]]

chain.run(docs)

Found model file at  /home/ubuntu/Downloads/orca-mini-3b.ggmlv3.q4_0.bin


llama.cpp: loading model from /home/ubuntu/Downloads/orca-mini-3b.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 3200
llama_model_load_internal: n_mult     = 240
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 26
llama_model_load_internal: n_rot      = 100
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: n_ff       = 8640
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 3B
llama_model_load_internal: ggml ctx size =    0.06 MB
llama_model_load_internal: mem required  = 2862.72 MB (+  682.00 MB per state)
llama_new_context_with_model: kv self size  =  650.00 MB


' \n\nThe story of a lawyer who sold his Ferrari to find redemption through spirituality.'

## Stuff Chain Type

In [13]:
chain = load_summarize_chain(llm=llm, chain_type="stuff")
chain.run(docs)

' The article talks about a man named Venkat who worked as a Google Engineer and wished to buy his Ferrari before he turned 40. He applied for H1b visa, which is a highly sought-after visa in the US tech sector. The article describes how he faced many challenges but eventually succeeded in getting the visa.'

# Prompt 

In [15]:
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate

prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY IN ITALIAN:"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
chain = load_summarize_chain(llm=llm, chain_type="stuff", prompt=PROMPT)
chain.run(docs)

" L'immigrato tedesco che ha acquistato la Ferrari 599 GTB Fiorano, racconta la storia di una vita cambiata e l'avventura spirituale che lo hanno portato ad un'existential crisis e all'ascensione spiritica.�� un'esperienza personale e personalissima, ma è stato anche questa la storia di una persona che ha capito come sia importante per la vita avere un'attività passionaria e divertente, e non solo una vita di lavoro.�� un'esperienza che\n\nè molto utile per chiunque abbia paura di non poter fare quello che desidera o che sia in equilibrio spirituale. La storia è anche un'esempio di come una persona, dopo essere andata a grandi distanze dal suo cammino spirituale, possa tornare indietro e capire cosa significa per la vita seguirgli.�� stato un'avventura"

In [19]:
chain = load_summarize_chain(llm=llm, 
                             chain_type="map_reduce", 
                             return_intermediate_steps=True)
chain({"input_documents": docs}, return_only_outputs=True)

{'intermediate_steps': [' The M����o����n����k���� Immigrant Who S����o����l����d���� Bought His Ferrari | by SparkIQ Tech | MediumThe Monk Who Sold His Ferrari tells the story of a lawyer who was forced to confront his spiritual crisis and out-of-balance life, and how he found redemption through selling his beloved Ferrari.',
  ' \n\nThe story follows the journey of an individual who embarks on a life-changing odyssey that leads to personal growth and fulfillment. The protagonist realizes the importance of spirituality in his life, but also learns that material things are what drives him to live this life and hope. The story highlights the significance of balance between these two aspects of life.',
  ' \n\nThe article talks about a person named Venkat who had always dreamed of owning a Ferrari. He finally achieved his dream when he purchased the car he had been dreaming about since childhood. However, this was not enough for him and he also applied for an H1B visa to work in the US t

# Custom MapReduce Chain

In [37]:
from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain,ReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.llms import GPT4All
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


llm = GPT4All(model="/home/ubuntu/Downloads/orca-mini-3b.ggmlv3.q4_0.bin",max_tokens=2048)

map_template_string = """Give the following article , generate a summary of the article and mention the source of article
Code:
{article}

Return the summary in the following format:
summary: summary text
"""


reduce_template_string = """Given the following summary text, answer the following question
{summary_text}
Question: {question}
Answer:
"""

# Prompt to use in map and reduce stages 
MAP_PROMPT = PromptTemplate(input_variables=["article"], template=map_template_string)
REDUCE_PROMPT = PromptTemplate(input_variables=["summary_text", "question"], template=reduce_template_string)

# LLM to use in map and reduce stages 
# llm = OpenAI()
map_llm_chain = LLMChain(llm=llm, prompt=MAP_PROMPT)
reduce_llm_chain = LLMChain(llm=llm, prompt=REDUCE_PROMPT)

# Takes a list of documents and combines them into a single string
combine_documents_chain = StuffDocumentsChain(
    llm_chain=reduce_llm_chain,
    document_variable_name="summary_text",
)

# Combines and iteravely reduces the mapped documents 
reduce_documents_chain = ReduceDocumentsChain(
        # This is final chain that is called.
        combine_documents_chain=combine_documents_chain,
        # If documents exceed context for `combine_documents_chain`
        collapse_documents_chain=combine_documents_chain,
        # The maximum number of tokens to group documents into
        token_max=3000)

# Combining documents by mapping a chain over them, then combining results with reduce chain
combine_documents = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_llm_chain,
     # Reduce chain
    reduce_documents_chain=reduce_documents_chain,
    # The variable name in the llm_chain to put the documents in
    document_variable_name="article",
)


map_reduce = MapReduceChain(
    combine_documents_chain=combine_documents,
    text_splitter=CharacterTextSplitter(chunk_size=500, chunk_overlap=0),
)

Found model file at  /home/ubuntu/Downloads/orca-mini-3b.ggmlv3.q4_0.bin


llama.cpp: loading model from /home/ubuntu/Downloads/orca-mini-3b.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 3200
llama_model_load_internal: n_mult     = 240
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 26
llama_model_load_internal: n_rot      = 100
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: n_ff       = 8640
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 3B
llama_model_load_internal: ggml ctx size =    0.06 MB
llama_model_load_internal: mem required  = 2862.72 MB (+  682.00 MB per state)
llama_new_context_with_model: kv self size  =  650.00 MB


In [45]:
loader = WebBaseLoader(SCRAPE_URL)
data = loader.load()
article = data[0].page_content

map_reduce.run(input_text=article, question="What is the profession mentioned in the article?")

LLaMA ERROR: The prompt is 2887 tokens and the context window is 2048!


'The profession mentioned in the article is "Journalist".'

# Refine Chain

In [75]:
# Document loader
from langchain.document_loaders import WebBaseLoader
loader = WebBaseLoader(SCRAPE_URL)
data = loader.load()

# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)
# Since FAISS requires splits from text and not Lang Document type, we do the below
all_splits = text_splitter.split_text(text=data[0].page_content)
docs = [Document(page_content=t) for t in all_splits[:3]]

refine_chain = load_summarize_chain(llm=llm, chain_type="refine",return_refine_steps=True)

# refine_chain.run(docs[0].page_content)
resp = refine_chain({"input_documents": [Document(page_content=t) for t in all_splits[:15]]},return_only_outputs=True)


In [76]:
resp

{'intermediate_steps': [' The M����o����n����k���� Immigrant Who S����o����l����d���� Bought His Ferrari | by SparkIQ Tech | MediumThe Monk Who Sold His Ferrari tells the story of a lawyer who was forced to confront his spiritual crisis and out-of-balance life, and how he found redemption through selling his beloved Ferrari.',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  ''],
 'output_text': ''}

In [77]:
prompt_template = """Write a concise summary of the following:


{text}


"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])

refine_template = (
    "Your job is to produce a final summary\n"
    "We have provided an existing summary up to a certain point: {existing_answer}\n"
    "We have the opportunity to refine the existing summary"
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{text}\n"
    "------------\n"
    "Given the new context, refine the original summary in Italian"
    "If the context isn't useful, return the original summary."
)
refine_prompt = PromptTemplate(
    input_variables=["existing_answer", "text"],
    template=refine_template,
)
chain = load_summarize_chain(llm=llm, chain_type="refine", return_intermediate_steps=True, question_prompt=PROMPT, refine_prompt=refine_prompt)
chain({"input_documents": docs}, return_only_outputs=True)

{'intermediate_steps': ['A: Here\'s your answer:\n\n"The Monk Who Sold His Ferrari tells the story of Julian Mantle, a lawyer who is forced to confront the spiritual crisis of his out-of-balance (lively and rich) life, and the decision he makes to sell his beloved Ferrari in order to pursue a more meaningful existence."',
  '',
  ''],
 'output_text': ''}