# 20240310 - Project: Summarization



In [None]:
import os
from dotenv import find_dotenv, load_dotenv

# Locate a .env file and load its entries into the process environment.
# override=True lets values from the file replace variables that are already set.
load_dotenv(dotenv_path=find_dotenv(), override=True)


## A) Basic Prompt

In [None]:
from langchain_openai import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage, SystemMessage


In [None]:
# Sample passage about the Mojo programming language, used as the input to
# summarize in the cells below.
#
# This is deliberately a regular (non-raw) triple-quoted string: each trailing
# backslash is a line continuation, so long sentences are wrapped in the source
# but reach the model as single lines. With a raw string (r"""...""") the
# backslashes and the newlines after them would be sent to the model literally.
# A backslash must be the very last character on its line (no trailing space).
text = """
Mojo combines the usability of Python with the performance of C, unlocking unparalleled programmability \
of AI hardware and extensibility of AI models.
Mojo is a new programming language that bridges the gap between research and production \
by combining the best of Python syntax with systems programming and metaprogramming.
With Mojo, you can write portable code that’s faster than C and seamlessly inter-op with the Python ecosystem.
When we started Modular, we had no intention of building a new programming language. \
But as we were building our platform with the intent to unify the world’s ML/AI infrastructure, \
we realized that programming across the entire stack was too complicated. Plus, we were writing a \
lot of MLIR by hand and not having a good time.
And although accelerators are important, one of the most prevalent and sometimes overlooked "accelerators" \
is the host CPU. Nowadays, CPUs have lots of tensor-core-like accelerator blocks and other AI acceleration \
units, but they also serve as the “fallback” for operations that specialized accelerators don’t handle, \
such as data loading, pre- and post-processing, and integrations with foreign systems. \
"""

# Chat model used for the basic-prompt example; temperature=0 is passed to the model.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)

# Conversation sent to the model: a system message setting the role, followed
# by the human request with the text to summarize embedded in it.
messages = [
    SystemMessage(
        content='You are an expert copywriter with expertise in summarizing documents'
    ),
    HumanMessage(
        content=(
            'Please provide a short and concise summary of the following text:\n'
            f' TEXT: {text}'
        )
    ),
]



In [None]:
# Number of tokens `text` takes up according to the model's tokenizer.
llm.get_num_tokens(text)

In [None]:
# Send the system + human messages to the chat model and keep its reply.
summary_output = llm.invoke(messages)

In [None]:
# print(summary_output.content)

## Summarizing Using Prompt Templates

In [None]:
# PromptTemplate is imported from `langchain.prompts`; importing it from the
# package root (`from langchain import PromptTemplate`) is deprecated.
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Prompt with two placeholders: the text to summarize and the language the
# summary should be translated to.
template = '''
Write a concise and short summary of the following text:
TEXT: `{text}`
Translate the summary to {language}.
'''

prompt = PromptTemplate(
    input_variables=['text', 'language'],
    template=template
)


In [None]:
# Token count of the fully rendered prompt (template text plus the inserted `text`).
llm.get_num_tokens(prompt.format(text=text, language='English'))

In [None]:
# Bind the prompt template to the chat model, then run it once asking for a
# French translation of the summary and print the returned result.
chain = LLMChain(prompt=prompt, llm=llm)
summary = chain.invoke(dict(text=text, language='french'))
print(summary)

## Summarizing using StuffDocumentsChain

In [None]:
# Maintained import paths: PromptTemplate from `langchain.prompts` (the
# package-root import is deprecated) and ChatOpenAI from `langchain_openai`,
# the same package the first section of this notebook imports it from
# (`langchain.chat_models.ChatOpenAI` is deprecated).
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document



In [None]:
# Chat model used by the summarization chain.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)

# Read the whole file into memory and wrap it in a single Document, since the
# summarize chain is invoked with a list of documents.
with open('files/sj.txt', mode='r', encoding='utf-8') as source_file:
    text = source_file.read()

docs = [Document(page_content=text)]


In [None]:
# Prompt with a single `{text}` placeholder that receives the document content.
template = (
    'Write a concise and short summary of the following text.\n'
    'TEXT: `{text}`\n'
)
prompt = PromptTemplate(template=template, input_variables=['text'])

# Build a 'stuff' summarization chain that uses the custom prompt above.
chain = load_summarize_chain(llm=llm, chain_type='stuff', prompt=prompt, verbose=False)

# Run the chain over the document list; the result is a dict (the summary is
# read from its 'output_text' key in the next cell).
output_summary = chain.invoke(docs)

In [None]:
# The generated summary is stored under the 'output_text' key of the result.
print(output_summary['output_text'])

## Summarizing Large Documents Using map_reduce

In [None]:
import os

from dotenv import find_dotenv, load_dotenv
# Maintained import paths: PromptTemplate from `langchain.prompts` (the
# package-root import is deprecated) and ChatOpenAI from `langchain_openai`,
# the same package the first section of this notebook imports it from
# (`langchain.chat_models.ChatOpenAI` is deprecated).
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load variables from a .env file into the environment; override=True lets the
# file's values replace variables that are already set.
load_dotenv(find_dotenv(), override=True)

In [None]:
# Chat model used by the map_reduce summarization chain.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)

# Read the whole file; it is split into chunks further down rather than being
# wrapped in a single Document.
with open('files/sj.txt', mode='r', encoding='utf-8') as source_file:
    text = source_file.read()

In [None]:
# Number of tokens the full file content takes up according to the model's tokenizer.
llm.get_num_tokens(text)

In [None]:
# Split the text into Document chunks: chunk_size=10000 with an overlap of 50
# between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=10_000)
chunks = text_splitter.create_documents(texts=[text])

In [None]:
# How many chunks the splitter produced.
len(chunks)

In [None]:
# map_reduce summarization chain using the library's default prompts
# (no custom prompt is passed here).
chain = load_summarize_chain(llm=llm, chain_type='map_reduce', verbose=False)

# Run the chain over the list of chunks produced by the text splitter.
output_summary = chain.invoke(chunks)

In [None]:
# print(output_summary)

In [12]:
# Inspect the prompt of the chain's `llm_chain`
# (presumably the per-chunk "map" step — confirm against the LangChain docs).
chain.llm_chain.prompt

PromptTemplate(input_variables=['text'], template='Write a concise summary of the following:\n\n\n"{text}"\n\n\nCONCISE SUMMARY:')

In [13]:
# Inspect the prompt of the chain's `combine_document_chain`
# (presumably the final "combine" step — confirm against the LangChain docs).
chain.combine_document_chain.llm_chain.prompt


PromptTemplate(input_variables=['text'], template='Write a concise summary of the following:\n\n\n"{text}"\n\n\nCONCISE SUMMARY:')

## map_reduce with Custom Prompts

In [14]:
# Custom prompt passed as `map_prompt` to the map_reduce chain; `{text}` is
# the only placeholder.
map_prompt = (
    '\n'
    'Write a short and concise summary of the following:\n'
    'Text: `{text}`\n'
    'CONCISE SUMMARY:\n'
)

map_prompt_template = PromptTemplate(template=map_prompt, input_variables=['text'])

In [15]:
    combine_prompt = '''
Write a concise summary of the following text that covers the key points.
Add a title to the summary.
Start your summary with an INTRODUCTION PARAGRAPH that gives an overview of the topic 
FOLLOWED by BULLET POINTS if possible AND end the summary with a CONCLUSION PHRASE.
Text: `{text}`
'''

# Wrap the combine prompt string in a PromptTemplate with `text` as its only variable.
combine_prompt_template = PromptTemplate(
    input_variables=['text'],
    template=combine_prompt,
)


In [16]:
# map_reduce summarization chain wired with the two custom prompts defined
# above (one passed as map_prompt, one as combine_prompt).
summary_chain = load_summarize_chain(
    llm,
    chain_type='map_reduce',
    combine_prompt=combine_prompt_template,
    map_prompt=map_prompt_template,
    verbose=False,
)

# Run the chain over the chunks; the summary is read from 'output_text' below.
output = summary_chain.invoke(chunks)

In [18]:
# Print the final summary text stored under the 'output_text' key of the result.
print(output['output_text'])

Title: "Lessons from Steve Jobs' Commencement Speech"

Introduction:
Steve Jobs shares three impactful stories from his life during a commencement speech, highlighting the importance of following one's passion and living authentically.

Key Points:
- Story 1: Dropped out of college, followed curiosity, designed Macintosh computer
- Story 2: Fired from Apple, found success with NeXT and Pixar
- Story 3: Faced death after cancer diagnosis, changed perspective on life
- Emphasis on following passion, not settling, living each day to the fullest
- Importance of inevitability of death, living authentically, following intuition
- Reference to The Whole Earth Catalog, staying hungry and foolish in new beginnings

Conclusion:
Steve Jobs' stories serve as a reminder to pursue passion, embrace change, and live authentically.
