In [1]:
from transformers import pipeline
import textwrap
import random

In [2]:
PROMPT_TEMPLATES = {
    "blog_to_tweet": "Rewrite the following blog post into a 3-tweet thread. Each tweet should be concise, engaging and capture key points:\n\n{content}",
    "academic_to_linkedin": "Summarise the following academic abstract into a friendly LinkedIn post:\n\n{content}",
    "news_to_email": "Rewrite this news article into a short internal email:\n\n{content}",
    "informal_to_formal": "Rewrite the following text in a formal, professional tone:\n\n{content}"
}

In [3]:
generator = pipeline("text2text-generation", model="google/flan-t5-base", tokenizer="google/flan-t5-base")

Device set to use mps:0


In [4]:
def transform_content(content: str, task_key: str, max_length: int = 512):
    prompt = PROMPT_TEMPLATES[task_key].format(content=content.strip())
    result = generator(prompt, max_length=max_length, do_sample=True, temperature=0.7, top_k=50)
    return result[0]['generated_text']

In [5]:
sample_text = """
AI is revolutionising the financial services sector by automating manual processes, improving fraud detection, and enabling better decision-making through predictive analytics.
"""

output = transform_content(sample_text, "blog_to_tweet")
print("\nTransformed Output:\n")
print(textwrap.fill(output, width=80))


Transformed Output:

@mcfly_flna - AI is revolutionising the financial services sector by automating
manual processes, improving fraud detection, and enabling better decision-making
through predictive analytics.


In [6]:
def detect_format(text):
    if len(text) < 280:
        return "tweet"
    elif "abstract" in text.lower() or "in this paper" in text.lower():
        return "academic"
    elif text.lower().startswith("dear") or "regards" in text.lower():
        return "email"
    else:
        return "blog"

In [7]:
def clean_output(text):
    lines = text.split("\n")
    seen = set()
    cleaned = []
    for line in lines:
        if line not in seen and line.strip():
            seen.add(line)
            cleaned.append(line)
    return "\n".join(cleaned)

In [10]:
pip install langchain openai transformers tiktoken faiss-cpu

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [12]:
!pip install langchain-community

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain-community)
  Downloading aiohttp-3.12.14-cp310-cp310-macosx_11_0_arm64.whl.metadata (7.6 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting aiohappyeyeballs>=2.5.0 (from aiohttp<4.0.0,>=3.8.3->langchain-community)
  Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.4.0 (from aiohttp<4.0.0,>=3.8.3->langchain-community)
  Downloading aiosignal-1.4.0-py3-none-any.whl.metadata (3.7 kB)
Collecting frozenlist>=1.1.1 (from aiohttp

In [13]:
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Load Flan-T5 as LangChain-compatible model
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

llm = HuggingFacePipeline(pipeline=pipe)

Device set to use mps:0
  llm = HuggingFacePipeline(pipeline=pipe)


In [14]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

prompt_template = PromptTemplate(
    input_variables=["content"],
    template="Rewrite the following blog post into a 3-tweet thread. Each tweet should be concise, engaging, and capture key points:\n\n{content}"
)

chain = LLMChain(llm=llm, prompt=prompt_template)


  chain = LLMChain(llm=llm, prompt=prompt_template)


In [15]:
sample_text = """
AI is transforming financial services by automating manual tasks, enhancing fraud detection, and using predictive analytics to improve decisions.
"""

output = chain.run(content=sample_text)
print(output)


  output = chain.run(content=sample_text)


@Analyst_Boys - AI is transforming financial services


In [16]:
def get_template(task):
    templates = {
        "Blog to Tweet": "Rewrite the following blog post into a 3-tweet thread:\n\n{content}",
        "Informal to Formal": "Rewrite the following message using a formal tone:\n\n{content}",
        "Academic to LinkedIn": "Summarize this academic abstract for a general audience on LinkedIn:\n\n{content}",
    }
    return templates[task]

def build_chain(task: str):
    prompt = PromptTemplate(input_variables=["content"], template=get_template(task))
    return LLMChain(llm=llm, prompt=prompt)


In [1]:

import ipywidgets as widgets
from IPython.display import display, clear_output
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA

# Widgets
uploader = widgets.FileUpload(accept='.pdf', multiple=False)
display(uploader)

output_area = widgets.Output()

def on_upload(change):
    output_area.clear_output()
    for filename, fileinfo in uploader.value.items():
        with open(filename, 'wb') as f:
            f.write(fileinfo['content'])
        loader = PyPDFLoader(filename)
        docs = loader.load()
        splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        docs = splitter.split_documents(docs)
        embeddings = HuggingFaceEmbeddings()
        db = FAISS.from_documents(docs, embeddings)
        retriever = db.as_retriever(search_kwargs={"k": 3})
        rag_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

        # Summarization
        summary = rag_chain.run("Provide a concise summary of the document.")

        # Display
        with output_area:
            print("### Document Summary")
            print(summary)
            print("\n### Ask a question in the next cell by calling rag_chain.run('your question')")

        break

uploader.observe(on_upload, names='value')
display(output_area)


FileUpload(value=(), accept='.pdf', description='Upload')

Output()