<a href="https://colab.research.google.com/github/moulaali/colabs/blob/main/openai_lang_chain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dependencies

In [None]:
!pip install langchain
!pip install openai
!pip install langchain_decorators

# Auth Setup

In [None]:
import os

from google.colab import userdata

# Read the key named OPENAI_API_KEY from Colab's `userdata` store and publish
# it as an environment variable of the same name, so it never has to appear
# in the notebook source.
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

# Checker Chain: Self Fact-Checking

In [None]:
from langchain.chains import LLMCheckerChain
from langchain.llms import OpenAI

# Interestingly, the answer differs from run to run — presumably because the
# model is sampled at temperature 0.7 rather than decoded deterministically.
llm = OpenAI(temperature=0.7)
text = "What type of mammal lays the biggest eggs?"

# verbose=True makes the chain announce each sub-chain it enters and leaves.
checker_chain = LLMCheckerChain.from_llm(llm=llm, verbose=True)
checker_chain.run(text)



[1m> Entering new LLMCheckerChain chain...[0m


[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


' Based on the information provided, the platypus is the type of mammal that lays the biggest eggs.'

# Summarization of a Webpage with a Prompt

In [None]:
# html parsing
!pip install requests beautifulsoup4

In [None]:
import requests
from bs4 import BeautifulSoup

def get_url_body(url, timeout=30):
  """Download a web page and return its visible text.

  Args:
    url: Address of the page to fetch.
    timeout: Seconds to wait on the server before giving up, so that an
      unresponsive host cannot stall the notebook indefinitely.

  Returns:
    The text of the page's <body> (or of the whole document when the markup
    has no <body> tag), with surrounding whitespace stripped from each text
    fragment and the fragments joined by single spaces. If the server answers
    with anything other than HTTP 200, a message is printed and the sentinel
    string "failure" is returned; callers must check for it.

  Raises:
    Whatever `requests.get` raises for network-level problems (including a
    timeout) is propagated unchanged.
  """
  response = requests.get(url, timeout=timeout)
  if response.status_code != 200:
    print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
    return "failure"

  soup = BeautifulSoup(response.text, 'html.parser')

  # soup.body is None when the markup contains no <body> tag (e.g. an HTML
  # fragment); fall back to the whole document instead of raising
  # AttributeError on None.
  root = soup.body if soup.body is not None else soup
  return root.get_text(separator=' ', strip=True)

In [None]:
# Article used as the running example for the summarization cells below.
ARTICLE_URL = "https://abcnews.go.com/Politics/tiktok-sues-federal-government-potential-us-ban/story?id=109994231"

url_txt = get_url_body(url=ARTICLE_URL)

# get_url_body reports a non-200 response with the sentinel string "failure".
# Stop here rather than letting that placeholder be sent to the LLM as if it
# were the article text.
if url_txt == "failure":
    raise RuntimeError(f"Could not download article text from {ARTICLE_URL}")

# Show only a preview: the scraped page text can be long enough to swamp the
# cell output.
print(f"Fetched {len(url_txt)} characters")
url_txt[:500]

In [None]:
from langchain_decorators import llm_prompt
# Prompt-backed function: the body is deliberately just a bare `return`.
# The docstring holds the {length} and {text} placeholders, which match the
# parameter names; presumably @llm_prompt fills them in, sends the result to
# the model and returns the reply — confirm against the langchain_decorators
# docs. Keep ordinary documentation out of the docstring, since anything
# written there would likely become part of the prompt.
@llm_prompt
def summarize(text:str, length="short") -> str:
    """
    Summarize this text in {length} sentences:
    {text}
    """
    return



In [None]:
# NOTE(review): the prompt template reads "in {length} sentences", so
# length=100 asks for a 100-sentence summary — confirm that is intended
# (as opposed to, say, roughly 100 words).
summary = summarize(
    text=url_txt,
    length=100,
)
summary

# Chain of Density

In [None]:
# Chain-of-Density prompt: the article is substituted for {text}, and the model
# is asked for five successively denser summaries, returned as a JSON list of
# {"Missing_Entities", "Denser_Summary"} objects.
template = """Article: {text}
You will generate increasingly concise, entity-dense summaries of the above article.
Repeat the following 2 steps 5 times.
Step 1. Identify 1-3 informative entities (";" delimited) from the article which are missing from the previously generated summary.
Step 2. Write a new, denser summary of identical length which covers every entity and detail from the previous summary plus the missing entities.
A missing entity is:
- relevant to the main story,
- specific yet concise (5 words or fewer),
- novel (not in the previous summary),
- faithful (present in the article),
- anywhere (can be located anywhere in the article).
Guidelines:
- The first summary should be long (4-5 sentences, ~80 words) yet highly non-specific, containing little information beyond the entities marked as missing. Use overly verbose language and fillers (e.g., "this article discusses") to reach ~80 words.
- Make every word count: rewrite the previous summary to improve flow and make space for additional entities.
- Make space with fusion, compression, and removal of uninformative phrases like "the article discusses".
- The summaries should become highly dense and concise yet self-contained, i.e., easily understood without the article.
- Missing entities can appear anywhere in the new summary.
- Never drop entities from the previous summary. If space cannot be made, add fewer new entities.
Remember, use the exact same number of words for each summary.
Answer in JSON. The JSON should be a list (length 5) of dictionaries whose keys are "Missing_Entities" and "Denser_Summary".
"""

# `llm` is the OpenAI instance created in the checker-chain cell
# (temperature=0.7), and `url_txt` is the scraped article text.
# NOTE(review): the reply is kept as a raw string and is not parsed as JSON
# here; the model's output-token limit may also cut a five-summary answer
# short — confirm.
summary = llm(
    template.format(text=url_txt)
)
summary