In [1]:
import os
from dotenv import load_dotenv
from IPython.display import Markdown, display
from openai import OpenAI
from bs4 import BeautifulSoup
import requests
import json

In [11]:
# Configuration: model name, environment variables, and the shared API client.

# Model name passed to the chat-completion calls that reference MODEL.
MODEL = 'gpt-5-nano'

# Read the .env file; override=True is passed so its values replace any
# variables that are already set in the process environment.
load_dotenv(override=True)
api_key = os.environ.get('OPENAI_API_KEY')

# Client is built with no explicit arguments.
# NOTE(review): presumably it picks up OPENAI_API_KEY from the environment - confirm.
openai = OpenAI()

# Step 1: The "Fact Extractor"

In [2]:
def get_clickbait_titles(subreddit, n_top=20, timeout=10):
    """Scrape post titles from a subreddit's page on old.reddit.com.

    Args:
        subreddit: Subreddit name without the ``r/`` prefix.
        n_top: Maximum number of titles to return, in the order they appear
            in the page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection and hang the notebook.

    Returns:
        A list of title strings. The list is empty when the response status
        is not 200 or when any exception is raised; the reason is printed
        instead of being raised.
    """
    print(f"Scraping headlines from subreddit: {subreddit}...")

    url = f"https://old.reddit.com/r/{subreddit}/"
    # Browser-like User-Agent header sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to retrieve content. Status code: {response.status_code}")
            return []

        soup = BeautifulSoup(response.text, 'html.parser')

        # Post titles are taken from the <a class="title"> anchors.
        titles = [tag.text for tag in soup.find_all('a', class_='title')]

        print(f"Completed Scraping headlines from subreddit: {subreddit}...")

        return titles[:n_top]

    except Exception as e:
        # Best-effort scraper: report the problem (including a timeout) and
        # hand back an empty list so the calling cell does not crash.
        print(f"An error occurred: {e}")
        return []


In [4]:
# Fetch up to 20 headlines from r/nottheonion.
titles = get_clickbait_titles('nottheonion', n_top = 20)

Scraping headlines from subreddit: nottheonion...


In [3]:
# System prompt for the extraction step: the model is asked to strip clickbait
# wording from each title, list the remaining facts, and score the title 0-10.
# The embedded example is valid JSON and shows "facts" and "cb_words" as real
# arrays, so the reply can be parsed into lists instead of bracketed strings.
system_prompt_to_extract_title = """
You are provided with a list of subreddit post titles. Your job is to read these titles and identify the click baity words used.
Clean the title off the click baity words, to get facts. 
And also assign a clickbait score from 0-10 (0- being the lowest clickbaity title and 10- being the most clickbaity). 

please respond in json 
{ "titles": 
[
            {"title":"title","facts":["fact_1", "fact_2"],"cb_words":["amazing", "believe"] , "score":10},
            {"title":"title","facts":["fact_1", "fact_2"],"cb_words":["none"] , "score":0}
]
}
"""

In [4]:
def get_non_political_titles_from_subreddit(subreddit):
    """Build the user prompt that asks for the non-political titles of a subreddit.

    The prompt is a fixed instruction header followed by the scraped titles,
    one per line. Despite the function name, nothing is filtered here: the
    return value is the prompt text, not a list of titles.

    Args:
        subreddit: Subreddit name, used in the header and for scraping.

    Returns:
        The prompt string.
    """
    # Spelled out piece by piece so the trailing spaces and newlines of the
    # header stay visible.
    instructions = (
        "\n"
        f"Here is all the scrapped data from {subreddit} - \n"
        "Please look at these titles and get all titles that are non political. \n"
        "respond in json. \n"
    )
    scraped_titles = get_clickbait_titles(subreddit)
    return instructions + "\n".join(scraped_titles)
    

In [5]:

def get_non_political_titles_clickbait_scores(subreddit):
    """Ask the model for facts, clickbait words and a score for a subreddit's titles.

    Args:
        subreddit: Subreddit name forwarded to the user-prompt builder.

    Returns:
        The model's JSON reply decoded with ``json.loads``.
        NOTE(review): the system prompt asks for a top-level "titles" list,
        but the reply is not validated here.
    """
    conversation = [
        {"role": "system", "content": system_prompt_to_extract_title},
        {"role": "user", "content": get_non_political_titles_from_subreddit(subreddit)},
    ]

    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        # Request a JSON object so the reply can be decoded below.
        response_format={"type": "json_object"},
    )
    raw_reply = completion.choices[0].message.content

    print('Completed getting the clickbait scores and facts for the titles.')

    return json.loads(raw_reply)



In [9]:
# Scrape r/savedyouaclick and have the model score its titles.
result = get_non_political_titles_clickbait_scores('savedyouaclick')

Scraping headlines from subreddit: savedyouaclick...


In [20]:
# Print the facts extracted for each scored title.
# The loop variable is not named `titles`, so it does not overwrite the
# notebook-level `titles` list assigned by the earlier scraping cell.
for scored_title in result['titles']:
    facts = scored_title['facts']
    print(facts)

[Cause of death: pulmonary embolism, Underlying cancer being treated since March]
[Wegovy pill introduced, advertisement]
[TomTom maps article appears to be paid content, not clearly disclosed]
[Adult joke in Disney's Muppets Show, Kermit says we're still working out a few kinks; Sabrina Carpenter comments that she likes a kink too]
[New mom hires Amish nanny, Amish nanny plays hymns on mom's electric keyboard]
[Vispa Limited will disconnect service, UK broadband shutdown]
[Robbie Williams fans warned before Liverpool concert, cold weather could affect queuing]
[Target policy requires employees to smile and make eye contact within 10 feet of customers, policy aims to be welcoming]
[Severance Season 3 release date has bad news, release likely late 2027]
[Amazon pulled Melania from Oregon theater, marquees reference Does Melania wear Prada? and To defeat your enemy, you must know them]
[Disney Retconning The Last Jedi after fan backlash, later works fill gaps in Luke Skywalker's story]
[

# Step 2: The "Factual Journalist"

In [6]:
# System prompt for the summarisation step: turn a set of facts into a
# neutral one- or two-line summary.
system_prompt_to_make_factual_summary = """
You are provided with set of facts. Your job is to make a neutral sounding one or two line summary out of these facts.
Please make the summary as least clickbaity as possible.
"""

def get_user_prompt_to_make_factual_summary(facts):
    """Wrap a set of facts in the user prompt for the summarisation step.

    Args:
        facts: The facts to summarise; interpolated into the prompt with
            normal f-string formatting.

    Returns:
        The prompt string, including its leading newline and the indentation
        around the sentence.
    """
    return (
        "\n"
        f"        Here are some facts - {facts}. Please generate summary for each set of facts. \n"
        "        "
    )


def make_factual_summary(subreddit):
    """Attach a neutral summary to every scored title of a subreddit.

    Fetches the scored titles, then makes one chat-completion call per entry
    and stores the reply under the entry's ``'anti_hype_summary'`` key.

    Args:
        subreddit: Subreddit name forwarded to the scoring step.

    Returns:
        The structure returned by the scoring step, with each item of its
        ``'titles'`` list updated in place.
    """
    scored = get_non_political_titles_clickbait_scores(subreddit)
    print('Generating factual summary')

    for entry in scored['titles']:
        conversation = [
            {"role": "system", "content": system_prompt_to_make_factual_summary},
            {"role": "user", "content": get_user_prompt_to_make_factual_summary(entry['facts'])},
        ]
        completion = openai.chat.completions.create(
            model=MODEL,
            messages=conversation,
        )
        # The summary is written onto the entry itself, so it travels with
        # the title, facts and score.
        entry['anti_hype_summary'] = completion.choices[0].message.content

    print('Completed generating factual summary')

    return scored
        



In [None]:
# Run scoring plus summarisation for r/savedyouaclick and print what comes back.
clean = make_factual_summary('savedyouaclick')

print(clean)

Scraping headlines from subreddit: savedyouaclick...


# Step 3: LLM as judge and display as a newspaper

In [7]:
# System prompt for the final step: lay the titles and summaries out as a
# small markdown newspaper, with a P.S. and a clickbait score per article.
newspaper_system_prompt = """
You are a news paper editor assistant who is given subreddit titles, facts derived from the title 
, summary , cb_words and clickbait score for each title. 
Your job is to design a small newspaper the has these titles and summaries as articles.
Also add a p.S. for each article, compare the clickbaity title with the factual summary 
and give a small one liner on whether the summary misses any key facts? And also give a clickbait score to entire article 0-10.
Please generate this as markdown without code blocks.
"""

In [13]:
def get_newspaper_user_prompt(subreddit):
    """Build the user prompt for the newspaper step.

    The prompt is a fixed preamble naming the subreddit, followed by the
    summarised titles serialised as indented JSON.

    Args:
        subreddit: Subreddit name; must be a string because it is
            concatenated into the progress message.

    Returns:
        The prompt string.
    """
    print('Generating newspaper for subreddit: ' + subreddit)

    # Spelled out piece by piece so the exact whitespace of the preamble
    # stays visible.
    preamble = (
        "\n"
        f"    You are looking at top subreddit posts called: {subreddit}\n"
        "    Here are its titles, the facts derived from the titles, the anti-hype summaries, the clickbaity words and clickbait scores;\n"
        "    use this information to build a short newspaper of the subreddit in markdown without code blocks.\n\n\n"
        "    "
    )
    summaries = make_factual_summary(subreddit)
    return preamble + json.dumps(summaries, indent=2)

In [14]:
def create_newspaper_for_subreddit(subreddit, model="gpt-4.1-mini"):
    """Generate the markdown newspaper for a subreddit and render it in the notebook.

    Args:
        subreddit: Subreddit name forwarded to the user-prompt builder.
        model: Name of the chat model that writes the newspaper. Defaults to
            the value that used to be hard-coded, so existing calls behave
            the same; pass another name to try a different model.

    Returns:
        None. The newspaper is shown with ``display(Markdown(...))``.
    """
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": newspaper_system_prompt},
            {"role": "user", "content": get_newspaper_user_prompt(subreddit)},
        ],
    )

    newspaper = response.choices[0].message.content

    display(Markdown(newspaper))

In [15]:
# Run the whole pipeline for r/savedyouaclick and render the newspaper below.
create_newspaper_for_subreddit("savedyouaclick")

Generating newspaper for subreddit: savedyouaclick
Scraping headlines from subreddit: savedyouaclick...
Completed Scraping headlines from subreddit: savedyouaclick...
Completed getting the clickbait scores and facts for the titles.
Generating factual summary
Completed generating factual summary
Generating factual summary
Completed generating factual summary
Generating factual summary
Completed generating factual summary
Generating factual summary
Completed generating factual summary
Generating factual summary
Completed generating factual summary
Generating factual summary
Completed generating factual summary
Generating factual summary
Completed generating factual summary
Generating factual summary
Completed generating factual summary
Generating factual summary
Completed generating factual summary
Generating factual summary
Completed generating factual summary
Generating factual summary
Completed generating factual summary
Generating factual summary
Completed generating factual summary


# SavedYouAClick Daily  
*Your concise source for news without the fluff*  

---

## Catherine O'Hara's Cause of Death Revealed?  
There is no credible report that Catherine O’Hara has died. Hypothetically, if true, the cause of death would be a pulmonary embolism linked to an underlying cancer for which she was receiving treatment since March. However, this report remains unverified.  

**p.S.** The title reveals a dramatic cause of death, but the summary clarifies there’s no actual death reported. The summary omits the urgency created by the title, rating a clickbait score of **4/10** for bold but misleading phrasing.

---

## 5 Creative Twists to Upgrade Your Scrambled Eggs  
Scrambled eggs don’t have to be boring. Here are five creative variations to spice up your usual breakfast routine. From adding herbs to mixing cheeses, these ideas can elevate your morning plate without complication.  

**p.S.** The title's promise of upgrading breakfast is fully met by the straightforward summary. No key facts missed here. Clickbait score: **7/10** for engaging suggestions without overpromising.

---

## Not Going Back to Google Maps: The TomTom Experience  
The review covers TomTom maps, but the article appears to be sponsored content, which may influence its objectivity. Users are advised to consider this when weighing the recommendation.  

**p.S.** The title suggests a firm personal decision, but the summary dampens enthusiasm by exposing potential bias. Some key skeptical facts are included. Clickbait score: **5/10** for a half-persuasive tone.

---

## Parents Question Adult Joke in Disney’s New Muppets Show  
Disney’s new Muppets show features a joke with adult innuendo: Kermit mentions "working out a few kinks," to which host Sabrina Carpenter replies she likes "a kink too." This has raised some concerns among parents.  

**p.S.** The title implies widespread controversy, but the summary sticks to the facts with less alarm. It captures the gist without exaggeration. Clickbait score: **4/10** due to mild sensationalism.

---

## New Mom Surprised by Amish Nanny’s Day One  
A new mother hired an Amish nanny who, on her first day, played hymns using the family’s electric keyboard, surprising the household.  

**p.S.** The title hints at shock but the summary softens the reaction to a simple fact. The key detail of hymns on an electric keyboard is present. Clickbait score: **5/10** due to wordplay on surprise.

---

## UK Broadband Provider Vispa Limited to Disconnect Service  
Vispa Limited, a UK broadband company, is shutting down with service disconnection forthcoming for its customers.  

**p.S.** The title is direct and factual, matched by the summary with no missing information. Clickbait score: **3/10** — straightforward news.

---

## Robbie Williams Fans Warned About Cold at Liverpool Gig  
Concert-goers at Robbie Williams’ Liverpool show were advised not to queue too early due to cold temperatures expected on the day of the event.  

**p.S.** The cautionary note in the title aligns with the summary; no additional facts omitted. Clickbait score: **4/10** for mild hype on weather warning.

---

## Target’s New Policy Making Some Shoppers Uncomfortable  
Target has implemented a policy requiring employees to smile, make eye contact, wave, and use welcoming body language when within 10 feet of customers. This change has made some shoppers feel uneasy.  

**p.S.** The title suggests shopper discomfort but does not overstate it; the summary describes the policy clearly. No significant details are missed. Clickbait score: **3/10**.

---

## 'Severance' Season 3 Won’t Air Until Late 2027  
Fans of the show *Severance* will have to wait a while longer, as season 3 is now unlikely to be released until late 2027.  

**p.S.** The title’s "very bad news" tone is confirmed by the summary’s straightforward timeline. No missing facts here. Clickbait score: **7/10** for emotional framing.

---

## Amazon Pulls ‘Melania’ from Oregon Theater Amid Marquee Messages  
Amazon has withdrawn the theatrical production *Melania* from an Oregon theater. Prior to the removal, marquees featured phrases like “Does Melania wear Prada?” and “To defeat your enemy, you must know them.”  

**p.S.** The title’s claim of an “unspeakable act” is not factually supported in the summary, which only notes promotional strategies and the pull decision. Key critical context missing. Clickbait score: **6/10**.

---

## Disney Denies Retconning ‘The Last Jedi’ After Star Wars Backlash  
Despite rumors, Disney is not officially rewriting *The Last Jedi*. Instead, later *Star Wars* works have expanded on Luke Skywalker’s story beyond *Return of the Jedi*, filling gaps for fans.  

**p.S.** The title misleads with “officially retconning,” but the summary clarifies it’s about narrative expansion, not erasure. Some key factual counterpoints included. Clickbait score: **6/10**.

---

## Achieve Home Ownership with Just 3% Down Payment  
It is possible to buy a home by putting down only 3% of the purchase price, making ownership more accessible for many people.  

**p.S.** The title uses casual slang (“TFW”) but aligns cleanly with the summary’s fact. No significant omissions. Clickbait score: **8/10** for engaging but accurate phrasing.

---

## Derby Mum Finds Dust and Dirt Under Son’s Carpet  
In Derby, a mother removed her son’s bedroom carpet and was surprised to find a lot of dust and dirt had accumulated beneath it.  

**p.S.** The title’s dramatic tone about “what pours out” contrasts with the very ordinary fact of dust buildup. The summary delivers the reality simply. Clickbait score: **4/10**.

---

*End of issue. Stay tuned for more saved clicks and truth in headlines.*