In [None]:
!pip install langchain
!pip install requests
!pip install html2text
!pip install groq

In [1]:
from langchain.utilities import GoogleSerperAPIWrapper
from time import sleep
import requests
from html2text import HTML2Text
import keys
from groq import Groq
import warnings

# Hide DeprecationWarning messages for the rest of the session
# (presumably the ones triggered by the langchain import above - TODO confirm).
warnings.simplefilter("ignore", DeprecationWarning)

# Shared Groq client used by groq_call(); the API key is read from the local
# `keys` module so it is not hard-coded in the notebook.
client = Groq(
    api_key=keys.GROQ_API_KEY,
)

In [2]:
def groq_call(text, system_content, temperature=0.0, max_tokens=1024):
    """Ask the Groq chat model to analyze one news article.

    Args:
        text: Article text; it is appended to a fixed
            "Here is a news article to analyze: " prefix to form the user message.
        system_content: System prompt that tells the model what to produce.
        temperature: Sampling temperature forwarded to the API (default 0.0).
        max_tokens: Completion token limit forwarded to the API (default 1024).

    Returns:
        The message content of the first choice in the API response.
    """
    system_message = {"role": "system", "content": system_content}
    user_message = {
        "role": "user",
        "content": "Here is a news article to analyze: " + text,
    }
    # Uses the module-level `client`; model alternatives noted by the author:
    # llama3-8b-8192, llama3-70b-8192
    completion = client.chat.completions.create(
        messages=[system_message, user_message],
        model="llama3-70b-8192",
        temperature=temperature,
        max_tokens=max_tokens,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


def find_links(event, num_results=1):
    """Search the web for an event and return links to matching articles.

    The query is sent through GoogleSerperAPIWrapper. Links are taken from the
    "topStories" section of the response when it is present and non-empty,
    otherwise from the "organic" section.

    Args:
        event: Search query (formatted to a string before searching).
        num_results: Maximum number of links to return; also passed to the
            wrapper as `k`.

    Returns:
        A list of at most `num_results` URLs. The list is shorter when the
        search returns fewer entries, and empty when neither section has any.
    """
    search = GoogleSerperAPIWrapper(k=num_results, serper_api_key=keys.SERPER_API_KEY)
    results = search.results(f"{event}")
    for section in ("topStories", "organic"):
        entries = results.get(section)
        if entries:
            # Slice instead of indexing range(num_results): the section may hold
            # fewer entries than requested, which used to raise IndexError.
            return [entry["link"] for entry in entries[:num_results]]
    return []
    
    
def scrape_links(urls, timeout=10):
    """Download each URL and convert its HTML to filtered plain text.

    Args:
        urls: Iterable of URL strings; surrounding whitespace is stripped.
        timeout: Seconds to wait for each HTTP request before giving up
            (default 10). Without a timeout a single unresponsive host would
            block the whole run indefinitely.

    Returns:
        A list with one string per successfully fetched page, formatted as
        "{'url': '<url>', 'content': '<text>'}". URLs that fail (network error,
        timeout, or non-200 status) are reported on stdout and skipped.
    """
    news = []
    for url in urls:
        url = url.strip()
        print(f"Scraping website {url}...")
        try:
            r = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            # Keep going: one unreachable site should not abort the batch.
            print(f"HTTP request failed with {exc}.")
            continue
        if r.status_code != 200:
            print(f"HTTP request failed with {r.status_code}.")
            continue
        # A fresh converter per page; links and images are left out of the text.
        h = HTML2Text()
        h.ignore_links = True
        h.ignore_images = True
        text = h.handle(r.text)
        # Drop every line containing '*' (presumably list/emphasis markup such
        # as navigation menus - TODO confirm this does not remove article text).
        filtered_lines = [line for line in text.split('\n') if '*' not in line]
        text = '\n'.join(filtered_lines)
        news.append(f"{{'url': '{url}', 'content': '{text}'}}")
        sleep(2)  # pause between successful requests
    return news


def main(event, system_content_summary, system_content_unbiased):
    """Search for an event, scrape the top articles and summarize each twice.

    Args:
        event: Search query passed to find_links().
        system_content_summary: System prompt for the plain summary.
        system_content_unbiased: System prompt for the unbiased summary.

    Returns:
        A tuple (summary, summary_unbiased) of two lists with one entry per
        scraped article, in scraping order.
    """
    article_links = find_links(event, num_results=3)
    articles = scrape_links(article_links)

    summary, summary_unbiased = [], []
    for article in articles:
        # Two model calls per article: plain summary first, then the unbiased one.
        summary.append(
            groq_call(article, system_content_summary, max_tokens=8192)
        )
        summary_unbiased.append(
            groq_call(article, system_content_unbiased, max_tokens=8192)
        )
    return summary, summary_unbiased

In [3]:
# System prompt for the plain summary.
system_content_summary = "You are a AI journalist tasked with analyzing news articles. Your task is to provide a summary of the article based on the given content."

# System prompt for the politically unbiased summary.
system_content_summary_unbiased = "You are an AI impartial journalist analyzing the news for political bias. Write a politically unbiased summary of the article, focusing on the key points and facts without injecting personal opinions."

# Checklist of typical signs of political bias, appended to the unbiased prompt.
# Written as adjacent string literals (one per sign) so that no literal spans a
# physical line break; the text itself is unchanged.
content_long = (
    "Here are some typical signs of political bias in news:\n"
    "Loaded language: The use of emotionally charged or biased language to influence readers' opinions. This can include words or phrases that convey a particular viewpoint or judgment without presenting balanced information.\n"
    "Selective reporting: Choosing to report only certain aspects of a story while ignoring others that may provide context or alternative viewpoints. This can lead to a skewed representation of events.\n"
    "Omission of key information: Intentionally leaving out relevant facts or details that may contradict or weaken the narrative being presented. This can result in a one-sided portrayal of issues.\n"
    "Source selection: Relying heavily on sources that align with a particular political ideology or agenda, while marginalizing or ignoring dissenting voices. This can create a perception of bias by presenting a limited range of perspectives.\n"
    "Framing: Presenting information in a way that emphasizes one interpretation over others. This can involve highlighting certain aspects of a story while downplaying or minimizing others, influencing how readers perceive the issue.\n"
    "Sensationalism: Prioritizing sensational or attention-grabbing stories over more substantive or nuanced coverage. This can lead to exaggerated or misleading portrayals of events.\n"
    "Misrepresentation: Distorting facts or events to fit a particular narrative or agenda. This can involve cherry-picking data, misquoting sources, or presenting information out of context.\n"
    "Biased headlines: Crafting headlines that convey a particular viewpoint or interpretation of a story before readers have had a chance to assess the content. This can influence readers' perceptions before they've read the article.\n"
    "Opinion masquerading as news: Blurring the line between opinion and factual reporting by interjecting personal bias or commentary into news articles. This can lead to a lack of objectivity in reporting.\n"
    "Consistency of bias: Patterns of consistently favoring one political viewpoint or party over others in news coverage, over time. This can indicate systemic bias within a news organization."
)

In [4]:
# Select the event you want to analyze
event = "Ukraine war"

# Run the full pipeline: search, scrape, then summarize each article twice.
# The unbiased prompt is the base instruction followed by the bias checklist;
# a blank line separates them so the two texts do not run together
# ("...personal opinions.Here are some typical signs...").
summary, summary_unbiased = main(
    event,
    system_content_summary,
    system_content_summary_unbiased + "\n\n" + content_long,
)

Scraping website https://www.cnn.com/2024/04/20/europe/russia-belgorod-impact-ukraine-war-intl-cmd/index.html...
Scraping website https://www.theguardian.com/world/2024/apr/20/ukraine-war-briefing-kyiv-claims-bomber-shot-down-at-308km-range-after-crash-in-russia...
Scraping website https://www.bbc.com/news/world-us-canada-68848277...


In [5]:
# Show the plain summary of the first scraped article.
print(summary[0])

The article reports on the devastating impact of the war in Ukraine on the Russian city of Belgorod, located near the Ukrainian border. The city has been transformed into a "ghost town" with deserted streets, shuttered shops, and damaged buildings. The residents of Belgorod are living in fear, with daily shelling and drone attacks causing casualties and destruction. The city has become a key military hub for Russia's invading forces, and Ukraine has responded by targeting the region, leading to a significant escalation of violence.

The article highlights the struggles of the residents, who are trying to navigate an uncertain future amidst the war. Many have chosen to stay in the city despite the danger, with some opting to move further away from the border. The authorities have evacuated around 5,000 children to safer regions, but many residents feel neglected by the media, authorities, and the broader Russian population.

The article also mentions the rise in looting incidents in des

In [6]:
# Show the unbiased summary of the same (first) article for comparison.
print(summary_unbiased[0])

Here is a politically unbiased summary of the article:

The article reports on the impact of the war in Ukraine on the Russian city of Belgorod, which is located near the Ukrainian border. The city has been subject to frequent shelling and drone attacks, causing damage to buildings, infrastructure, and daily life. Residents are living in fear, with many opting to stay indoors and avoiding public places. The city's streets are deserted, shops are closed, and essential services are disrupted.

The article cites reports from residents, including a volunteer who describes the city as "ghostly" and "lifeless." Another resident describes the fear and anxiety that accompanies daily life in Belgorod, where the sound of air raid sirens is a constant presence.

The article also mentions the efforts of local authorities to evacuate children from the region and restore order to the border territories. However, the situation remains dire, with looting and crime on the rise in abandoned areas.

The 

In [7]:
# Provide a list of URLs to analyze
# (alternative to main(): skips the search step and scrapes hand-picked articles).
links = ["https://www.nbcnews.com/news/asian-america/tom-cotton-backlash-tiktok-ceo-shou-chew-rcna136673"]
# `news` holds one formatted string per page that was fetched successfully.
news = scrape_links(links)

Scraping website https://www.nbcnews.com/news/asian-america/tom-cotton-backlash-tiktok-ceo-shou-chew-rcna136673...


In [8]:
# Plain summary of the first manually supplied article.
output = groq_call(news[0], system_content_summary, max_tokens=8192)
print(output)

Here is a summary of the article:

Senator Tom Cotton (R-Ark.) is facing backlash for repeatedly questioning TikTok CEO Shou Zi Chew about his citizenship and potential ties to the Chinese Communist Party during a Senate hearing on online child safety. Chew, who is Singaporean, repeatedly clarified his nationality and denied any affiliation with the Chinese Communist Party. Cotton's line of questioning has been criticized as racist and Sinophobic, with many on social media accusing him of perpetuating harmful stereotypes. Cotton defended his comments, stating that it was reasonable to investigate potential influence by the Chinese Communist Party. The hearing also featured CEOs from Meta, X, Discord, and Snap, who were questioned about their companies' positions on various pieces of legislation related to online safety.


In [11]:
# Unbiased summary of the same article. The prompt is the base instruction
# followed by the bias checklist; a blank line separates them so the two texts
# do not run together ("...personal opinions.Here are some typical signs...").
output = groq_call(
    news[0],
    system_content_summary_unbiased + "\n\n" + content_long,
    max_tokens=8192,
)
print(output)

Here is a politically unbiased summary of the article:

The article reports on a Senate hearing where Senator Tom Cotton (R-Ark.) questioned TikTok CEO Shou Zi Chew about his citizenship and potential ties to the Chinese Communist Party. Cotton repeatedly asked Chew about his residency, nationality, and relationship with the Chinese Communist Party, which sparked backlash on social media. Chew, who is Singaporean, denied any affiliation with the Chinese Communist Party. The exchange was criticized by some as racist and Sinophobic. Cotton defended his line of questioning, stating that it was reasonable to investigate potential influence by the Chinese Communist Party. The hearing was part of a broader discussion on online child safety and regulation of social media.

Key points:

* Senator Tom Cotton questioned TikTok CEO Shou Zi Chew about his citizenship and potential ties to the Chinese Communist Party.
* Chew denied any affiliation with the Chinese Communist Party and stated that he