In [1]:
import os
import sys

# Make the project package importable from this notebook: resolve the
# `news-bias-agent` directory (two levels up from the working directory) to an
# absolute path and register it on sys.path only if it is not already there.
_relative_root = os.path.join('..', '..', 'news-bias-agent')
module_path = os.path.abspath(_relative_root)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

# Load IPython's autoreload extension and switch it to mode 2, so imported
# modules are reloaded before each cell runs and edits to the project's
# source files take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
from src.dataLoader import NewsDataLoader
from src.biasAnalyzer import BiasAnalyzer
from src.prompts import BIAS_ANALYSIS_SYSTEM_PROMPT
import time
from tqdm import tqdm

In [3]:
def pretty_print(result):
    data = result.model_dump() if hasattr(result, 'model_dump') else result.__dict__
    df = pd.DataFrame([data]).T
    df.columns = ['Analysis Result']
    display(df.style.set_properties(**{
        'text-align': 'left',
        'white-space': 'pre-wrap', 
        'word-wrap': 'break-word'
        }))

In [4]:
# Point the loader at the News Category Dataset (v3) JSON file kept in the
# sibling `data/` directory, relative to the notebook's working directory.
dataset_path = os.path.join('..', 'data', 'News_Category_Dataset_v3.json')
loader = NewsDataLoader(data_path=dataset_path)

------------------------------------------------------------------------------------------------------
#### Dataset

In [5]:
# Schema overview of the loaded frame: columns, non-null counts, dtypes, memory.
loader.data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 209527 entries, 0 to 209526
Data columns (total 6 columns):
 #   Column             Non-Null Count   Dtype         
---  ------             --------------   -----         
 0   link               209527 non-null  object        
 1   headline           209527 non-null  object        
 2   category           209527 non-null  object        
 3   short_description  209527 non-null  object        
 4   authors            209527 non-null  object        
 5   date               209527 non-null  datetime64[ns]
dtypes: datetime64[ns](1), object(5)
memory usage: 9.6+ MB


In [6]:
# Preview the first rows to see what each field looks like.
loader.data.head()

Unnamed: 0,link,headline,category,short_description,authors,date
0,https://www.huffpost.com/entry/covid-boosters-...,Over 4 Million Americans Roll Up Sleeves For O...,U.S. NEWS,Health experts said it is too early to predict...,"Carla K. Johnson, AP",2022-09-23
1,https://www.huffpost.com/entry/american-airlin...,"American Airlines Flyer Charged, Banned For Li...",U.S. NEWS,He was subdued by passengers and crew when he ...,Mary Papenfuss,2022-09-23
2,https://www.huffpost.com/entry/funniest-tweets...,23 Of The Funniest Tweets About Cats And Dogs ...,COMEDY,"""Until you have a dog you don't understand wha...",Elyse Wanshel,2022-09-23
3,https://www.huffpost.com/entry/funniest-parent...,The Funniest Tweets From Parents This Week (Se...,PARENTING,"""Accidentally put grown-up toothpaste on my to...",Caroline Bologna,2022-09-23
4,https://www.huffpost.com/entry/amy-cooper-lose...,Woman Who Called Cops On Black Bird-Watcher Lo...,U.S. NEWS,Amy Cooper accused investment firm Franklin Te...,Nina Golgowski,2022-09-22


In [7]:
# Distinct category labels present in the dataset.
loader.data["category"].unique()

array(['U.S. NEWS', 'COMEDY', 'PARENTING', 'WORLD NEWS', 'CULTURE & ARTS',
       'TECH', 'SPORTS', 'ENTERTAINMENT', 'POLITICS', 'WEIRD NEWS',
       'ENVIRONMENT', 'EDUCATION', 'CRIME', 'SCIENCE', 'WELLNESS',
       'BUSINESS', 'STYLE & BEAUTY', 'FOOD & DRINK', 'MEDIA',
       'QUEER VOICES', 'HOME & LIVING', 'WOMEN', 'BLACK VOICES', 'TRAVEL',
       'MONEY', 'RELIGION', 'LATINO VOICES', 'IMPACT', 'WEDDINGS',
       'COLLEGE', 'PARENTS', 'ARTS & CULTURE', 'STYLE', 'GREEN', 'TASTE',
       'HEALTHY LIVING', 'THE WORLDPOST', 'GOOD NEWS', 'WORLDPOST',
       'FIFTY', 'ARTS', 'DIVORCE'], dtype=object)

In [9]:
# Number of articles per category, largest first.
loader.data["category"].value_counts()

category
POLITICS          35602
WELLNESS          17945
ENTERTAINMENT     17362
TRAVEL             9900
STYLE & BEAUTY     9814
PARENTING          8791
HEALTHY LIVING     6694
QUEER VOICES       6347
FOOD & DRINK       6340
BUSINESS           5992
COMEDY             5400
SPORTS             5077
BLACK VOICES       4583
HOME & LIVING      4320
PARENTS            3955
THE WORLDPOST      3664
WEDDINGS           3653
WOMEN              3572
CRIME              3562
IMPACT             3484
DIVORCE            3426
WORLD NEWS         3299
MEDIA              2944
WEIRD NEWS         2777
GREEN              2622
WORLDPOST          2579
RELIGION           2577
STYLE              2254
SCIENCE            2206
TECH               2104
TASTE              2096
MONEY              1756
ARTS               1509
ENVIRONMENT        1444
FIFTY              1401
GOOD NEWS          1398
U.S. NEWS          1377
ARTS & CULTURE     1339
COLLEGE            1144
LATINO VOICES      1130
CULTURE & ARTS     1074
EDUCATI

------------------------------------------------------------------------------------------------------

In [10]:
# Build the analyzer with the shared system prompt. The low temperature (0.1)
# is presumably meant to keep the model output close to deterministic —
# TODO confirm against BiasAnalyzer.
analyzer = BiasAnalyzer(
    system_prompt=BIAS_ANALYSIS_SYSTEM_PROMPT,
    temperature=0.1,
)

In [12]:
# Pull a small working sample and flatten it for the analyzer calls below.
# NOTE(review): presumably `n=5` bounds the sample size and the selection may
# be random — confirm in NewsDataLoader; if so, seed it so re-runs show the
# same articles.
test_df = loader.get_high_bias_categories(n=5)
# Each element is a dict with 'headline', 'short_description' and 'category'
# keys (see the output below).
articles = loader.to_dict_list(test_df)
articles

[{'headline': "Obama Savages GOP Candidates' Economic Plans: They 'Defy Logic'",
  'short_description': 'Burn.',
  'category': 'POLITICS'},
 {'headline': 'Sunday Roundup',
  'short_description': 'This was a week that saw the return of former leaders with less than successful results. In the World Cup, Spain fell 2-0 to Chile, knocking the defending champions out even before the upcoming knockout rounds. In an even worse reappearance, former Vice President Dick Cheney appeared on Fox News to defend his assertion that President Obama has "been so wrong about so much." But host Megyn Kelly scored a surprise knockout herself when she said, "History has proven that you got it wrong as well in Iraq, sir." As they say: \'Goooooooal!\' Meanwhile, on Wednesday, the Patent Office canceled several trademarks on the name "Washington Redskins." Though D.C.\'s football team hasn\'t changed its mascot yet, if they do, I\'m partial to renaming them the Washington Drones. Other suggestions that came my

In [13]:
# Analyze the first sampled article (index 0) and show the structured verdict.
first_article = articles[0]
result = analyzer.analyze_news(
    headline=first_article['headline'],
    description=first_article['short_description'],
)
pretty_print(result)

Unnamed: 0,Analysis Result
sensationalism_score,8
emotional_charge,7
political_leaning,Left
sentiment,Negative
bias_indicators,"['Savages', 'Defy Logic', 'Burn.']"
framing,"The story is framed to highlight Barack Obama's strong and decisive criticism of GOP economic plans, presenting his arguments as irrefutable and emphasizing a 'take-down' narrative."
subjectivity_flag,True
reasoning,"The headline uses highly charged language like 'Savages' and 'Defy Logic' to describe Obama's critique, immediately signaling a strong negative stance against GOP plans. The accompanying description 'Burn.' further amplifies the sensationalism and emotional manipulation, framing the interaction as a decisive verbal victory for Obama. This language clearly indicates a left political leaning by amplifying a prominent Democratic figure's criticism of the Republican party."


In [13]:
# Same analysis for the second sampled article (index 1).
# NOTE(review): this cell reuses execution count 13, and its saved output does
# not appear to describe articles[1] as listed above ('Sunday Roundup').
# Re-run the notebook top to bottom so code and output agree.
second_article = articles[1]
result = analyzer.analyze_news(
    headline=second_article['headline'],
    description=second_article['short_description'],
)
pretty_print(result)

Unnamed: 0,Analysis Result
sensationalism_score,2
emotional_charge,4
political_leaning,Neutral
sentiment,Negative
bias_indicators,"[Unpopular, fears, might convince even more young and healthy people]"
framing,"The story is framed around the concerns of a bipartisan group of governors regarding the negative consequences of repealing the individual mandate, specifically the potential for more young and healthy people to opt out."
subjectivity_flag,True
reasoning,"The headline uses the subjective term ""unpopular,"" and the description highlights the ""fears"" of governors regarding negative consequences if the mandate is removed, framing the story from a perspective that supports its endurance."
