# Business News Summarizer

## Importing dependencies

In [None]:
from dotenv import load_dotenv
import os
import requests
import feedparser
from bs4 import BeautifulSoup
import torch
from transformers import AutoTokenizer, AutoModel
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from bs4 import BeautifulSoup

from orchestrator import Orchestrator
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


## Model parameters

In [2]:
# Name of the sentence-embedding checkpoint handed to SentenceTransformer later on.
model_path = 'all-MiniLM-L6-v2'

# Pick the compute device: CUDA first, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [3]:
# Hugging Face checkpoint shared by the tokenizer and the model below.
hf_checkpoint = "microsoft/codebert-base"

# NOTE(review): in the cells visible in this notebook `tokenizer` is never used
# and `model` is rebound to a SentenceTransformer in the Embeddings section;
# confirm whether this load is still needed.
tokenizer = AutoTokenizer.from_pretrained(hf_checkpoint)
model = AutoModel.from_pretrained(hf_checkpoint)

In [4]:
# Move the model loaded in the previous cell onto the selected device.
# NOTE(review): `model` is rebound to a SentenceTransformer in the Embeddings
# section, so this placement does not carry over to that encoder.
model = model.to(device)

## Creating Dataframe

In [5]:
# BBC Business RSS feed used as the article source.
rss_url = "http://feeds.bbci.co.uk/news/business/rss.xml"
feed = feedparser.parse(rss_url)

# Columns of df_news. Declared up front so that a feed with no entries still
# produces a frame with these columns (later cells index df_news['title']).
article_fields = ['title', 'link', 'summary', 'published']

# Every field is read with a '' fallback, so an entry lacking any of them
# (including title or link) no longer aborts the whole cell.
articles = [
    {field: entry.get(field, '') for field in article_fields}
    for entry in feed.entries
]

df_news = pd.DataFrame(articles, columns=article_fields)
df_news

Unnamed: 0,title,link,summary,published
0,Government to water down business rate rise fo...,https://www.bbc.com/news/articles/c8e57dexly1o...,The hospitality industry had called for a reth...,"Thu, 08 Jan 2026 19:10:16 GMT"
1,"Weight loss jabs affecting Greggs, boss says",https://www.bbc.com/news/articles/c20g11y4dywo...,"The Greggs boss says there is ""no doubt"" weigh...","Thu, 08 Jan 2026 15:38:19 GMT"
2,Tesco and M&S report strong Christmas food sales,https://www.bbc.com/news/articles/cp82ylmy6d1o...,"Both major retailers report strong food sales,...","Thu, 08 Jan 2026 13:42:32 GMT"
3,Inside the sub-zero lair of the world's most p...,https://www.bbc.com/news/articles/c62r6dvpl5ro...,Faisal Islam gets rare access to Willow - Goog...,"Thu, 08 Jan 2026 08:03:50 GMT"
4,Unions accuse McDonald's of 'repeated harassme...,https://www.bbc.com/news/articles/c2e1g17drr2o...,It follows a BBC investigation three years ago...,"Thu, 08 Jan 2026 00:00:14 GMT"
5,How tariff disruption will continue reshaping ...,https://www.bbc.com/news/articles/czejp3gep63o...,Trump's import levies are still changing the p...,"Thu, 08 Jan 2026 00:00:40 GMT"
6,Workers' rights reforms will cost billions les...,https://www.bbc.com/news/articles/c5yv6n536vno...,Firms implementing the new employment terms ar...,"Thu, 08 Jan 2026 11:07:03 GMT"
7,Trump calls for US military spending to rise m...,https://www.bbc.com/news/articles/cy59kxl2xwzo...,The president also called on defence firms to ...,"Thu, 08 Jan 2026 02:15:51 GMT"
8,Sportswear firm Castore defends price of footb...,https://www.bbc.com/news/articles/c1lzv94qnpyo...,"Tom Beahon, of sportswear firm Castore, says f...","Thu, 08 Jan 2026 09:38:59 GMT"
9,Trump backs ban on institutional investor home...,https://www.bbc.com/news/articles/c0lxz5wn2yzo...,Wall Street investors have bought thousands of...,"Wed, 07 Jan 2026 22:38:36 GMT"


## Embeddings

In [8]:
# User question: embedded to rank the articles, then passed to the orchestrator.
prompt = "How is the current situation in south america"


In [None]:
# Encoder used for both the articles and the query.
model = SentenceTransformer(model_path)

# One text per article, "<title>. <summary>", restricted to the first 50 rows.
first_rows = df_news.iloc[:50]
texts = (first_rows['title'] + ". " + first_rows['summary']).tolist()

# Article vectors and the query vector, both L2-normalised by the encoder.
embeddings = model.encode(texts, normalize_embeddings=True)
prompt_embedding = model.encode([prompt], normalize_embeddings=True)

In [19]:
# Similarity of the single query vector against every article vector;
# the result has one row, which is taken as the score vector.
similarity_matrix = cosine_similarity(prompt_embedding, embeddings)
similarities = similarity_matrix[0]

# Article positions ordered from most to least similar; keep the best five.
ranked_idx = np.argsort(similarities)[::-1]
top5_idx = ranked_idx[:5]

In [20]:
# Show the raw similarity scores (one per embedded article).
similarities

array([ 0.11018484,  0.01502993,  0.09605145,  0.05433956,  0.0796041 ,
        0.3774246 ,  0.14972924,  0.12906699, -0.02264721,  0.06129436,
       -0.06498737,  0.2653304 ,  0.12563786,  0.10118265,  0.0316577 ,
       -0.05084961,  0.14020541,  0.05566239,  0.3240686 ,  0.07037839,
        0.10019156,  0.09434243,  0.13297114,  0.10615736,  0.10574865,
        0.17209804,  0.03921305,  0.13106658,  0.20770521,  0.10458659,
        0.05737606,  0.11170243,  0.09220589,  0.13517046,  0.252737  ,
        0.17128195,  0.10019642,  0.00734501,  0.06129436,  0.18594104,
       -0.04254094,  0.18212104,  0.10246376,  0.12875223,  0.00711524,
        0.26278904,  0.15063566,  0.15017001,  0.19057325,  0.17551342],
      dtype=float32)

In [21]:
# Report the five best matches, most similar first.
top_titles = df_news['title'].iloc[top5_idx]
top_scores = similarities[top5_idx]

for title, score in zip(top_titles, top_scores):
    print(f"Title: {title}")
    print(f"Similarity: {score:.4f}")
    print("---")

Title: How tariff disruption will continue reshaping the global economy in 2026
Similarity: 0.3774
---
Title: Trump says Venezuela will be 'turning over' up to 50 million barrels of oil to US
Similarity: 0.3241
---
Title: US will control Venezuela oil sales 'indefinitely', official says
Similarity: 0.2653
---
Title: How Milei's 'Thatcherite' economics divided his nation - but won over Trump
Similarity: 0.2628
---
Title: Why this month's inflation figure may be good news for you
Similarity: 0.2527
---


## Parsing data from chosen articles

In [22]:
def get_article_text(url, timeout=10):
    """Download an article page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the article page.
        timeout: Value passed to ``requests.get`` as its timeout, in seconds
            (default 10).

    Returns:
        The paragraph texts joined by single spaces, with leading and
        trailing whitespace removed. On any failure, including a 4xx/5xx
        HTTP status, the string ``"Error: <exception>"`` is returned instead
        of raising.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx replies as failures instead of scraping the error page
        # and passing it off as article content.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
        # TODO: strip boilerplate (ads, navigation, filler words) from the text.
        return text.strip()
    except Exception as e:
        # Deliberately best-effort: one unreachable article must not abort a
        # DataFrame.apply over several links; callers can detect the failure
        # by the "Error: " prefix.
        return f"Error: {e}"

In [23]:
# Fetch the full text of the five best-matching articles.
top5_df = df_news.iloc[top5_idx].reset_index(drop=True)
top5_df['content'] = top5_df['link'].apply(get_article_text)

# get_article_text reports failures as "Error: ..." strings. Those rows stay in
# top5_df for inspection but are left out of the text handed to the LLM, so an
# error message is never presented to it as article content.
fetched_ok = ~top5_df['content'].str.startswith("Error: ")

articles_combined = [
    f"Title: {row['title']}\nDate: {row['published']}\nContent: {row['content']}"
    for _, row in top5_df[fetched_ok].iterrows()
]

# Single context string: one block per article, separated by a blank line.
all_articles_text = "\n\n".join(articles_combined)


In [24]:
# Print the combined context to check what will be sent to the orchestrator.
print(all_articles_text)

Title: How tariff disruption will continue reshaping the global economy in 2026
Date: Thu, 08 Jan 2026 00:00:40 GMT
Content: President Trump's favourite word is tariffs. He reminded the world of that in his pre-Christmas "address to the nation". With the world still unwrapping the tariffs "gift" from the first year of his second term in office, he said they were bringing jobs, higher wages and economic growth to the US. That is hotly contested. What is less debatable is that they've refashioned the global economy, and will continue to do so into 2026. The International Monetary Fund (IMF) says that although "the tariff shock is smaller than originally announced", it is a key reason why it now expects the rate of global economic growth to slow to 3.1% in 2026. A year ago, it predicted a 3.3% expansion this year. For the head of the IMF, Kristalina Georgieva, things are "better than we feared, worse than it needs to be". Speaking on a podcast recently she explained that growth had fallen

## Creating agents

In [25]:
# Build the project's Orchestrator with the 'llama3' model name and run it on
# the query plus the combined article text.
# NOTE(review): Orchestrator is defined in the local `orchestrator` module; the
# text shown under this cell is presumably its generated answer, so confirm
# whether execute() returns it or prints it.
orchestrator = Orchestrator(model='llama3')
orchestrator.execute(prompt, all_articles_text)


Based on the provided articles, here are some key points about the current situation in South America:

1. Argentina:
	* President Javier Milei's economic program has led to a decrease in inflation and an increase in foreign investment.
	* However, there is concern that his strategy of keeping inflation under control could unravel if Argentina can't pay its debts.
	* The country owes $20 billion of debt next year, which could be a challenge for the government.
2. Venezuela:
	* President Nicolas Maduro has been ousted by Donald Trump's administration, which sees this as a "decisive win".
	* This development is likely to have significant implications for regional politics and international trade.

It is important to note that these articles only provide limited information about the current situation in South America, and there may be other developments or issues that are not mentioned.
