# Business News Summarizer

## Importing dependencies

In [1]:
from dotenv import load_dotenv
import os
import requests
import feedparser
from bs4 import BeautifulSoup
import torch
from transformers import AutoTokenizer, AutoModel
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from bs4 import BeautifulSoup

from orchestrator import Orchestrator
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


## Model parameters

In [2]:
# Checkpoint name handed to SentenceTransformer in the Embeddings section.
model_path = 'all-MiniLM-L6-v2'

# Pick the torch backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [3]:
# Tokenizer and encoder are both loaded from the same pretrained checkpoint.
# NOTE(review): `model` is rebound to a SentenceTransformer in the Embeddings
# section and `tokenizer` is not referenced again in this notebook - confirm
# whether this CodeBERT pair is still needed.
codebert_checkpoint = "microsoft/codebert-base"
tokenizer = AutoTokenizer.from_pretrained(codebert_checkpoint)
model = AutoModel.from_pretrained(codebert_checkpoint)

In [4]:
# Move the loaded model onto the device selected in the parameters cell.
model = model.to(device)

## Creating Dataframe

In [5]:
rss_url = "http://feeds.bbci.co.uk/news/business/rss.xml"
feed = feedparser.parse(rss_url)

# Columns are fixed up front so the frame keeps its schema even when the feed
# comes back empty (a bare pd.DataFrame([]) has no columns, and later lookups
# such as df_news['title'] would raise KeyError).
news_columns = ['title', 'link', 'summary', 'published']

# One record per feed entry; any field an entry lacks becomes an empty string
# instead of raising on attribute access.
articles = [
    {field: entry.get(field, '') for field in news_columns}
    for entry in feed.entries
]

df_news = pd.DataFrame(articles, columns=news_columns)
df_news

Unnamed: 0,title,link,summary,published
0,'Unprecedented' numbers seeking debt help post...,https://www.bbc.com/news/articles/c0er4n1zelro...,Debt charities say that calls have risen compa...,"Sat, 10 Jan 2026 00:08:52 GMT"
1,Musk says X outcry is 'excuse for censorship',https://www.bbc.com/news/articles/ce3kqzepp5zo...,The government is urging Ofcom to use all its ...,"Sat, 10 Jan 2026 11:41:06 GMT"
2,'Out of stock': What went wrong at luxury reta...,https://www.bbc.com/news/articles/czdq1z73pv9o...,"Saks Global, which owns Saks Fifth Avenue and ...","Sat, 10 Jan 2026 03:13:16 GMT"
3,"Trump seeks $100bn for Venezuela oil, but Exxo...",https://www.bbc.com/news/articles/c205dx61x76o...,"Oil executives expressed caution, with one exe...","Sat, 10 Jan 2026 20:28:22 GMT"
4,EU reaches South America trade deal after 25 y...,https://www.bbc.com/news/articles/ceqzj49gny4o...,"Some farmers in Europe opposed the deal, but t...","Fri, 09 Jan 2026 19:35:38 GMT"
5,Excel: The software that's hard to quit,https://www.bbc.com/news/articles/cwyxkzjpp87o...,Companies are trying to wean staff off Excel s...,"Fri, 09 Jan 2026 00:07:59 GMT"
6,Businesses call to be included in any pub rate...,https://www.bbc.com/news/articles/cp80l2ndz5yo...,"Shops, pharmacies and music venues want any cl...","Fri, 09 Jan 2026 11:21:03 GMT"
7,US calls Argentina peso bet a 'homerun deal',https://www.bbc.com/news/articles/cwyngv1keq7o...,The US Treasury Secretary said the US no longe...,"Fri, 09 Jan 2026 17:39:08 GMT"
8,US job creation in 2025 slows to weakest since...,https://www.bbc.com/news/articles/c0r4zd29n5no...,Employment growth for the world's largest econ...,"Fri, 09 Jan 2026 18:55:43 GMT"
9,"Weight loss jabs affecting Greggs, boss says",https://www.bbc.com/news/articles/c20g11y4dywo...,"The Greggs boss says there is ""no doubt"" weigh...","Thu, 08 Jan 2026 15:38:19 GMT"


## Embeddings

In [26]:
# Question used to rank the feed entries and later passed to the orchestrator.
prompt = "How will the inflation change in 2026"


In [27]:
# Sentence-embedding model used to rank feed entries against the prompt.
# `device` is passed explicitly so this model runs on the backend chosen in the
# parameters cell (previously only the unrelated CodeBERT model was moved there).
model = SentenceTransformer(model_path, device=device)

# One text per feed entry ("<title>. <summary>"), capped at the first 50 entries.
texts = (df_news['title'] + ". " + df_news['summary']).tolist()[:50]
# normalize_embeddings=True asks encode() for normalized vectors.
embeddings = model.encode(texts, normalize_embeddings=True)

# The prompt is wrapped in a one-element list and encoded with the same settings.
prompt_embedding = model.encode([prompt], normalize_embeddings=True)

In [28]:
# Row 0 of the similarity matrix holds the prompt's score against every article.
similarity_matrix = cosine_similarity(prompt_embedding, embeddings)
similarities = similarity_matrix[0]

# argsort is ascending, so walk it backwards and keep the first five positions.
ranked_idx = np.argsort(similarities)
top5_idx = ranked_idx[::-1][:5]

In [29]:
# Show the raw similarity scores, one per encoded article text.
similarities

array([ 0.27403015,  0.07234748,  0.0881452 ,  0.16258731,  0.05443028,
        0.0944892 ,  0.1263816 ,  0.18139896,  0.40218782,  0.00206407,
        0.1330554 ,  0.1453911 , -0.01041401,  0.07674169,  0.23151693,
        0.25594574,  0.05666828,  0.30038512,  0.17357102, -0.0214565 ,
        0.20833832,  0.15068904,  0.26749408,  0.05575768,  0.07293187,
        0.09301247,  0.0895467 ,  0.5267395 ,  0.15982535,  0.1034236 ,
       -0.00189881,  0.07611743,  0.14307538,  0.14778556,  0.15880266,
        0.5124323 ,  0.14388642,  0.27403015,  0.13140501,  0.1191556 ,
        0.09606753,  0.17357102,  0.31620955,  0.2388626 ,  0.24402219,
        0.20513324,  0.19724783,  0.13140501,  0.09348457,  0.17975032],
      dtype=float32)

In [30]:
# Report the selected matches in ranked order, one title/score pair per article.
for article_idx in top5_idx:
    headline = df_news['title'].iloc[article_idx]
    score = similarities[article_idx]
    print(f"Title: {headline}", f"Similarity: {score:.4f}", "---", sep="\n")

Title: How tariff disruption will continue reshaping the global economy in 2026
Similarity: 0.5267
---
Title: Why this month's inflation figure may be good news for you
Similarity: 0.5124
---
Title: US job creation in 2025 slows to weakest since Covid
Similarity: 0.4022
---
Title: Budget 2025: What's the best and worst that could happen for Labour?
Similarity: 0.3162
---
Title: Government to water down business rate rise for pubs
Similarity: 0.3004
---


## Parsing data from chosen articles

In [31]:
def get_article_text(url):
    """Download an article page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the article page to fetch.

    Returns:
        The text of every ``<p>`` tag joined with single spaces, with leading
        and trailing whitespace stripped. If anything fails (network error,
        timeout, non-success HTTP status, ...) the string ``"Error: <details>"``
        is returned instead of raising, so one bad link does not abort a batch.
    """
    try:
        response = requests.get(url, timeout=10)
        # Treat 4xx/5xx replies as failures; otherwise the paragraphs of an
        # error page would be returned as if they were the article body.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # TODO: strip boilerplate (ads, navigation, share widgets) from the text.
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
        return text.strip()
    except Exception as e:
        # Best-effort by design: callers receive an "Error: ..." marker string.
        return f"Error: {e}"

In [32]:
# Take the top-ranked rows from the feed table (fresh 0..n-1 index) and attach
# the downloaded body of each linked article as a `content` column.
top5_df = (
    df_news.iloc[top5_idx]
    .reset_index(drop=True)
    .assign(content=lambda frame: frame['link'].apply(get_article_text))
)

# Render each article as a "Title / Date / Content" block.
articles_combined = [
    f"Title: {title}\nDate: {published}\nContent: {content}"
    for title, published, content in zip(
        top5_df['title'], top5_df['published'], top5_df['content']
    )
]

# Blocks are separated by a blank line in the final text.
all_articles_text = "\n\n".join(articles_combined)


In [33]:
# Print the combined article text that is handed to the orchestrator below.
print(all_articles_text)

Title: How tariff disruption will continue reshaping the global economy in 2026
Date: Thu, 08 Jan 2026 00:00:40 GMT
Content: President Trump's favourite word is tariffs. He reminded the world of that in his pre-Christmas "address to the nation". With the world still unwrapping the tariffs "gift" from the first year of his second term in office, he said they were bringing jobs, higher wages and economic growth to the US. That is hotly contested. What is less debatable is that they've refashioned the global economy, and will continue to do so into 2026. The International Monetary Fund (IMF) says that although "the tariff shock is smaller than originally announced", it is a key reason why it now expects the rate of global economic growth to slow to 3.1% in 2026. A year ago, it predicted a 3.3% expansion this year. For the head of the IMF, Kristalina Georgieva, things are "better than we feared, worse than it needs to be". Speaking on a podcast recently she explained that growth had fallen

## Creating agents

In [34]:
# Load environment variables via python-dotenv, then read the Google API key.
# `api_key` is None when GOOGLE_API_KEY is not set.
load_dotenv()
api_key = os.environ.get("GOOGLE_API_KEY")

In [35]:
# Orchestrator comes from the local `orchestrator` module; its internals are
# not visible in this notebook.
orchestrator = Orchestrator(
    model='gemini-3-flash-preview',
    client='google',
    api_key=api_key,
)

# Presumably answers `prompt` using the combined article text - confirm
# against the Orchestrator implementation.
orchestrator.execute(prompt, all_articles_text)


Based on the provided articles, inflation in 2026 is expected to be influenced by several conflicting factors:

**United Kingdom**
*   **General Trend:** In late 2025, UK inflation stood at 3.2%, which is above the Bank of England's 2% target. However, analysts note clear signs that prices are rising at a slower rate, and the Bank of England forecasts that inflation will move "gradually south" (decrease) during 2026.
*   **Upward Pressures:** The 2025 Budget was expected to result in higher inflation than previously anticipated. Additionally, businesses like pubs have faced "astronomical" energy and staffing costs.
*   **Downward Pressures:** Food price increases have begun to slow, with items like olive oil dropping in price due to recovered harvests.

**United States**
*   **Tariff Impact:** Senior economist Aditya Bhave (Bank of America) estimates that tariffs have added between 0.3% and 0.5% to US inflation, which was 2.7% in November 2025. He suggests the "full impact" of these ta