# Business News Summarizer

## Importing dependencies

In [23]:
from dotenv import load_dotenv
import os
import requests
import feedparser
from bs4 import BeautifulSoup
import torch
from transformers import AutoTokenizer, AutoModel
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from bs4 import BeautifulSoup

## Model parameters

In [4]:
# Hugging Face checkpoint identifier for the encoder used to embed text.
# NOTE(review): CodeBERT is described by its authors as a model pre-trained on
# programming-language / natural-language pairs; a general-purpose sentence
# embedding checkpoint may suit news headlines better - TODO confirm.
model_path = 'microsoft/codebert-base'

# Choose the compute device: CUDA GPU first, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [5]:
# Load the tokenizer and the encoder from the checkpoint named in `model_path`,
# so the checkpoint is configured in exactly one place instead of being
# repeated as a string literal here.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModel.from_pretrained(model_path)

In [6]:
# Move the encoder to the selected device; the tokenized inputs are sent to the
# same device before each forward pass.
model = model.to(device)

## Creating Dataframe

In [54]:
# BBC Business RSS feed; every feed entry becomes one row of the news table.
rss_url = "http://feeds.bbci.co.uk/news/business/rss.xml"
feed = feedparser.parse(rss_url)

# Fields copied from each entry. Any field an entry lacks is stored as an empty
# string, so one incomplete item cannot abort the whole load.
NEWS_COLUMNS = ['title', 'link', 'summary', 'published']

articles = [
    {column: entry.get(column, '') for column in NEWS_COLUMNS}
    for entry in feed.entries
]

# Passing the columns explicitly keeps the expected schema even when the feed
# returned no entries (otherwise later `df_news['title']` raises KeyError).
df_news = pd.DataFrame(articles, columns=NEWS_COLUMNS)
df_news

Unnamed: 0,title,link,summary,published
0,Government to water down business rate rise fo...,https://www.bbc.com/news/articles/c8e57dexly1o...,The hospitality industry had called for a reth...,"Thu, 08 Jan 2026 16:21:46 GMT"
1,"Weight loss jabs affecting Greggs, boss says",https://www.bbc.com/news/articles/c20g11y4dywo...,"The Greggs boss says there is ""no doubt"" weigh...","Thu, 08 Jan 2026 15:38:19 GMT"
2,Tesco and M&S report strong Christmas food sales,https://www.bbc.com/news/articles/cp82ylmy6d1o...,"Both major retailers report strong food sales,...","Thu, 08 Jan 2026 13:42:32 GMT"
3,Inside the sub-zero lair of the world's most p...,https://www.bbc.com/news/articles/c62r6dvpl5ro...,Faisal Islam gets rare access to Willow - Goog...,"Thu, 08 Jan 2026 08:03:50 GMT"
4,Unions accuse McDonald's of 'repeated harassme...,https://www.bbc.com/news/articles/c2e1g17drr2o...,It follows a BBC investigation three years ago...,"Thu, 08 Jan 2026 00:00:14 GMT"
5,How tariff disruption will continue reshaping ...,https://www.bbc.com/news/articles/czejp3gep63o...,Trump's import levies are still changing the p...,"Thu, 08 Jan 2026 00:00:40 GMT"
6,Workers' rights reforms will cost billions les...,https://www.bbc.com/news/articles/c5yv6n536vno...,Firms implementing the new employment terms ar...,"Thu, 08 Jan 2026 11:07:03 GMT"
7,Trump calls for US military spending to rise m...,https://www.bbc.com/news/articles/cy59kxl2xwzo...,The president also called on defence firms to ...,"Thu, 08 Jan 2026 02:15:51 GMT"
8,Sportswear firm Castore defends price of footb...,https://www.bbc.com/news/articles/c1lzv94qnpyo...,"Tom Beahon, of sportswear firm Castore, says f...","Thu, 08 Jan 2026 09:38:59 GMT"
9,Trump backs ban on institutional investor home...,https://www.bbc.com/news/articles/c0lxz5wn2yzo...,Wall Street investors have bought thousands of...,"Wed, 07 Jan 2026 22:38:36 GMT"


## Embedding

In [45]:
# Take at most the first 50 titles, in the order the feed returned them
# (the recorded `published` values show that order is not strictly by time).
titles = df_news['title'].tolist()[:50]

In [46]:
# Encode every title in a single padded batch; no gradients are needed here.
with torch.no_grad():
    inputs = {
        name: tensor.to(device)
        for name, tensor in tokenizer(
            titles, padding=True, truncation=True, return_tensors="pt"
        ).items()
    }
    outputs = model(**inputs)

# The hidden state at sequence position 0 (the CLS-style first token) is the
# vector most commonly used as the embedding of the whole text.
first_token_states = outputs.last_hidden_state[:, 0, :]
embeddings = first_token_states.cpu().numpy()

print("Embedding shape:", embeddings.shape)

Embedding shape: (50, 768)


## Embedding prompt

In [47]:
# Free-text query; it is embedded with the same tokenizer/model as the titles
# and then compared against the title embeddings.
prompt = "How the stocks of Tesla will change"


In [48]:
# Embed the query exactly like the titles: tokenize, run the encoder without
# gradient tracking, and keep the hidden state of the first token.
with torch.no_grad():
    prompt_inputs = {
        name: tensor.to(device)
        for name, tensor in tokenizer(
            [prompt], padding=True, truncation=True, return_tensors="pt"
        ).items()
    }
    prompt_outputs = model(**prompt_inputs)

prompt_embedding = prompt_outputs.last_hidden_state[:, 0, :].cpu().numpy()

In [49]:
# Cosine similarity between the single prompt vector and every title vector;
# the result has one row (the prompt), which is flattened to a 1-D score array.
# NOTE(review): in the recorded run the top scores all sit within ~0.0002 of
# 0.998, which suggests these embeddings barely separate the titles - consider
# a different pooling strategy or checkpoint; TODO confirm.
similarity_matrix = cosine_similarity(prompt_embedding, embeddings)
similarities = similarity_matrix[0]

# Indices ordered from most to least similar, truncated to the best five.
descending_order = np.argsort(similarities)[::-1]
top5_idx = descending_order[:5]

In [50]:
# Report each selected headline with its similarity score, best match first.
for idx in top5_idx:
    headline = df_news['title'].iloc[idx]
    score = similarities[idx]
    print(f"Title: {headline}", f"Similarity: {score:.4f}", "---", sep="\n")

Title: The showers and baths keeping data centre tech cool
Similarity: 0.9980
---
Title: How the new road safety measures could affect you
Similarity: 0.9980
---
Title: Meet the biggest heat pumps in the world
Similarity: 0.9979
---
Title: Nestle issues global recall of some baby formula products over toxin fears
Similarity: 0.9978
---
Title: The 'magical' blue flower changing farmers' fortunes in India
Similarity: 0.9978
---


## Parsing data from chosen articles

In [None]:
def get_article_text(url, timeout=10):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the article page to fetch.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The text of all ``<p>`` elements joined with single spaces, with
        leading and trailing whitespace removed. On any failure (network
        error, HTTP error status, parsing problem) the string
        ``"Error: <reason>"`` is returned instead of raising.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx replies as failures; otherwise the paragraphs of the
        # error page itself would be returned as if they were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # TODO: remove boilerplate text (ads, navigation, etc.)
        return ' '.join(p.get_text() for p in soup.find_all('p')).strip()
    except Exception as e:
        # Best-effort contract: report the failure as text so that applying
        # this function over a whole column keeps going.
        return f"Error: {e}"

In [52]:
# Select the best-matching rows, renumber them from 0, and add a `content`
# column holding the text fetched from each article link.
top5_df = (
    df_news
    .iloc[top5_idx]
    .reset_index(drop=True)
    .assign(content=lambda frame: frame['link'].apply(get_article_text))
)
top5_df


Unnamed: 0,title,link,summary,published,content
0,The showers and baths keeping data centre tech...,https://www.bbc.com/news/articles/cp8zd176516o...,Finding greener ways to keep giant new data ce...,"Tue, 23 Dec 2025 00:04:45 GMT",They work 24/7 at high speeds and get searingl...
1,How the new road safety measures could affect you,https://www.bbc.com/news/articles/ckgxy9p117no...,The government is proposing lowering alcohol l...,"Wed, 07 Jan 2026 15:37:07 GMT",The UK government has launched the first major...
2,Meet the biggest heat pumps in the world,https://www.bbc.com/news/articles/c17p44w87rno...,Across Europe huge heat pumps are being instal...,"Tue, 16 Dec 2025 00:06:44 GMT","The pipe that will supply the heat pump, drawi..."
3,Nestle issues global recall of some baby formu...,https://www.bbc.com/news/articles/cx2yvx8gnnpo...,The food giant discovered some batches of SMA ...,"Tue, 06 Jan 2026 08:43:47 GMT",Nestle has issued a global recall of some baby...
4,The 'magical' blue flower changing farmers' fo...,https://www.bbc.com/news/articles/clyxln49760o...,Indian farmers are turning to butterfly pea fl...,"Tue, 06 Jan 2026 00:01:52 GMT","""Until a few years ago, the butterfly pea flow..."


## Creating agents