Sure, I can help you with that. Here's a step-by-step guide along with Python code to implement your project:

Step 1: Retrieve the 10 most recent news articles
You can use web scraping libraries like BeautifulSoup or Scrapy to extract the news articles from the specified website.

In [2]:
import requests
from bs4 import BeautifulSoup

def get_recent_articles(url):
    """Fetch a news listing page and return its first ten <article> elements.

    Args:
        url: Address of the page that lists the articles.

    Returns:
        A list of at most ten BeautifulSoup tags, in document order. Whether
        document order means "most recent first" depends on the site's markup.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or when the timeout
            expires.
    """
    # Without a timeout a stalled server would block the caller indefinitely.
    response = requests.get(url, timeout=10)
    # Fail loudly on error responses instead of parsing an error page as if
    # it contained articles.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # Assumes every article is wrapped in an <article> tag; `limit` stops the
    # search as soon as ten matches have been collected.
    return soup.find_all('article', limit=10)

# Fetch the listing page once; the later steps reuse this `articles` list.
articles = get_recent_articles("https://www.mediapool.bg/news/")


Step 2: Summarize each article using BgGPT
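The example below calls the OpenAI completions API as a stand-in for BgGPT; to use BgGPT itself, replace the model name and endpoint with the ones supplied by your BgGPT provider. Either way, make sure you have your API key.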

In [3]:
import openai

import os

# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# written into (and committed with) this file. The placeholder is kept as the
# fallback, so behaviour is unchanged when the variable is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'your-api-key')

def summarize_article(article_text):
    """Ask the completion model for a short summary of one article.

    Args:
        article_text: Plain text of the article.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string when ``article_text`` is empty, ``None`` or whitespace only.
    """
    if not article_text or not article_text.strip():
        # Nothing to summarize; skip the API call entirely.
        return ""

    # A completion model given only the raw article tends to continue the
    # text rather than summarize it, so the instruction is stated explicitly.
    prompt = (
        "Summarize the following news article in a few sentences, "
        "in the same language as the article.\n\n"
        f"{article_text.strip()}\n\n"
        "Summary:"
    )
    # NOTE(review): `openai.Completion` and "text-davinci-003" belong to the
    # legacy (pre-1.0) OpenAI SDK; confirm the installed SDK version and the
    # model are still available before relying on this call.
    summary = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=150,
        temperature=0.7
    )
    return summary.choices[0].text.strip()

# Build one summary per article, keeping `summaries` index-aligned with
# `articles` so later steps can pair them positionally.
summaries = []
for article in articles:
    # Assumes the body lives in <div class="article-text">. When that
    # container is missing, fall back to the whole element's text instead of
    # crashing with AttributeError on `None.text`.
    body = article.find('div', class_='article-text')
    article_text = (body if body is not None else article).text
    summaries.append(summarize_article(article_text))


Step 3: Present the articles in Streamlit GUI
You can use Streamlit to create a user interface to display the articles and summaries.

In [4]:
import streamlit as st

def display_articles_with_summaries(articles, summaries):
    """Render each article's title, summary and link on the Streamlit page.

    Args:
        articles: Parsed article elements (BeautifulSoup tags). The title is
            assumed to be in the first <h2> and the link in the first <a>.
        summaries: Summary strings in the same order as ``articles``.

    Articles and summaries are paired positionally; if the two sequences
    differ in length, the unpaired trailing items are not rendered.
    """
    for number, (article, summary) in enumerate(zip(articles, summaries), start=1):
        heading = article.find('h2')
        anchor = article.find('a')
        # A teaser without a heading or link should not take the whole page
        # down, so missing pieces are replaced or skipped.
        title = heading.text if heading is not None else "(no title)"
        link = anchor.get('href') if anchor is not None else None

        st.title(f"Article {number}")
        st.write(f"Title: {title}")
        st.write(f"Summary: {summary}")
        if link:
            st.write(f"Read more: {link}")
        st.write("----")

# Render the scraped articles together with their summaries.
display_articles_with_summaries(articles, summaries)


Step 4: Retrieve articles periodically
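You can use a scheduling library such as `schedule` to retrieve articles at specific times, for example `schedule.every().day.at("08:00").do(job)` followed by a loop that calls `schedule.run_pending()`.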

Step 5: Store articles
You can store articles and their summaries in an SQLite database or in files.

In [6]:
import sqlite3

# Persist the title, summary and link of every article in articles.db.
conn = sqlite3.connect('articles.db')
try:
    c = conn.cursor()

    # Create table
    c.execute('''CREATE TABLE IF NOT EXISTS articles
                 (id INTEGER PRIMARY KEY, title TEXT, summary TEXT, link TEXT)''')

    # Insert data
    # Assumes each article has an <h2> title and an <a href=...> link.
    rows = [
        (article.find('h2').text, summary, article.find('a')['href'])
        for article, summary in zip(articles, summaries)
    ]
    # The id column is left out on purpose: SQLite assigns the next free
    # INTEGER PRIMARY KEY itself. Inserting fixed ids 1..n would make every
    # run after the first fail with a primary-key conflict.
    c.executemany(
        "INSERT INTO articles (title, summary, link) VALUES (?, ?, ?)",
        rows,
    )

    conn.commit()
finally:
    # Close the connection even when scraping assumptions or SQL fail.
    conn.close()


In [8]:
import os
from datetime import datetime

# Name of the news source; it is used only to label the output folder.
# NOTE(review): "NYTimes" is a placeholder - set it to the site actually
# being scraped.
source = "NYTimes"

# Format current time as a string for the folder name, e.g., '20240413_0750'
time_of_summarization = datetime.now().strftime('%Y%m%d_%H%M')

# Create a folder name using the current time and source
folder_name = f"articles_{time_of_summarization}_{source}"

# exist_ok=True creates the folder if needed without a separate, race-prone
# existence check. Note that a second run within the same minute reuses the
# folder and overwrites its files.
os.makedirs(folder_name, exist_ok=True)

# Example data to simulate the environment for demonstration.
# These assignments replace any `articles` / `summaries` defined earlier.
articles = ["Article 1 content", "Article 2 content"]
summaries = ["Summary 1 content", "Summary 2 content"]

# Save each article and summary as a separate file
for i, (article, summary) in enumerate(zip(articles, summaries), start=1):
    article_file_path = os.path.join(folder_name, f"article_{i}.txt")
    summary_file_path = os.path.join(folder_name, f"summary_{i}.txt")

    # An explicit UTF-8 encoding keeps non-ASCII article text from failing or
    # being mangled on platforms whose default encoding is not UTF-8.
    with open(article_file_path, "w", encoding="utf-8") as f:
        f.write(article)
    with open(summary_file_path, "w", encoding="utf-8") as f:
        f.write(summary)
