# Web Content Aggregator (Web Scraper)

Create a web content aggregator that allows users to input URLs of their favorite news websites, blogs, or information sources. The aggregator will scrape the content from these URLs and present it in a user-friendly interface, allowing users to quickly access and read the latest content from various sources in one place.

In [1]:
import tkinter as tk
import requests
from bs4 import BeautifulSoup

# Function to scrape data from the website
def scrape_data():
    """Fetch the URL typed into ``url_input`` and display its content.

    Downloads the page with ``requests``, parses it with BeautifulSoup and
    shows the ``<title>`` text in ``title_label`` and the text of every
    ``<p>`` element in ``paragraphs_text`` (one paragraph per line).

    Network failures and HTTP error statuses do not propagate out of the Tk
    callback; the error message is written to ``paragraphs_text`` instead.
    Returns None.
    """
    url = url_input.get().strip()
    paragraphs_text.delete('1.0', tk.END)
    try:
        # Without a timeout an unresponsive host would freeze the GUI forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Reset the title to its placeholder so a stale title from an earlier
        # scrape is not shown next to the error text.
        title_label.config(text='Title')
        paragraphs_text.insert(tk.END, f"Error: {e}")
        return
    soup = BeautifulSoup(response.content, 'html.parser')
    # find() returns None when the page has no <title> element.
    title_tag = soup.find('title')
    data = {
        'title': title_tag.get_text() if title_tag is not None else '',
        'paragraphs': [tag.get_text() for tag in soup.find_all('p')],
    }
    # Display the scraped data in the GUI
    title_label.config(text=data['title'])
    for paragraph in data['paragraphs']:
        paragraphs_text.insert(tk.END, paragraph + '\n')

# Build the main window for the scraper.
root = tk.Tk()
root.geometry("1200x600")
root.title('Web Scraper')

# Instantiate every widget first (top-to-bottom in display order):
# URL prompt and entry, the trigger button, the page-title label, and the
# heading plus text area that receive the scraped paragraphs.
url_label = tk.Label(root, text='Website URL:')
url_input = tk.Entry(root, width=50)
scrape_button = tk.Button(root, text='Scrape', command=scrape_data)
title_label = tk.Label(root, text='Title')
paragraphs_label = tk.Label(root, text='Paragraphs')
paragraphs_text = tk.Text(root, width=120, height=25)

# Pack order determines the vertical layout, so keep this sequence intact.
for widget in (
    url_label,
    url_input,
    scrape_button,
    title_label,
    paragraphs_label,
    paragraphs_text,
):
    widget.pack()

# Hand control to Tk's event loop.
root.mainloop()


## Improved version: error handling and JSON/CSV export

In [2]:
import tkinter as tk
import requests
from bs4 import BeautifulSoup
import json
import csv

# Function to scrape data from the website
def scrape_data():
    """Scrape the URL typed into ``url_input``, display it and save it to disk.

    Downloads the page with ``requests``, parses it with BeautifulSoup, shows
    the ``<title>`` text in ``title_label`` and the matching paragraphs in
    ``paragraphs_text``, then writes the same data to ``scraped_data.json``
    and ``scraped_data.csv`` in the current working directory.

    Request failures, HTTP error statuses and file-write failures are
    reported through ``error_label`` rather than raised. Returns None.
    """
    url = url_input.get().strip()
    # Clear any message left over from a previous failed attempt so a
    # successful scrape is not shown alongside a stale error.
    error_label.config(text='')
    try:
        # Without a timeout an unresponsive host would freeze the GUI forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        error_label.config(text=f"Error: {e}")
        return
    soup = BeautifulSoup(response.content, 'html.parser')
    # find() returns None when the page has no <title> element.
    title_tag = soup.find('title')
    # NOTE(review): only <p class="content"> elements are collected, so pages
    # that do not use that class yield no paragraphs -- confirm this filter is
    # intended.
    matches = soup.find_all('p', {'class': 'content'})
    data = {
        'title': title_tag.get_text() if title_tag is not None else '',
        'paragraphs': [tag.get_text() for tag in matches],
    }
    # Display the scraped data in the GUI
    title_label.config(text=data['title'])
    paragraphs_text.delete('1.0', tk.END)
    for paragraph in data['paragraphs']:
        paragraphs_text.insert(tk.END, paragraph + '\n')
    # Store the data in JSON and CSV formats
    filename = 'scraped_data'
    try:
        # Explicit UTF-8 keeps the export independent of the platform's
        # default encoding, which may not be able to encode scraped text.
        with open(f"{filename}.json", 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)
        with open(f"{filename}.csv", 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['Title', 'Paragraph'])
            # One CSV row per paragraph, each repeating the page title.
            writer.writerows(
                [data['title'], paragraph] for paragraph in data['paragraphs']
            )
    except OSError as e:
        error_label.config(text=f"Error: {e}")

# Build the main window for the scraper.
root = tk.Tk()
root.geometry("900x500")
root.title('Web Scraper')

# Instantiate every widget first (top-to-bottom in display order):
# URL prompt and entry, the trigger button, a red label for error messages,
# the page-title label, and the heading plus text area for the paragraphs.
url_label = tk.Label(root, text='Website URL:')
url_input = tk.Entry(root, width=50)
scrape_button = tk.Button(root, text='Scrape', command=scrape_data)
error_label = tk.Label(root, fg='red')
title_label = tk.Label(root, text='Title')
paragraphs_label = tk.Label(root, text='Paragraphs')
paragraphs_text = tk.Text(root, width=100, height=20)

# Pack order determines the vertical layout, so keep this sequence intact.
for widget in (
    url_label,
    url_input,
    scrape_button,
    error_label,
    title_label,
    paragraphs_label,
    paragraphs_text,
):
    widget.pack()

# Hand control to Tk's event loop.
root.mainloop()
