# Web Scraper to Collect Data and Generate Brochure
This notebook demonstrates how to build a web scraper that collects data from a website, generates a PDF brochure from it, and stores the scraped data in a database. It uses the third-party libraries `requests`, `BeautifulSoup`, and `FPDF`, plus Python's built-in `sqlite3` module.

In [ ]:
# Import required libraries
import requests
from bs4 import BeautifulSoup
from fpdf import FPDF


## Step 1: Scrape Website Data
We will begin by scraping the data from a website, extracting its title and paragraphs.

In [ ]:
# Function to scrape the website
def scrape_website(url, timeout=10):
    """Download a web page and extract its title and paragraph text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a host that
            never answers.

    Returns:
        A ``(title, paragraphs)`` tuple. ``title`` is the text of the page's
        ``<title>`` tag, or ``'No Title Found'`` when the tag is missing or
        has no plain-text content. ``paragraphs`` is a list holding the text
        of every ``<p>`` element, in document order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than scraping an error page as content.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # ``soup.title.string`` is None when <title> is empty or contains nested
    # tags, so the placeholder must cover that case as well as a missing tag.
    title = (soup.title.string if soup.title else None) or 'No Title Found'
    paragraphs = [p.get_text() for p in soup.find_all('p')]
    return title, paragraphs

# Example URL; `url`, `title` and `paragraphs` are reused by the cells below
url = 'https://example.com'  # Replace with the website URL
title, paragraphs = scrape_website(url)

# Print scraped data: the title first, then each paragraph on its own line
print(f'Title: {title}')
for para in paragraphs:
    print(para)

## Step 2: Process Data for Brochure
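Now we will process the data by stripping surrounding whitespace from each paragraph and keeping only the paragraphs that are longer than 50 characters, so that short fragments do not end up in the brochure.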

In [ ]:
# Function to process the data
def process_data(title, paragraphs, min_length=50):
    """Clean scraped paragraphs and bundle them with the title.

    Args:
        title: Page title, passed through unchanged.
        paragraphs: Iterable of paragraph strings as scraped from the page.
        min_length: A paragraph is kept only if, after stripping surrounding
            whitespace, it is strictly longer than this many characters.
            Defaults to 50, which filters out short fragments.

    Returns:
        A dict with keys ``'title'`` and ``'paragraphs'``; the latter is the
        list of stripped paragraphs that passed the length filter, in their
        original order.
    """
    # Strip each paragraph once, then filter on the stripped length.
    stripped = (para.strip() for para in paragraphs)
    return {
        'title': title,
        'paragraphs': [text for text in stripped if len(text) > min_length],
    }

# Example processing: filter the scraped paragraphs and show the resulting
# dict; `processed_data` is the input for the brochure step below
processed_data = process_data(title, paragraphs)
print(processed_data)

## Step 3: Create a PDF Brochure
We will use `fpdf` to format the processed data into a PDF brochure.

In [ ]:
# Function to create the brochure
def create_brochure(data, filename='brochure.pdf'):
    """Render the processed page data as a simple PDF brochure.

    The title is printed centred in bold at the top of the first page,
    followed by each paragraph as a wrapped block of body text.

    Args:
        data: Mapping with a ``'title'`` string and a ``'paragraphs'`` list
            of strings, as returned by ``process_data``.
        filename: Path of the PDF file to write.
    """
    def latin1_safe(text):
        # The built-in Arial font only covers Latin-1. Scraped pages routinely
        # contain other characters (curly quotes, dashes, non-Latin scripts),
        # which would otherwise abort PDF generation with an encoding error;
        # unsupported characters are replaced with '?'.
        text = '' if text is None else str(text)
        return text.encode('latin-1', 'replace').decode('latin-1')

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # Width 0 stretches the cell to the right margin, so the title is centred
    # between the margins; a fixed width of 200 is wider than the printable
    # area of the default page and pushes the text off-centre.
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, txt=latin1_safe(data['title']), ln=True, align='C')
    pdf.ln(10)

    pdf.set_font('Arial', size=12)
    for para in data['paragraphs']:
        pdf.multi_cell(0, 10, txt=latin1_safe(para))

    pdf.output(filename)

# Create the brochure from the processed data, using the default output
# filename 'brochure.pdf'
create_brochure(processed_data)

## Step 4: Integrate Database for Storing Scraped Data
We will now create a simple database using SQLite to store the scraped data.

In [ ]:
# Function to initialize the database
import sqlite3

def initialize_db(db_name='scraped_data.db'):
    """Create the ``webpages`` table in the SQLite database if it is missing.

    Safe to call repeatedly: an existing table is left untouched.

    Args:
        db_name: Path of the SQLite database file; it is created if it does
            not exist yet.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table cannot
            be created.
    """
    conn = sqlite3.connect(db_name)
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS webpages (
                        id INTEGER PRIMARY KEY,
                        title TEXT,
                        content TEXT
                    )''')
        conn.commit()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()

# Function to store scraped data
def store_data(title, paragraphs, db_name='scraped_data.db'):
    """Insert one scraped page into the ``webpages`` table.

    The paragraphs are joined with single spaces and stored as one text
    value in the ``content`` column.

    Args:
        title: Page title to store.
        paragraphs: Iterable of paragraph strings.
        db_name: Path of the SQLite database file. The ``webpages`` table
            must already exist (see ``initialize_db``).

    Raises:
        sqlite3.Error: If the database cannot be opened or the insert fails,
            for example because the table has not been created.
    """
    content = ' '.join(paragraphs)
    conn = sqlite3.connect(db_name)
    try:
        # Parameterized query: scraped text is never interpolated into SQL.
        conn.execute(
            'INSERT INTO webpages (title, content) VALUES (?, ?)',
            (title, content),
        )
        conn.commit()
    finally:
        # Always release the connection, even when the insert fails.
        conn.close()

# Initialize the database: creates the 'webpages' table in the default
# database file 'scraped_data.db' if it does not exist yet
initialize_db()

# Store scraped data into the database; note this stores the raw scraped
# paragraphs (not the filtered ones), joined into a single text value
store_data(title, paragraphs)
print('Data stored in the database successfully.')

## Final Tool
We can now combine everything into a tool that scrapes data, processes it, generates a brochure, and stores the data in a database.

In [ ]:
# Final function to generate brochure and store data
def generate_brochure_from_website(url, filename='brochure.pdf', db_name='scraped_data.db'):
    """Scrape a page, write a PDF brochure from it, and store it in SQLite.

    Args:
        url: Address of the page to scrape.
        filename: Path of the PDF brochure to write.
        db_name: Path of the SQLite database file that receives the scraped
            title and paragraphs.
    """
    title, paragraphs = scrape_website(url)

    # The brochure uses the filtered paragraphs ...
    processed_data = process_data(title, paragraphs)
    create_brochure(processed_data, filename)

    # ... while the database keeps the raw scrape. Make sure the table exists
    # first: a caller-supplied db_name may point at a database that was never
    # initialized, and the insert would then fail with "no such table".
    initialize_db(db_name)
    store_data(title, paragraphs, db_name)

    print(f'Brochure created and data stored for: {url}')

# Example usage: runs the whole pipeline with the default brochure filename
# and database file
generate_brochure_from_website('https://example.com')