In [2]:
import csv
import os
import sqlite3
from contextlib import closing
from pathlib import Path
from typing import Optional
from urllib.request import urlopen, urlretrieve
from urllib.parse import urlparse, urljoin

from bs4 import BeautifulSoup

In [3]:
def _strip_www(netloc: str) -> str:
    """Return ``netloc`` without a leading ``www.`` label (case-insensitive)."""
    return netloc[4:] if netloc.lower().startswith("www.") else netloc


def get_absolute_url(base_url: str, source: str) -> Optional[str]:
    """Resolve a scraped ``src`` value to an absolute URL on the base site.

    The result is ``scheme://host/path``: a leading ``www.`` is removed from
    the host, and any params, query string and fragment are dropped.

    Args:
        base_url: Root URL of the site being scraped.
        source: Raw ``src`` value; may be absolute, relative,
            protocol-relative (``//host/path``) or scheme-less (``www.host/path``).

    Returns:
        The normalized absolute URL, or ``None`` when ``source`` is blank or
        its host differs from the host of ``base_url`` (ignoring ``www.`` and
        letter case).
    """
    source = source.strip()
    if not source:
        return None

    if source.startswith("www."):
        # Scheme-less host reference: urljoin would treat it as a relative
        # path, so give it an explicit scheme instead.
        url = f"http://{source}"
    else:
        # urljoin returns absolute sources as they are and resolves relative
        # and protocol-relative ones against base_url.
        url = urljoin(base_url, source)

    parsed = urlparse(url)
    host = _strip_www(parsed.netloc)
    base_host = _strip_www(urlparse(base_url).netloc)

    # Compare hosts only after normalizing both sides, so the "www." variant
    # of the base site still counts as internal.
    if host.lower() != base_host.lower():
        return None

    return f"{parsed.scheme}://{host}{parsed.path}"


def get_download_path(base_url: str, absolute_url: str, download_dir: str) -> Path:
    """Map a site URL to a local file path under ``download_dir``.

    The part of ``absolute_url`` that follows ``base_url`` becomes the path
    relative to ``download_dir``. When the URL does not start with
    ``base_url``, the URL's own path component is used instead. The parent
    directory of the result is created if it does not exist yet.

    Args:
        base_url: Root URL of the site; a trailing ``/`` is optional.
        absolute_url: Absolute URL of the resource to be stored.
        download_dir: Local directory that mirrors the site layout.

    Returns:
        The local path the resource should be written to.
    """
    if not base_url.endswith("/"):
        base_url += "/"

    # Strip the base only as a prefix; str.replace would also remove matches
    # that happen to occur later in the URL.
    if absolute_url.startswith(base_url):
        relative_path = absolute_url[len(base_url):]
    else:
        relative_path = urlparse(absolute_url).path

    # Drop empty, "." and ".." segments: a leading "/" would make pathlib
    # discard download_dir, and ".." could climb out of it.
    segments = [seg for seg in relative_path.split("/") if seg not in ("", ".", "..")]
    download_path = Path(download_dir).joinpath(*segments)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    download_path.parent.mkdir(parents=True, exist_ok=True)

    return download_path

In [None]:
download_dir = "downloaded"
base_url = "http://pythonscraping.com"
url = "http://www.pythonscraping.com"

# Parse inside the ``with`` block so the HTTP response is closed as soon as
# the page has been read.
with urlopen(url) as html:
    bs = BeautifulSoup(html, "html.parser")

# Every tag that carries a ``src`` attribute.
download_list = bs.find_all(src=True)

for download in download_list:
    # get_absolute_url returns None for sources that should be skipped.
    if file_url := get_absolute_url(base_url, download["src"]):
        print(file_url)
        download_path = get_download_path(base_url, file_url, download_dir)
        urlretrieve(file_url, download_path)

In [3]:
# newline="" hands line-ending control to the csv module; without it,
# platforms that translate "\n" on write end up with an extra "\r" per row.
# Plain "w" is enough because the file is only written, never read back.
with open("test.csv", "w", newline="") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(("number", "number plus 2", "number times 2"))
    for i in range(10):
        writer.writerow((i, i + 2, i * 2))

In [39]:
def execute_sql_from_file(
    sql_file: str,
    db_path: str = "../sqlite/db/chapter6.db",
    sql_dir: str = "../sqlite/sql/chapter5",
) -> None:
    """Execute the SQL statement stored in a script file and commit it.

    The script is passed to ``Cursor.execute``, which runs a single SQL
    statement.

    Args:
        sql_file: File name of the script, looked up inside ``sql_dir``.
        db_path: SQLite database file to run the statement against.
        sql_dir: Directory that contains the SQL scripts.

    Raises:
        OSError: If the script file cannot be read.
        sqlite3.Error: If the database cannot be opened or the statement fails.
    """
    # NOTE(review): the default database lives under "chapter6" while the
    # default scripts live under "chapter5" - confirm this is intentional.

    # Read the script first: sqlite3.connect creates a missing database file,
    # so a bad script name should fail before the database is touched.
    with open(f"{sql_dir}/{sql_file}", "r") as script:
        command = script.read()

    # closing() releases only what was actually opened, so a failure in
    # connect() or cursor() surfaces as-is instead of being masked by a
    # cleanup step that refers to names which were never bound.
    with closing(sqlite3.connect(db_path)) as conn:
        with closing(conn.cursor()) as cursor:
            cursor.execute(command)
            conn.commit()


def describe_table(table_name: str, db_path: str = "../sqlite/db/chapter6.db") -> None:
    """Print the ``PRAGMA table_info`` rows of a table, one per line.

    Args:
        table_name: Name of the table to inspect. It is interpolated into the
            SQL text verbatim, so it must come from a trusted source.
        db_path: SQLite database file to inspect.

    Raises:
        sqlite3.Error: If the database cannot be opened or the query fails.
    """
    # closing() releases only what was actually opened, so a failure in
    # connect() or cursor() is not masked by cleanup of unbound names.
    with closing(sqlite3.connect(db_path)) as conn:
        with closing(conn.cursor()) as cursor:
            cursor.execute(f"PRAGMA table_info({table_name})")
            columns_info = cursor.fetchall()

    for column_info in columns_info:
        print(column_info)


def print_table_data(table_name: str, db_path: str = "../sqlite/db/chapter6.db") -> None:
    """Print every row of a table, one row tuple per line.

    Args:
        table_name: Name of the table to dump. It is interpolated into the
            SQL text verbatim, so it must come from a trusted source.
        db_path: SQLite database file to read from.

    Raises:
        sqlite3.Error: If the database cannot be opened or the query fails.
    """
    # closing() releases only what was actually opened, so a failure in
    # connect() or cursor() is not masked by cleanup of unbound names.
    with closing(sqlite3.connect(db_path)) as conn:
        with closing(conn.cursor()) as cursor:
            cursor.execute(f"SELECT * FROM {table_name}")
            table_data = cursor.fetchall()

    for row in table_data:
        print(row)

In [43]:
# Execute create_pages_table.sql, then print the column info of `pages`.
execute_sql_from_file("create_pages_table.sql")
describe_table("pages")

(0, 'id', 'INTEGER', 0, None, 1)
(1, 'title', 'TEXT', 0, None, 0)
(2, 'content', 'TEXT', 0, None, 0)
(3, 'created', 'TIMESTAMP', 0, 'CURRENT_TIMESTAMP', 0)


In [44]:
# Execute insert_3items.sql, then print every row of `pages`.
execute_sql_from_file("insert_3items.sql")
print_table_data("pages")

(1, 'test title 1', 'test content 1', '2024-02-03 07:22:33')
(2, 'test title 2', 'test content 2', '2024-02-03 07:22:33')
(3, 'test title 3', 'test content 3', '2024-02-03 07:22:33')


In [45]:
# Execute update_item3.sql, then print every row of `pages` to show the result.
execute_sql_from_file("update_item3.sql")
print_table_data("pages")

(1, 'test title 1', 'test content 1', '2024-02-03 07:22:33')
(2, 'test title 2', 'test content 2', '2024-02-03 07:22:33')
(3, 'new title 3', 'new content 3', '2024-02-03 07:22:33')


In [46]:
# Execute delete_item3.sql, then print every row of `pages` to show the result.
execute_sql_from_file("delete_item3.sql")
print_table_data("pages")

(1, 'test title 1', 'test content 1', '2024-02-03 07:22:33')
(2, 'test title 2', 'test content 2', '2024-02-03 07:22:33')
