In [8]:
import requests
from bs4 import BeautifulSoup

def get_html_content(url):
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    return soup


def get_article_title(soup):
    title = soup.find('h1', {'id': 'firstHeading'}).text
    return title



def get_article_content(soup):
    article_content = {}
    sections = soup.find_all('div', {'class': 'mw-parser-output'})
    for section in sections:
        subheadings = section.find_all(['h2', 'h3'])
        for subheading in subheadings:
            subheading_text = subheading.text
            article_content[subheading_text] = []
            for sibling in subheading.find_next_siblings():
                if sibling.name in ['h2', 'h3']:
                    break
                if sibling.name == 'p':
                    article_content[subheading_text].append(sibling.text)
    return article_content



def get_internal_links(soup):
    internal_links = []
    for link in soup.find_all('a'):
        href = link.get('href')
        if href is not None and href.startswith('/wiki/') and ':' not in href:
            internal_links.append(href)
    return internal_links


def scrape_wikipedia_page(url):
    soup = get_html_content(url)
    title = get_article_title(soup)
    content = get_article_content(soup)
    internal_links = get_internal_links(soup)
    return {'title': title, 'content': content, 'internal_links': internal_links}

wikipedia_url = "https://en.wikipedia.org/wiki/Web_scraping"
scraped_data = scrape_wikipedia_page(wikipedia_url)
print(scraped_data)

{'title': 'Web scraping', 'content': {'History[edit]': ['The history of web scraping dates back nearly to the time when the World Wide Web was born.\n'], 'Techniques[edit]': ['Web scraping is the process of automatically mining data or collecting information from the World Wide Web. It is a field with active developments sharing a common goal with the semantic web vision, an ambitious initiative that still requires breakthroughs in text processing, semantic understanding, artificial intelligence and human-computer interactions.\n'], 'Human copy-and-paste[edit]': ["The simplest form of web scraping is manually copying and pasting data from a web page into a text file or spreadsheet. Sometimes even the best web-scraping technology cannot replace a human's manual examination and copy-and-paste, and sometimes this may be the only workable solution when the websites for scraping explicitly set up barriers to prevent machine automation.\n"], 'Text pattern matching[edit]': ['A simple yet powe