In [170]:
from bs4 import BeautifulSoup
from urllib.request import urlopen
from urllib.parse import urlparse, urljoin
from string import punctuation
import requests

def is_valid(url):
    """Tell whether `url` has both a scheme and a network location.

    Returns True only when parsing `url` yields a non-empty scheme
    (e.g. ``https``) and a non-empty netloc (e.g. ``en.wikipedia.org``).
    """
    parts = urlparse(url)
    if not parts.netloc:
        return False
    return bool(parts.scheme)

def get_all_website_links(url, timeout=10):
    """Return the set of absolute URLs linked from the page at `url`.

    Every ``<a>`` tag with a non-empty ``href`` is resolved against `url`,
    stripped of its query string and fragment, and collected. Links that
    point to other domains are included alongside same-site links.

    Args:
        url: Address of the page to scan.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``.

    Returns:
        A set of strings of the form ``scheme://netloc/path``.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            (including when `timeout` elapses).
    """
    # Without a timeout requests may block forever on an unresponsive host.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")

    urls = set()
    for anchor in soup.find_all("a"):
        href = anchor.get("href")
        if not href:
            # Missing or empty href: nothing to follow.
            continue

        # Resolve relative links against the page address.
        absolute = urljoin(url, href)

        # Validate BEFORE rebuilding the URL. Rebuilding glues "://" between
        # scheme and path, which would turn "mailto:a@b.com" into
        # "mailto://a@b.com" and make it look like it has a network location.
        if not is_valid(absolute):
            continue

        # Drop URL GET parameters, fragments, etc.
        parsed_href = urlparse(absolute)
        urls.add(parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path)

    return urls



def search_date(url):
    """Return the text of the table cell that follows the "Date" label.

    The page at `url` is downloaded and searched for a text node that is
    exactly ``"Date"``; the text of the first ``<td>`` element after that
    node is returned.

    Args:
        url: Address of the page to search.

    Returns:
        The text of the cell, or None when the page has no "Date" label or
        no ``<td>`` after it.
    """
    # The context manager closes the HTTP response once parsing is done.
    with urlopen(url) as html_page:
        soup = BeautifulSoup(html_page, "lxml")

    label = soup.find(string="Date")
    if label is None:
        return None

    cell = label.find_next("td")
    if cell is None:
        return None

    return cell.text


def extract_para(url):
    """Return the text of the paragraph with the most words on the page.

    The page at `url` is downloaded and every ``<p>`` element is measured by
    the number of whitespace-separated tokens in its text nodes. When several
    paragraphs tie, the first one encountered wins.

    Args:
        url: Address of the article to scan.

    Returns:
        The text of the largest paragraph, or an empty string when the page
        has no paragraph containing any words.
    """
    # Fetch and parse the page here instead of relying on a global `soup`.
    # The context manager closes the HTTP response once parsing is done.
    with urlopen(url) as html_page:
        soup = BeautifulSoup(html_page, "lxml")

    largest_count = 0
    largest_text = ""

    for para in soup.find_all("p"):
        # Only the number of tokens matters, so count them directly rather
        # than building a throwaway word list. Trimming punctuation from a
        # token never changes how many tokens there are.
        word_count = sum(
            len(chunk.split()) for chunk in para.find_all(string=True)
        )

        if word_count > largest_count:
            largest_count = word_count
            largest_text = para.get_text()

    return largest_text
    
# Run each scraper against the same article, printing every result in turn.
url = "https://en.wikipedia.org/wiki/French_Revolution"
for scraper in (get_all_website_links, search_date, extract_para):
    print(scraper(url))

{'https://en.wikipedia.org/wiki/Women_in_the_French_Revolution', 'https://en.wikipedia.org/wiki/House_of_Valois', 'https://en.wikipedia.org/wiki/Carolingian_dynasty', 'https://en.wikipedia.org/wiki/Strike_action', 'https://en.wikipedia.org/wiki/Vent%C3%B4se_Decrees', 'https://en.wikipedia.org/wiki/Jean_Sylvain_Bailly', 'https://en.wikipedia.org/wiki/Louis_XIV_of_France', 'https://te.wikipedia.org/wiki/%E0%B0%AB%E0%B1%8D%E0%B0%B0%E0%B1%86%E0%B0%82%E0%B0%9A%E0%B1%8D_%E0%B0%B5%E0%B0%BF%E0%B0%AA%E0%B1%8D%E0%B0%B2%E0%B0%B5%E0%B0%82', 'https://en.wikipedia.org/wiki/Eug%C3%A8ne_Delacroix', 'https://en.wikipedia.org/wiki/Imperial,_royal_and_noble_ranks', 'https://en.wikipedia.org/wiki/History_of_Europe', 'https://en.wikipedia.org/wiki/Special:RecentChangesLinked/French_Revolution', 'https://en.wikipedia.org/wiki/Battle_of_Fishguard', 'https://en.wikipedia.org/wiki/Frederick_William_II_of_Prussia', 'https://en.wikipedia.org/wiki/Thermidorian_Reaction', 'https://en.wikipedia.org/wiki/Democratic_

5 May 1789 – 9 November 1799 (1789-05-05 – 1799-11-09)(10 years, 6 months and 4 days)
Over the course of the 18th century, there emerged what the philosopher Jürgen Habermas called the idea of the "public sphere" in France and elsewhere in Europe.[21]:26 Habermas argued that the dominant cultural model in 17th-century France was a "representational" culture, which was based on a one-sided need to "represent" power with one side active and the other passive.[21]:26 A perfect example would be the Palace of Versailles, which was meant to overwhelm the senses of the visitor and convince one of the greatness of the French state and Louis XIV.[21]:26 Starting in the early 18th century the "public sphere" emerged which was "critical" in that both sides were active.[21]:26–7 Examples of the public sphere included newspapers, journals, masonic lodges, coffee houses and reading clubs where people either in person or virtually via the printed word debated and discussed issues.[21]:27 In France, t