In [2]:
import requests
from bs4 import BeautifulSoup
import wikipediaapi
from IPython.display import display, Image
from PIL import Image as PILImage
from io import BytesIO

# Set up Wikipedia API
# `wiki` is the module-level client that wikipedia_scraper() queries via
# wiki.page(...).exists(); `wiki_lang` selects which language edition it uses.
# NOTE(review): newer wikipedia-api releases appear to require a user-agent
# argument in this constructor — confirm against the installed version.
wiki_lang = "en"
wiki = wikipediaapi.Wikipedia(wiki_lang)

def page_parser(response):
    """Extract the interesting parts of a fetched Wikipedia article page.

    Args:
        response: An HTTP response object exposing ``.text`` (the page HTML)
            and ``.url`` (the address the page was fetched from), such as the
            object returned by ``requests.get``.

    Returns:
        dict with the keys:
            'title'         -- text of the ``h1#firstHeading`` element ('' if absent)
            'content'       -- text of every ``<p>`` in ``div#mw-content-text``,
                               joined with newlines ('' if absent)
            'last_modified' -- text of ``li#footer-info-lastmod`` ('' if absent)
            'contributors'  -- URL of the page's revision-history view
            'toc'           -- the ``div#toc`` element, or None
            'tables'        -- list of ``table.wikitable`` elements
            'graphs'        -- list of ``div.chart-container`` elements
            'image_tags'    -- list of every ``<img>`` element on the page
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import urldefrag

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the article title; tolerate pages without the expected heading
    # instead of failing with AttributeError on None.
    heading = soup.find('h1', {'id': 'firstHeading'})
    title = heading.text if heading is not None else ''

    # Extract the table of contents
    tableofcontents = soup.find('div', {'id': 'toc'})

    # Extract the article content
    content_div = soup.find('div', {'id': 'mw-content-text'})
    content_paragraphs = content_div.find_all('p') if content_div is not None else []
    content = '\n'.join(p.text for p in content_paragraphs)

    # Extract the tables
    tables = soup.find_all('table', {'class': 'wikitable'})

    # Extract the graphs
    graphs = soup.find_all('div', {'class': 'chart-container'})

    # Extract the last modified date
    last_modified_item = soup.find('li', {'id': 'footer-info-lastmod'})
    last_modified = last_modified_item.text if last_modified_item is not None else ''

    # Extract the authors (contributors) - this information is not directly
    # available on the page, so provide a link to the revision history.
    # The link is derived from the response's own URL rather than a global
    # variable, so the function works for any caller. A '#fragment' is
    # dropped first because appending the query string after it would
    # produce a broken link.
    page_url, _fragment = urldefrag(response.url)
    contributors = page_url.replace('/wiki/', '/w/index.php?title=', 1) + '&action=history'

    # Find all the image tags
    image_tags = soup.find_all('img')

    return {
        'title': title,
        'content': content,
        'last_modified': last_modified,
        'contributors': contributors,
        'toc': tableofcontents,
        'tables': tables,
        'graphs': graphs,
        'image_tags': image_tags,
    }

def image_collector(article_data):
    """Download, validate and display the first few images of an article.

    Walks ``article_data['image_tags']`` in order and displays images inline
    until three valid ones have been shown. Tags that cannot be turned into
    a fetchable URL, fail to download, or do not decode as an image are
    skipped.

    Args:
        article_data: Mapping with an 'image_tags' entry holding the
            ``<img>`` elements of the page (anything supporting
            ``.get('src')``).

    Returns:
        None. The images are rendered through ``IPython.display``.
    """
    max_images = 3
    shown = 0

    for img_tag in article_data['image_tags']:
        if shown >= max_images:
            break

        # An <img> without a src attribute used to raise KeyError here.
        src = img_tag.get('src')
        if not src:
            continue

        if src.startswith('//'):
            # Protocol-relative URL: supply the scheme.
            img_url = 'https:' + src
        elif src.startswith(('http://', 'https://')):
            # Already absolute; prefixing a scheme would corrupt it.
            img_url = src
        else:
            # Site-relative paths cannot be resolved without the page URL.
            continue

        try:
            response = requests.get(img_url, timeout=10)
            response.raise_for_status()
            img = PILImage.open(BytesIO(response.content))
            img.verify()  # Verify that the image is valid
        except Exception:
            # Deliberate best-effort: any download or decoding problem just
            # means this image is skipped and the next candidate is tried.
            continue

        display(Image(url=img_url))
        shown += 1
        
def table_format(table):
    """Print an HTML table as left-aligned, space-padded text columns.

    Each ``<tr>`` becomes one printed line. Every column is as wide as its
    longest cell plus one padding space, and columns are separated by a
    single space. Rows with fewer cells than the widest row are padded with
    empty cells.

    Args:
        table: A parsed ``<table>`` element exposing ``find_all`` (rows and
            cells are located with ``find_all('tr')`` and
            ``find_all(['th', 'td'])``), or a falsy value when there is no
            table.

    Returns:
        None. Output goes to stdout; "No table found" is printed when
        ``table`` is falsy or contains no rows.
    """
    rows = table.find_all('tr') if table else []
    if not rows:
        # Also covers a table element without any <tr>, which previously
        # raised IndexError on rows[0].
        print("No table found")
        return

    # Read every cell's text once up front.
    grid = [
        [cell.get_text(strip=True) for cell in row.find_all(['th', 'td'])]
        for row in rows
    ]

    # Size the layout from the widest row, not just the first one, so cells
    # beyond the header's column count are no longer dropped.
    num_columns = max(len(values) for values in grid)
    column_widths = [0] * num_columns
    for values in grid:
        for i, value in enumerate(values):
            column_widths[i] = max(column_widths[i], len(value))

    # Add some padding to each column width
    column_widths = [width + 1 for width in column_widths]

    for values in grid:
        # Pad short rows with empty cells up to the full column count.
        padded = values + [''] * (num_columns - len(values))
        print(' '.join(
            value.ljust(width) for value, width in zip(padded, column_widths)
        ))

def wikipedia_scraper(url):
    """Fetch a Wikipedia article and return its parsed data.

    Args:
        url: Either a full ``http(s)://`` article URL or a bare article
            title. A title is resolved to its canonical URL through the
            module-level ``wiki`` client.

    Returns:
        The dict produced by ``page_parser`` on success, or None when the
        title does not exist, the request fails, or the server answers with
        a status other than 200. An error message is printed in the
        request-failure cases.
    """
    url = url.strip()

    if not url.startswith(('http://', 'https://')):
        # Treat the input as an article title. The page object itself is not
        # an HTTP response, so it must not be handed to page_parser; resolve
        # its URL and fetch the HTML instead.
        page = wiki.page(url)
        if not page.exists():
            return None
        url = page.fullurl

    # Send a GET request to the URL; a timeout keeps a stalled connection
    # from hanging the caller indefinitely.
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Error: Request to {url} failed: {exc}")
        return None

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Error: Request to {url} returned status code {response.status_code}")
        return None

    return page_parser(response)


# Example usage: prompt for an article URL, scrape it, and print a report
# (images first, then title, table of contents, body text, metadata, tables
# and graphs).
print("Enter a valid Wikipedia URL\n")
url = input()

# Run the scraper; a falsy result means nothing usable was retrieved.
article_data = wikipedia_scraper(url)

if not article_data:
    print("Valid article not found. Please try a different URL.")
else:
    # Show the leading images before any text output.
    image_collector(article_data)

    print("Title:", article_data['title'])

    # The table of contents is optional.
    toc = article_data['toc']
    if toc:
        print(toc.get_text())

    print("Content:", article_data['content'])
    print("Last Modified:", article_data['last_modified'])
    print("Contributors:", article_data['contributors'])

    # Render each wikitable as aligned text, numbered from 1.
    tables = article_data['tables']
    if tables:
        print("Tables:")
        for number, table in enumerate(tables, start=1):
            print(f"Table {number}:")
            table_format(table)

    # Dump the text content of each chart container, numbered from 1.
    graphs = article_data['graphs']
    if graphs:
        print("Graphs:")
        for number, graph in enumerate(graphs, start=1):
            print(f"Graph {number}:")
            print(graph.get_text())


Enter a valid Wikipedia URL

https://en.wikipedia.org/wiki/Argentina_national_football_team


Title: Argentina national football team
Content: 

The Argentina national football team (Spanish: Selección de fútbol de Argentina) represents Argentina in men's international football and is administered by the Argentine Football Association, the governing body for football in Argentina.

Nicknamed La Albiceleste ('The White and Sky Blue'), they are the reigning world champions, having won the most recent World Cup in 2022, earning their third star shown by the team's crest. Overall, Argentina has appeared in a World Cup final six times, a record equaled by Italy and surpassed only by Brazil and Germany. Argentina played in the first ever final in 1930, which they lost 4–2 to Uruguay. The following final appearance came 48 years later, in 1978, when the team captained by Daniel Passarella defeated the Netherlands 3–1 in extra time, becoming world champions for the first time. Captained by Diego Maradona, Argentina won their second World Cup eight years later, in 1986, with a 3–2 final