In [64]:
import requests
from bs4 import BeautifulSoup
import urllib.parse
import copy
import itertools

# Settings for playing the game on the German-language Wikipedia.
#   base       - site root; every scraped link is resolved against it
#   start_page - article the search starts from
#   goal_page  - article the search tries to reach
#   bad        - path prefixes (relative to base) of links that are skipped
config_de = dict(
    base="https://de.wikipedia.org",
    start_page="https://de.wikipedia.org/wiki/Zeitzone",
    goal_page="https://de.wikipedia.org/wiki/Februarumsturz",
    bad=[
        "/w/",
        "/Wikipedia",
        "/wiki/Hilfe",
        "/wiki/Spezial",
        "/wiki/Datei",
        "/wiki/Kategorie",
        "/wiki/Portal",
        "/wiki/Wikipedia:",
        "/wiki/Diskussion:",
    ],
)

# Active configuration; the module-level names below are read by the scraping helpers.
config = config_de
base, bad = config["base"], config["bad"]


def format_page(page, timeout=10):
    '''Download a page and return its parsed <body> so it can be scraped.

    Uses the requests and BeautifulSoup libraries.

    page    -- URL of the page to download
    timeout -- seconds to wait for the server before giving up; without a
               timeout requests waits indefinitely and one stalled
               connection would freeze the whole search

    Returns the BeautifulSoup tag for <body>, or None when the parsed
    document has no <body>. Raises requests.exceptions.Timeout when the
    server does not answer in time.
    '''
    response = requests.get(page, timeout=timeout)
    page_ready = BeautifulSoup(response.text, "html.parser").body
    return page_ready


def prepare_bad_urls(base, bad):
    '''Build the absolute URL prefixes of wikipedia subsites that shouldn't be used for the game.

    base -- root URL of the wiki
    bad  -- iterable of path prefixes, each joined onto base

    Returns a list of absolute URLs in the same order as bad.
    '''
    return [urllib.parse.urljoin(base, path_prefix) for path_prefix in bad]


def return_wiki_links(page):
    '''Return the wiki links contained in a page.

    Needs the format_page and prepare_bad_urls functions and the
    module-level base / bad settings.

    page -- URL of the page to scrape

    Every href on the page is resolved against base and its "#fragment"
    is removed. A link is kept only if it lies below base, is not base
    itself and does not start with one of the excluded prefixes.

    Returns a sorted list of unique absolute URLs. The list is empty when
    the page has no usable links or no <body> could be parsed.
    '''
    page_body = format_page(page)
    if page_body is None:
        # format_page yields None when the document has no <body>;
        # treat that as a page without links instead of crashing.
        return []

    # str.startswith accepts a tuple, so one call checks every bad prefix.
    bad_prefixes = tuple(prepare_bad_urls(base, bad))
    wiki_links_in_page = set()

    for link in page_body.find_all('a', href=True):
        # Resolve relative hrefs and cut everything from the first "#".
        full_url = urllib.parse.urljoin(base, link['href']).partition("#")[0]

        # Skip the site root itself (e.g. pure in-page anchors) and external links.
        if full_url == base or not full_url.startswith(base):
            continue

        if full_url.startswith(bad_prefixes):
            continue

        wiki_links_in_page.add(full_url)

    # Sorting makes the result (and thus the crawl order) reproducible across
    # runs; plain set iteration order depends on per-process string hashing.
    return sorted(wiki_links_in_page)


In [65]:
def wikipedia_game(start_page, goal_page):
    '''Search the shortest click path from start_page to goal_page.

    Performs a breadth-first search: round 1 looks at the links on
    start_page, every following round looks at the links of all pages
    discovered in the previous round. Needs the return_wiki_links function.

    start_page -- URL of the page the search starts from
    goal_page  -- URL of the page to reach

    Progress and the result are printed; the function returns None.
    The search stops when the goal is found or when no untested links
    are left.
    '''
    if goal_page == start_page:
        print("DONE: no need for searching a connection, the two pages are identical.")
        print("You are already on {}".format(goal_page))
        return

    round_count = 1
    links_to_test = return_wiki_links(start_page)
    print("LET'S GO: Starting to search the shortest connection between {} and {}".format(start_page, goal_page))

    if goal_page in links_to_test:
        print("DONE: The goal page {} can be reached with one click: {} is a link in {}.".format(goal_page, goal_page, start_page))
        return

    # Every URL that was ever queued for scraping. A set of strings gives a
    # working (and O(1)) membership test, so no page is scraped twice.
    already_tried = {start_page}
    already_tried.update(links_to_test)

    while links_to_test:
        print("Round {} completed without results. Starting round {}.".format(round_count, (round_count + 1)))
        round_count += 1
        new_links = set()

        for link in links_to_test:  # collects all links relevant for the next round.
            new_links_bunch = [x for x in return_wiki_links(link) if x not in already_tried]

            if goal_page in new_links_bunch:
                print("DONE: The goal page {} can be reached with {} clicks: {} connects {} with {}.".format(goal_page, str(round_count), link, start_page, goal_page))
                return

            new_links.update(new_links_bunch)
            already_tried.update(new_links_bunch)

        links_to_test = new_links

    # The frontier is empty: every reachable page was tested without success.
    print("DONE: No connection found between {} and {}: there are no untested links left.".format(start_page, goal_page))

In [66]:
# Now play the game with the start and goal pages of the active configuration.
wikipedia_game(config["start_page"], config["goal_page"])

LET'S GO: Starting to search the shortest connection between https://de.wikipedia.org/wiki/Zeitzone and https://de.wikipedia.org/wiki/Februarumsturz
Round 1 completed without results. Starting round 2.
DONE: The goal page https://de.wikipedia.org/wiki/Februarumsturz can be reached with 2 clicks: https://de.wikipedia.org/wiki/Tschechien connects https://de.wikipedia.org/wiki/Zeitzone with https://de.wikipedia.org/wiki/Februarumsturz.
