**Imports**

In [1]:
import bs4
import requests
import time

**Get the first normal link in the body of a Wikipedia article**


In [2]:
def get_first_link(link):
  """
    Return the first usable article link found in the body paragraphs of a
    Wikipedia article.

    The page at `link` is downloaded and the paragraphs inside the
    'mw-parser-output' element of the 'mw-content-text' element are scanned
    in document order. Every anchor of every paragraph is considered, and the
    first one that satisfies all of the rules below is returned:
      * it has a non-empty 'href' attribute,
      * the href is a site-relative article path (it starts with '/wiki/'),
      * it does not point back to the page being read (neither the requested
        URL nor the URL the request ended up at after redirects; '#fragment'
        parts are ignored for this comparison).

    args:
      link: type = 'string'
            the Wikipedia link to use

    return: type = 'string' or None
            the absolute URL of the next link to use, or None when the
            content elements are missing or no paragraph holds a usable link

    raises:
      Exceptions raised by requests.get (for example on timeout) are not
      caught here and propagate to the caller.
  """
  base_url = 'https://en.wikipedia.org'

  # Request the link; the timeout keeps a stalled connection from hanging the search
  response = requests.get(link, timeout=10)
  # Parse the returned html
  soup = bs4.BeautifulSoup(response.text, "html.parser")

  # Select the content div, treating a page without it as a dead end
  content_div = soup.find(id='mw-content-text')
  if content_div is None:
    return None
  parser_output = content_div.find(class_="mw-parser-output")
  if parser_output is None:
    return None

  # URLs that count as "the current page": the one we asked for and the one
  # we were redirected to, both without any '#fragment'
  current_pages = {link.split('#', 1)[0]}
  if response.url:
    current_pages.add(response.url.split('#', 1)[0])

  for paragraph in parser_output.find_all('p'):
    for anchor in paragraph.find_all('a'):
      href = anchor.get('href')
      # Skip anchors without a target and anything that is not a
      # site-relative article path (citations, external or absolute URLs)
      if not href or not href.startswith('/wiki/'):
        continue
      candidate = base_url + href
      # Skip links that lead back to the page we are currently reading
      if candidate.split('#', 1)[0] in current_pages:
        continue
      return candidate

  # Failed to find a normal link
  return None


**Examine the link and decide whether it is the goal or not**

In [3]:
def print_visited_links(visited_links):
  """
    Print a banner line followed by every visited link, one per line,
    each prefixed with its 1-based position.

    args:
      visited_links: type = 'list of strings'
            the links to print, in visiting order
  """
  print('******************** Visited links ********************')
  for position, url in enumerate(visited_links, start=1):
    numbered_entry = str(position) + '- ' + url
    print(numbered_entry)

def examine_link(current_link, goal_link, visited_links):
  """
    Decide whether the search should continue with `current_link`.

    When the search has to stop, the reason is printed, followed by the
    list of visited links.

    args:
      current_link: type = 'string'
            the Wikipedia link currently being looked at
      goal_link: type = 'string'
            the Wikipedia link we hope to reach
      visited_links: type = 'list of strings'
            every link visited so far; it is updated in place

    return: type = 'bool'
            True to keep the search going, False to terminate it.
  """

  # Work out why (if at all) the search stops, and whether the current
  # link still has to be recorded before the visited links are printed
  if current_link in visited_links:
    # The link was seen before
    reason, record_link = 'Loop has been detected', True
  elif not current_link:
    # The previous article offered no link to follow
    reason, record_link = 'Dead end, could not reach the goal', False
  elif current_link == goal_link:
    # The goal link has been reached
    reason, record_link = 'The target link has been reached', True
  else:
    # Nothing stops the search: remember the link and carry on
    visited_links.append(current_link)
    return True

  print(reason)
  if record_link:
    visited_links.append(current_link)
  print_visited_links(visited_links)
  return False

**Main function**

In [4]:
def main(current_link, goal_link):
  """
    Follow first links starting at `current_link` until examine_link
    reports that the search should stop.

    args:
      current_link: type = 'string'
            the Wikipedia link to start from
      goal_link: type = 'string'
            the Wikipedia link we hope to reach
  """
  # Start with an empty history of visited links
  history = []
  while True:
    # Stop as soon as the current link ends the search
    if not examine_link(current_link, goal_link, history):
      break
    # Move on to the first link of the current article
    current_link = get_first_link(current_link)
    # Pause for half a second between requests to avoid being blocked
    time.sleep(.5)


In [13]:
# Run the search: start at the Special:Random URL, with the Philosophy article as the goal
main('https://en.wikipedia.org/wiki/Special:Random', 'https://en.wikipedia.org/wiki/Philosophy')


The target link has been reached
******************** Visited links ********************
1- https://en.wikipedia.org/wiki/Special:Random
2- https://en.wikipedia.org/wiki/Australian_rules_football
3- https://en.wikipedia.org/wiki/Kick_(football)
4- https://en.wikipedia.org/wiki/Football
5- https://en.wikipedia.org/wiki/Team_sport
6- https://en.wikipedia.org/wiki/Sport
7- https://en.wikipedia.org/wiki/Competition
8- https://en.wikipedia.org/wiki/Goal
9- https://en.wikipedia.org/wiki/Idea
10- https://en.wikipedia.org/wiki/Philosophy
