# Wikipedia Library

In [None]:
# import the library
import wikipedia as wp
import wikipediaapi

In [None]:
# Fetch the "Northrop Grumman" article as a page object.
# `ng` is reused by the following cells (url, links, images, references).
ng = wp.page('Northrop Grumman')

In [None]:
# Read the article's URL from the page object and echo it as the cell output
url = ng.url
url

In [None]:
# get a summary of the page
# wp.summary() expects a page title (a string), not a page object, so read
# the summary directly from the page that was already fetched
summary = ng.summary
summary

In [None]:
# abbreviated summary: only the first two sentences
# wp.summary() takes the page title as a string; auto_suggest=False keeps the
# library from replacing the exact title with a search suggestion
summary = wp.summary(ng.title, sentences=2, auto_suggest=False)
summary

In [None]:
# `links` lists the other wikipedia articles linked from the page
# show just the first ten entries
ng.links[:10]

In [None]:
# List the URLs of the images that appear on the page
ng.images

In [None]:
# List the reference URLs the library reports for the page
ng.references

In [None]:
# helper function to display svg images
from IPython.display import SVG, display


def show_svg(url):
    """Render the SVG identified by `url` in the notebook output."""
    svg_image = SVG(url)
    display(svg_image)

In [None]:
# Display a single image from the page.
# NOTE(review): index 13 is hard-coded; presumably it pointed at an SVG when
# this was written — confirm, since show_svg wraps the URL in an SVG object.
show_svg(ng.images[13])

In [None]:
# get all the svg images
# match the real ".svg" extension (with the dot) and ignore letter case, so
# "*.SVG" files are kept and names that merely end in "svg" are not
images = [x for x in ng.images if x.lower().endswith('.svg')]

# Wikipedia API Library

In [None]:
# Create a wikipediaapi client configured with 'en' and look up the same
# "Northrop Grumman" article through it.
# NOTE(review): newer Wikipedia-API releases appear to require a user agent
# string in the constructor — confirm against the installed version.
import wikipediaapi
wiki_wiki = wikipediaapi.Wikipedia('en')
ng_new = wiki_wiki.page('Northrop Grumman')

In [None]:
# Show the summary of the page fetched through wikipediaapi
ng_new.summary

In [None]:
# Show the title of the page fetched through wikipediaapi
ng_new.title

In [None]:
# the line below fails at runtime (Python has no separate compile step here):
# references cannot be read directly from a page object with this library
#ng_new.references

In [None]:
# test code to attempt to get external links
import wikipediaapi
# Create a Wikipedia API object
wiki = wikipediaapi.Wikipedia('en')
# Get the page for the Python programming language
page = wiki.page('Python (programming language)')
# wikipediaapi's page.links maps article titles to other wiki pages (internal
# links only), so filtering it for an 'http' prefix can never match anything.
# The wikipedia library (imported as wp in the first cell) exposes a page's
# external links through `references`, so fetch them from there instead.
external_links = list(wp.page(page.title, auto_suggest=False).references)
# Print the external links
print(external_links)

# Testing Wikipedia Search Implementation

In [None]:
# Prompt user to enter a search term
list_of_possible_topics = wp.search(input("Enter a search term:\n"))
print("\n")

# Show every candidate title returned by the search
for topic in list_of_possible_topics:
    print(topic)

# Number of candidate titles; 0 means the search found nothing
size = len(list_of_possible_topics)
# URL of the selected article; stays empty when nothing could be resolved
page_url = ''
# Title to look up; stays None when there is nothing valid to look up
chosen_topic = None

if size == 1:
    # Only one topic found (no disambiguation needed), so use it directly
    chosen_topic = list_of_possible_topics[0]
elif size > 1:
    # Several candidates: ask the user to pick one of the listed titles.
    # Surrounding whitespace and letter case are ignored so that small typing
    # differences do not cause a valid choice to be rejected.
    answer = input("\nWhich of the above topics?\n").strip().casefold()
    for topic in list_of_possible_topics:
        if topic.casefold() == answer:
            chosen_topic = topic
            break
# size == 0: nothing to choose from, so the user is not prompted at all

if chosen_topic is not None:
    try:
        # auto_suggest=False: the title comes straight from the search results,
        # so it must not be replaced by a different search suggestion
        page_url = wp.page(chosen_topic, auto_suggest=False).url
    except (wp.exceptions.DisambiguationError, wp.exceptions.PageError) as err:
        # The title is ambiguous or has no page; report it and fall through
        # to the "no matching link" message below
        print("\nCould not open '{}': {}".format(chosen_topic, err))

if (page_url == ''):
    print("\nNo matching link found.\n")
else:
    print("\nHere is the link!\n" + page_url)

# Planned next step (not implemented yet): use tf-idf to identify which topics are most closely
# associated with the queried topic. For example, if a person searched for "hello" and selected the
# corresponding topic on Wikipedia, they would be presented with the following data (along with
# any additional functionality the team sees fit):

# Topic       |          Wikipedia Link          |      Degree of relatedness
# "Hello"     |    [Wiki link for "Hello"]       |              100%
# "Hi"        |    [Wiki link for "Hi"]          |              89%
# "Greetings" |    [Wiki link for "Greetings"]   |              82%

