# Wiki Game Solver
## Data Quality and Text Mining Capstone 2022
Chelsea Cantone, Philip Franco, Eric Retinger

12/7/22

In [2]:
import requests
from bs4 import BeautifulSoup
from nltk.tokenize import word_tokenize
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
from similarityCheck import wordScore
from WikiWebCrawler import buildNetwork, getLinksFromTextBS, playWikiGame, recursiveSearch, createWikiGraph
import time

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def newGame(start_article, end_article, n):
    """Play one Wiki Game with every model, then with the exhaustive search baseline.

    Each model in turn is handed to ``playWikiGame``; afterwards
    ``recursiveSearch`` explores the link graph from ``start_article`` and
    ``buildNetwork`` is used to obtain the reference path.

    Parameters
    ----------
    start_article : str
        Title of the article the game starts from.
    end_article : str
        Title of the article that has to be reached.
    n : int
        Zero-based game index; only used for the "Starting Game n+1" banner.

    Returns
    -------
    dict
        Column-oriented results with one entry per solver (the five models
        followed by 'BFS') under the keys 'Model', 'Steps', 'Time',
        'Actual_Dist', 'Paths_Visited' and 'Win'.

    Raises
    ------
    Exception
        If none of the models reports a win.
    """
    print('Starting Game {}: {} -> {}'.format(n+1, start_article, end_article))

    models = ['L6', 'L12', 'microsoftNet', 'bert', 'roberta']

    result_steps = []
    result_pages_visited = []
    result_time = []
    result_win = []

    for model in models:
        steps, pages_visited, elapsed_time, win = playWikiGame(start_article, end_article, model)
        # str() keeps the log line working whether playWikiGame hands back the
        # elapsed time as text or as a number.
        print(model + ":        It took " + str(steps) + " links to get from " + start_article + " to " + end_article + " in " + str(elapsed_time))
        result_steps.append(steps)
        result_pages_visited.append(pages_visited)
        result_time.append(elapsed_time)
        result_win.append(win)

    if not any(result_win):
        print("All models failed to converge")
        raise Exception("All models failed to converge")

    # Find true distances
    tic = time.perf_counter()
    data = recursiveSearch(5, [start_article], {'source': [], 'target': [], 'weight': []}, end_article)
    toc = time.perf_counter()
    time_search = toc-tic
    # The search only counts as a win when it finished in under 120 seconds.
    search_win = time_search < 120
    print('Time to search was: {}'.format(time_search))

    # Create Network Graph
    df = pd.DataFrame(data)

    # Distinct source pages expanded by the search; their count is what gets
    # reported in the 'Steps' column for the BFS row.
    pages_visited_search = df.source.unique()
    steps_search = len(pages_visited_search)
    # Returns steps to true answer
    # NOTE(review): presumably a node path from start to end (hence the -1
    # below to turn nodes into links) - confirm against buildNetwork.
    steps_true = buildNetwork(df, start_article, end_article)

    models.append('BFS')
    result_steps.append(steps_search)
    result_time.append(time_search)
    result_pages_visited.append(pages_visited_search.tolist())
    result_win.append(search_win)

    # Create Dictionary
    results_dict = {'Model': models,
                   'Steps': result_steps,
                   'Time': result_time,
                   'Actual_Dist': [len(steps_true)-1] * len(models),
                   'Paths_Visited': result_pages_visited,
                   'Win': result_win}

    return results_dict

In [4]:
def getPageViews(start_article):
    """Return the page-view count of an English Wikipedia article.

    Queries the Wikimedia per-article pageviews endpoint (all-access, agent
    type "user", monthly granularity, 2022-11-01 to 2022-12-01) and returns
    the ``views`` value of the first item in the response.

    Parameters
    ----------
    start_article : str
        Article title; spaces are converted to underscores and the result is
        URI-encoded before it is placed in the request path.

    Returns
    -------
    int
        The reported view count, or 0 when the response carries no page-view
        items (for example when the API answers with an error payload).
    """
    from urllib.parse import quote

    # safe="" so that "/" and "?" inside a title are escaped as well; left
    # unescaped they would be read as path/query delimiters.
    start_article_formatted = quote(start_article.replace(" ", "_"), safe="")
    links = "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/en.wikipedia.org/all-access/user/{}/monthly/20221101/20221201".format(start_article_formatted)

    header={
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
    }

    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(links, headers=header, timeout=30).json()

    items = response.get('items') if isinstance(response, dict) else None
    if not items:
        # No page-view record for this title: report zero views rather than
        # raising KeyError/IndexError.
        return 0
    return items[0].get('views', 0)

In [5]:
# Driver cell: play `playGameNTimes` games between random Wikipedia articles
# and write each game's results to its own CSV file.
import os  # local to this cell: only needed to build the output path

playGameNTimes = 100

keys = ['Model', 'Steps', 'Time', 'Actual_Dist', 'Win']

# Directory the per-game CSV files are written to. It must already exist;
# change this when running on another machine.
OUTPUT_DIR = 'C:\\Users\\chels\\PycharmProjects\\TextMining\\FinalProject\\WikiGameSolver'

RANDOM_PAGE_URL = "https://en.wikipedia.org/api/rest_v1/page/random/summary"

# Minimum page views a random article needs to be accepted as start / target.
MIN_START_VIEWS = 1000
MIN_END_VIEWS = 10000

# Spaces plus the characters Windows forbids in file names all become "_",
# so a title such as "AC/DC" or "Who?" cannot make the CSV write fail.
_FILENAME_TABLE = str.maketrans({ch: '_' for ch in ' <>:"/\\|?*'})


def _pickRandomArticle(min_views):
    """Draw random articles until one has at least `min_views` views; return (title, views)."""
    while True:
        page = requests.get(RANDOM_PAGE_URL).json()
        title = page['title']
        try:
            views = getPageViews(title)
        except (KeyError, IndexError):
            # The page-view lookup had no data for this title: draw again
            # instead of aborting the whole batch of games.
            continue
        if views >= min_views:
            return title, views


for i in range(0, playGameNTimes):
    print('\n\n===============================================================')
    # Reset so a failed game never leaves the previous game's results bound
    # to `final_results`.
    final_results = {'Model': [],
                'Steps': [],
                'Time': [],
                'Actual_Dist': [],
                'Paths_Visited':[],
                'Win': []}

    # Generate two random pages
    start_article, views = _pickRandomArticle(MIN_START_VIEWS)
    print('{} : {} views'.format(start_article, views))

    end_article, views = _pickRandomArticle(MIN_END_VIEWS)
    print('{} : {} views'.format(end_article, views))

    try:
        # Run game
        final_results = newGame(start_article, end_article, i)

        # Save dictionary
        df = pd.DataFrame(final_results)
        csv_name = 'game{}_{}_{}.csv'.format(i,
                                             start_article.translate(_FILENAME_TABLE),
                                             end_article.translate(_FILENAME_TABLE))
        df.to_csv(os.path.join(OUTPUT_DIR, csv_name))
        print('DONE')

    except Exception as err:
        # `Exception` rather than a bare except, so Ctrl-C (KeyboardInterrupt)
        # still stops the run; the reason for the failure is reported too.
        print('FAILED!!!!')
        print('{}: {}'.format(type(err).__name__, err))
        continue

    print('===============================================================\n\n')




Pwani Region : 1198 views
The Hindu : 19413 views
Starting Game 1: Pwani Region -> The Hindu
L6 took 23.408142899999987 seconds to complete
Game Win: True
L6:        It took 8 links to get from Pwani Region to The Hindu in 23.408142899999987
L12 took 37.627822899999984 seconds to complete
Game Win: True
L12:        It took 6 links to get from Pwani Region to The Hindu in 37.627822899999984
microsoftNet took 151.4568044 seconds to complete
Game Win: False
microsoftNet:        It took 8 links to get from Pwani Region to The Hindu in 151.4568044
bert took 136.90696989999998 seconds to complete
Game Win: False
bert:        It took 6 links to get from Pwani Region to The Hindu in 136.90696989999998
roberta took 33.05663079999999 seconds to complete
Game Win: True
roberta:        It took 3 links to get from Pwani Region to The Hindu in 33.05663079999999
Search Depth Remaining: 5
Search Depth Remaining: 4
Search Depth Remaining: 3
FOUND: The Hindu
Time to search was: 1299.7931737000001
DONE


KeyboardInterrupt

