In [185]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
import json
from random import randint as rand, shuffle, sample
import pandas as pd

In [116]:
def simple_get(url, timeout=30):
    """Fetch `url` with an HTTP GET and return the raw body if it is HTML.

    Args:
        url: Address to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The response body as bytes when `is_good_response` accepts the
        response, otherwise None. None is also returned (after logging)
        when the request raises a RequestException, including timeouts.
    """
    try:
        # closing() guarantees the streamed connection is released.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None

    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None


def is_good_response(resp):
    """Return True when `resp` is a 200 response whose Content-Type mentions HTML.

    Args:
        resp: Response-like object exposing `status_code` and a mapping
            `headers`.

    Returns:
        bool: True only for status 200 with 'html' in the Content-Type
        header (case-insensitive). A missing Content-Type header yields
        False instead of raising KeyError.
    """
    # .get() with a fallback: servers are not required to send Content-Type.
    content_type = (resp.headers.get('Content-Type') or '').lower()
    return resp.status_code == 200 and 'html' in content_type


def log_error(e):
    """Report an error by printing its text form to standard output."""
    message = str(e)
    print(message)

In [138]:
def _column_texts(html, css_class):
    """Return the inner markup of the 'text' div inside every div of `css_class`."""
    return [
        cell.find('div', {'class': 'text'}).encode_contents().decode('utf-8')
        for cell in html.find_all('div', {'class': css_class})
    ]


def getContents(url):
    """Download one page and extract its words and meanings.

    Args:
        url: Page to scrape.

    Returns:
        dict with two lists of strings: 'words' (from the 'col_a' divs) and
        'meanings' (from the 'col_b' divs). Both lists are empty when the
        page could not be downloaded.
    """
    raw_html = simple_get(url)
    if raw_html is None:
        # simple_get returns None on request errors and non-HTML responses;
        # passing None to BeautifulSoup would raise.
        log_error('No HTML returned for {0}'.format(url))
        return {'words': [], 'meanings': []}

    html = BeautifulSoup(raw_html, 'html.parser')
    return {
        'words': _column_texts(html, 'col_a'),
        'meanings': _column_texts(html, 'col_b'),
    }

In [179]:
def getAllWords(base_url='https://www.memrise.com/course/121215/barrons-800-essential-word-list-gre/',
                num_levels=80):
    """Scrape every level page of a course.

    Args:
        base_url: Course URL ending in '/'; level pages are addressed as
            '<base_url><level>/'.
        num_levels: Number of levels to fetch, numbered from 1.

    Returns:
        list of the dicts returned by `getContents`, one per level, in
        level order.
    """
    return [
        getContents('{0}{1}/'.format(base_url, level))
        for level in range(1, num_levels + 1)
    ]

# Scrape all levels once; this issues one HTTP request per level page.
results = getAllWords()

In [183]:
def shuffleOptions(options, meanings):
    """Shuffle the index list `options` in place and map it to meanings.

    Args:
        options: List of indices into `meanings`; reordered in place.
        meanings: Sequence of meaning strings.

    Returns:
        list: `meanings` entries in the shuffled order of `options`.
    """
    shuffle(options)
    picked = []
    for idx in options:
        picked.append(meanings[idx])
    return picked

def processResult(result):
    """Build multiple-choice quiz rows for one scraped level.

    Args:
        result: dict with parallel lists 'words' and 'meanings' (meanings[i]
            is the answer for words[i]).

    Returns:
        list of rows. The first row holds the template's column
        descriptions; each following row is
        [word, option 1..4, '', correct option number (1-based), 15, ''].
        Up to three distractors are drawn from the other meanings; when
        fewer are available the unused option slots are left blank.
    """
    words = result['words']
    meanings = result['meanings']
    rows = [['Text of the question (required)', 'Text for option 1 (required)', 'Text for option 2 (required)', 'Text for option 3 (optional)', 'Text for option 4 (optional)', 'Text for option 5 (optional)', 'Integer (1-5 for the correct option)', 'Time in seconds (optional, default value is 30)', 'Link of the image (optional)']]

    for i, word in enumerate(words):
        # Every meaning except the correct one is a candidate distractor,
        # including the last entry of the list.
        distractor_pool = [j for j in range(len(meanings)) if j != i]
        # sample() raises if asked for more items than the pool holds.
        distractors = sample(distractor_pool, min(3, len(distractor_pool)))

        order = distractors + [i]
        shuffle(order)
        # Locate the answer by index so duplicate meaning texts cannot
        # confuse the lookup.
        correct = order.index(i) + 1

        options = [meanings[j] for j in order]
        options += [''] * (4 - len(options))  # blank out unused option slots
        rows.append([word, options[0], options[1], options[2], options[3], '', correct, 15, ''])
    return rows
        
def processResults(results, output_dir='./xl'):
    """Write one Excel workbook per scraped level.

    Args:
        results: list of dicts as accepted by `processResult`.
        output_dir: Directory receiving the files '<index>.xlsx'; created
            if it does not exist.

    Returns:
        None. Files are written as a side effect.
    """
    import os  # local import: only this function needs it

    headers = ['Question', 'Option 1', 'Option 2', 'Option 3', 'Option 4', 'Option 5', 'Correct Answer', 'Time', 'Image Link']
    # The Excel writer does not create missing parent directories.
    os.makedirs(output_dir, exist_ok=True)
    for i, result in enumerate(results):
        rows = processResult(result)
        df = pd.DataFrame(rows, columns=headers)
        df.to_excel(os.path.join(output_dir, f'{i}.xlsx'), sheet_name='sheet1', index=False)

# Export every scraped level to its own workbook under ./xl/.
processResults(results);

In [186]:
# Scratch check of random.sample; the result is not used elsewhere.
sample([1,3,4,5], 2)

[1, 5]