# Generate Your GRE Writing Pool file

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tSlVHUwtCfOktzJI--3hWnWs2JML7qXi?usp=sharing)

Link to the Github repository: [https://github.com/ktxlh/gre-writing-pool-scraper](https://github.com/ktxlh/gre-writing-pool-scraper)

Run these cells to create your own numbered (indexed) GRE writing pool files, built directly from the official website.

Your task: Run all 3 cells by clicking the "run" buttons to the left of each one.


## 1. Settings

In [0]:
# Settings cell: every value below is a module-level global. The trailing
# `#@param` comments are Colab form annotations and must stay on the same
# line as the assignment they describe.

# Name your output files
argument_pool_file_name = "Argument pool.txt" #@param {type:"string"}
issue_pool_file_name = "Issue pool.txt" #@param {type:"string"}

# Where to access the pool
argument_url = "https://www.ets.org/gre/revised_general/prepare/analytical_writing/argument/pool" #@param {type:"string"} 
issue_url = "https://www.ets.org/gre/revised_general/prepare/analytical_writing/issue/pool" #@param {type:"string"} 

# Number given to the first prompt; later prompts count up from it.
# Prompt indexes start from 1
start_index = 1 #@param {type:"integer"}

# Separation symbol(s) between each pair of prompts
# Default: Repeat "-" 122 times
separation_symbol = "-" #@param {type:"string"}
# NOTE: "seperation" is misspelled in this name; it is kept as-is because it
# is the name shown in the form and used on the next line.
number_of_seperation_symbols = 122 #@param {type:"integer"}
# The full separator string appended after each "# <index> " header.
separation = separation_symbol * number_of_seperation_symbols

# Starting words of last paragraph of each prompt
# generate_pool() treats a paragraph starting with this text as the end of a
# prompt. NOTE: the name is misspelled ("promp") but generate_pool() reads it
# under exactly this name.
promp_start_word = "Write a response" #@param {type:"string"}

## 2. Generate the Pools

### Define modules

In [0]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup, element
import datetime

def is_good_response(resp):
    """
    Returns True if the response seems to be HTML, False otherwise.

    A response qualifies when its status code is 200 and its Content-Type
    header is present and contains 'html' (compared case-insensitively).
    A response without a Content-Type header yields False rather than
    raising KeyError.
    Ref: https://realpython.com/python-web-scraping-practical-introduction/
    """
    # .get() so a missing header gives None instead of raising; the None
    # check must come before .lower() is called on the value.
    content_type = resp.headers.get('Content-Type')
    return (resp.status_code == 200
            and content_type is not None
            and 'html' in content_type.lower())
  

def simple_get(url, timeout=30):
    """
    Attempts to get the content at `url` by making an HTTP GET request.

    Returns the raw response body (bytes, `resp.content`) when
    `is_good_response` accepts the response, otherwise None. Any
    `RequestException` (including a timeout) is reported with `print`
    and also results in None.

    `timeout` is the number of seconds passed to `requests.get`; without
    it a stalled server would block the call indefinitely.
    Ref: https://realpython.com/python-web-scraping-practical-introduction/
    """
    try:
        # closing() guarantees the streamed response is released even if
        # reading the body fails.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None

    except RequestException as e:
        print('Error during requests to {0} : {1}'.format(url, str(e)))
        return None


def generate_pool(file_name, url):
    """
    Download the prompt pool page at `url` and write it to `file_name`
    as numbered prompts.

    The paragraphs inside the first `div.contents.left` element are read,
    skipping the first two (the original code treated them as the intro).
    A "# <index> <separation>" header is written before each prompt,
    with indexes starting at the global `start_index`. A prompt ends with
    the paragraph that starts with the global `promp_start_word`.

    The file starts with a title (the part of `file_name` before its
    first '.') and a line giving the number of prompts and today's date.

    Raises:
        RuntimeError: if the page could not be downloaded as HTML, or if
            it has no `div.contents.left` element.
    """
    raw_html = simple_get(url)
    if raw_html is None:
        # simple_get returns None on request errors and non-HTML replies.
        raise RuntimeError(f"Could not download an HTML page from {url}")

    bs_html = BeautifulSoup(raw_html, 'html.parser')
    containers = bs_html.select('div.contents.left')
    if not containers:
        raise RuntimeError(
            f"No 'div.contents.left' element found at {url}; "
            "the page layout may have changed"
        )
    paragraphs = [p.text for p in containers[0].select('p')[2:]]

    index = start_index
    topic_count = 0  # counted separately so start_index != 1 stays correct
    entries = []
    at_prompt_start = True
    for paragraph in paragraphs:
        # Emit the header lazily, only when a prompt actually follows, so
        # no trailing header has to be trimmed off afterwards.
        if at_prompt_start:
            entries.append(f"# {index} " + separation)
            at_prompt_start = False
        entries.append(paragraph)
        if paragraph.startswith(promp_start_word):
            topic_count += 1
            index += 1
            at_prompt_start = True

    # Explicit encoding: the scraped text may contain non-ASCII characters
    # and the platform default encoding is not always UTF-8.
    with open(file_name, "w", encoding="utf-8") as fout:
        fout.write(file_name.split('.')[0] + "\n")
        fout.write(f"{topic_count} topics in total. Accessed on {datetime.date.today()}.\n\n")
        # One blank line between consecutive headers and paragraphs.
        fout.write('\n\n'.join(entries) + "\n")


### Generate files

In [0]:
# Build both pool files, Argument first and then Issue, using the file
# names and URLs chosen in the settings cell.
for pool_file_name, pool_url in (
    (argument_pool_file_name, argument_url),
    (issue_pool_file_name, issue_url),
):
    generate_pool(pool_file_name, pool_url)

## 3. Obtain your pools
There is a "folder" icon on the left side of the window. Click it, then hover your cursor over each of the two files named `Argument pool.txt` and `Issue pool.txt` (or whatever names you defined), click the three dots that appear, and choose `Download`.