# Explore the Spacer API

In [1]:
import pandas as pd

from spacer import Posts, Threads, config

## Configuration

*config* exposes user-settable options for the HTTP requests made by the library.

The values below are the default values:

In [2]:
# Number of times a request is retried after a failure
config.max_retries = 0
# Determines the delay between retries; see
# https://urllib3.readthedocs.io/en/stable/reference/urllib3.util.html
config.retry_backoff_factor = 0.1
# HTTP status codes after which a request is retried
config.retry_http_codes = [429, 500, 503]

## Extract threads from a forum

In [3]:
# Forum identifier handed to Threads below
# (presumably the forum's URL slug plus its numeric id — TODO confirm)
forum = 'space-travel-spacex.47'

In [4]:
# Fetch just the first page of the forum, then pull the threads out of it
first_forum_page = Threads().get(forum)
threads = first_forum_page.extract()

In [5]:
# Peek at the first extracted entry to see what a thread identifier looks like
threads[0]

'nasas-perseverance-is-exploring-mars-come-watch-updates-with-us.37226'

In [6]:
# Walk every page of the forum and flatten the per-page results into a single list
pages = Threads().paginate(forum)
all_threads = [entry for page in pages for entry in page.extract()]

In [7]:
# Total number of threads gathered across all forum pages
len(all_threads)

2663

## Extract posts from a thread

In [8]:
# Use the first thread from the forum's first page for the post examples below
thread = threads[0]

In [9]:
# Fetch just the first page of the thread, then pull the posts out of it
first_thread_page = Posts().get(thread)
posts = first_thread_page.extract()

In [10]:
# Turn the extracted posts into a dataframe and show which fields each post carries.
# The bare last expression lets the notebook display the result; no print() needed.
df = pd.DataFrame(posts)
df.columns

Index(['user', 'time', 'post'], dtype='object')


In [11]:
# Walk every page of the thread and flatten the per-page results into a single list
pages = Posts().paginate(thread)
posts = [entry for page in pages for entry in page.extract()]

In [12]:
# Build a dataframe from the posts currently held in `posts`
data = pd.DataFrame(posts)

In [13]:
# Number of rows in the dataframe (presumably one per post — confirm against extract())
len(data)

133