# Explore the Spacer API

In [1]:
import pandas as pd

from spacer import Posts, Threads, config

## Configuration

*config* exposes configuration options for the http requests that can be set by the user.

The values below are the default values:

In [2]:
config.max_retries = 0  # how often the program attempts to retry after failure
config.retry_backoff_factor = 0.1  # determines delay between retries: https://urllib3.readthedocs.io/en/stable/reference/urllib3.util.html
config.retry_http_codes = [429, 500, 503]  # error codes after which to retry

## Extract threads from a forum

In [3]:
forum = 'space-travel-spacex.47'

In [4]:
# Retrieve and extract only the first page
threads = Threads().get(forum).extract()

In [5]:
threads[0]

'nasas-perseverance-is-exploring-mars-come-watch-updates-with-us.37226'

In [6]:
# Retrieve and extract all pages using loop
pages = Threads().paginate(forum)
all_threads = []
for page in pages:
    all_threads.extend(page.extract())

In [7]:
len(all_threads)

2664

## Extract posts (and users) from a thread

In [8]:
thread = threads[0]

In [9]:
# Retrieve and extract the first page
posts, users = Posts().get(thread).extract()

In [10]:
# Use pandas to turn into dataframe
df_posts = pd.DataFrame(posts)
print(df_posts.columns)

Index(['id', 'user_id', 'username', 'thread', 'message', 'likes',
       'time_posted'],
      dtype='object')


In [11]:
# Same for users
df_users = pd.DataFrame(users)
print(df_users.columns)

Index(['id', 'username', 'role', 'join_date', 'messages', 'reaction_score',
       'points'],
      dtype='object')


In [12]:
# Retrieve and extract all pages using loop
pages = Posts().paginate(thread)
posts = []
for page in pages:
    posts.extend(page.extract())

In [13]:
data = pd.DataFrame(posts)

In [14]:
len(data)

12

## Interact with the database

By default, data is stored in a sqlite database called spacer.db

We can interact with that database to retrieve data we have already downloaded

In [15]:
import pandas as pd

from spacer import models as m

In [16]:
# Get all posts in thread
thread = 'nasas-perseverance-is-exploring-mars-come-watch-updates-with-us.37226'

p = m.get_posts_by_thread(thread)
df_p = pd.DataFrame(p.dicts())

In [17]:
# Get entire user table
u = m.get_all_users()
df_u = pd.DataFrame(u.dicts())

In [18]:
# Get first user's posts
df_u1_posts = pd.DataFrame(u[0].posts.dicts())

In [19]:
# Get user by name
u2 = m.get_user_by_name('C0MMANDER C0DY')
df_u2 = pd.DataFrame(u2.dicts())