In [1]:
import os
from reddit import MyRedditAPI
from processing import EntityRecognition, clean_text

##### Step 1: Create an instance of the MyRedditAPI Class
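- Provide your own client_id, secret_key, username, and password. The cell below reads them from the environment variables `CLIENT_ID`, `SECRET`, `USERNAME`, and `PASSWORD`.
    - To create the Client ID and Secret Key: https://www.reddit.com/prefs/apps
- Source code: see `src.reddit.my_reddit_api`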

In [2]:
# Open an authenticated Reddit API session. All credentials come from the
# process environment, so no secret is written into the notebook itself.
# os.environ.get() returns None for any variable that is not set.
# NOTE(review): some operating systems (e.g. Windows) predefine USERNAME as the
# logged-in account name -- confirm it really holds the Reddit username here.
session = MyRedditAPI(
    client_id=os.environ.get("CLIENT_ID"),   # Reddit app client ID
    secret_key=os.environ.get("SECRET"),     # Reddit app secret key
    username=os.environ.get("USERNAME"),     # Reddit account name
    password=os.environ.get("PASSWORD"),     # Reddit account password
    user_agent="MyApiTest",                  # arbitrary label; any string works
)

##### Step 2: Send a request for data
- Note: only posts that are reachable through Reddit's main search can be retrieved. A search on Reddit returns at most ~250 posts, so even if `limit` is set higher than that, the request will not return more than what the search itself makes available.
- Default behavior is to search all subreddits. Here are some specific subreddits to try instead:
    - subreddit="ukraine"
    - subreddit="RussiaUkraineWar2022"
    - subreddit="UkraineConflict"

In [23]:
# Search Reddit for the phrase below and keep the result for the cleaning and
# entity-recognition steps that follow.
# (Presumably a pandas DataFrame -- confirm against MyRedditAPI.search_posts.)
wagner_df = session.search_posts(
    query="Wagner Group",  # search phrase
    sort="new",            # sort order forwarded to the search
    subreddit="all",       # no restriction to a single subreddit
    limit=300,             # above the ~250 posts the Step 2 notes say search returns
)

##### Step 3: Text Cleaning

In [24]:
# Run the project's text-cleaning helper over the two free-text columns.
# The result is bound back to wagner_df, so after this cell the name refers to
# the cleaned data rather than the raw search result.
wagner_df = clean_text(
    wagner_df,
    columns=["title", "selftext"],
)

##### Step 4: Named Entity Recognition to extract locations, organizations, and people from the title (or selftext if desired)
- Note: you may first need to download the `en_core_web_sm` spaCy model: `python -m spacy download en_core_web_sm`

In [25]:
# Build the recognizer over the "title" column, then store each kind of entity
# it extracts as a new column on the same frame.
wagner_ner = EntityRecognition(wagner_df, "title")
wagner_df["locations"] = wagner_ner.get_gpe()        # location entities
wagner_df["organizations"] = wagner_ner.get_org()    # organization entities
wagner_df["people"] = wagner_ner.get_person()        # person entities

In [None]:
# Bare final expression: the notebook renders wagner_df as this cell's output,
# including the entity columns added in the previous cell.
wagner_df