In [None]:
import pandas as pd
import dotenv
import os
import syft as sy

In [None]:
# Connection settings. EMAIL and PASSWORD are read from environment variables
# (dotenv.load_dotenv() is called first) instead of being typed into the
# notebook; either value is None when its variable is not set.
dotenv.load_dotenv()
URL = "https://reddit-for-researchers.snooguts.net"
EMAIL = os.environ.get("EMAIL")
PASSWORD = os.environ.get("PASSWORD")

# The "password" keyword argument is optional: if you leave it out, the
# browser will prompt you for input.
# NOTE(review): presumably a None password triggers the same prompt — confirm.
client = sy.login(url=URL, email=EMAIL, password=PASSWORD)

In [None]:
# posts
# One query per calendar year (2020 through 2023) selecting posts whose body
# or title matches '%wikipedia%'. Each date window is half-open:
# created_at >= first_day AND created_at < next_first_day.
posts_sql_template = """
    SELECT *
    FROM posts 
    WHERE (body LIKE '%wikipedia%' OR title LIKE '%wikipedia%')
    AND created_at >= '{first_day}'
    AND created_at < '{next_first_day}'
    """

queries_dict = {}
for year in range(2020, 2024):
    key = f"pg_wiki_{year}"
    sql_query = posts_sql_template.format(
        first_day=f"{year}-01-01",
        next_first_day=f"{year + 1}-01-01",
    )
    queries_dict[key] = sql_query

# submit the queries for approval, printing each query name as it is sent
for func_name, query_text in queries_dict.items():
    print(func_name)
    client.api.services.reddit.submit_query(func_name=func_name, sql_query=query_text)

In [None]:
# comments
# One query per calendar month (January 2020 through December 2023) selecting
# comments whose body matches '%wikipedia%'. Each date window is half-open:
# created_at >= start_date AND created_at < end_date.
comments_sql_template = """
        SELECT *
        FROM comments
        WHERE body LIKE '%wikipedia%'
        AND created_at >= '{start_date}'
        AND created_at < '{end_date}'
        """

queries_dict = {}
for year in range(2020, 2024):
    for month in range(1, 13):
        start_date = f"{year}-{month:02d}-01"
        # December rolls over to January of the following year.
        end_year, end_month = (year + 1, 1) if month == 12 else (year, month + 1)
        end_date = f"{end_year}-{end_month:02d}-01"

        key = f"pg_wiki_comments_{year}{month:02d}"
        sql_query = comments_sql_template.format(
            start_date=start_date,
            end_date=end_date,
        )
        queries_dict[key] = sql_query

# submit the queries for approval, printing each query name as it is sent
for func_name, query_text in queries_dict.items():
    print(func_name)
    client.api.services.reddit.submit_query(func_name=func_name, sql_query=query_text)