# Subreddits to explore

In [29]:
# Communities whose rules and posts are collected by the cells below.
subreddits = [
    'r/explainlikeimfive',
    'r/askscience',
    'r/writingprompts',
    'r/changemyview',
    'r/outoftheloop',
    'r/jokes',
]
# Default single community: the first entry of the list above.
subreddit = subreddits[0]

# Get auth headers from Reddit

In [30]:
import requests
from config import CLIENT_ID, SECRET_KEY
from config import OPENAI_API
from config import reddit_auth_data as data

# HTTP Basic credentials for the token endpoint (values come from config).
auth = requests.auth.HTTPBasicAuth(CLIENT_ID, SECRET_KEY)

# Headers reused by every later Reddit request; the bearer token is added below.
headers = {
    'User-Agent': 'RedditAutomod/0.0.1'
}

res = requests.post('https://www.reddit.com/api/v1/access_token',
                    auth=auth,
                    data=data,
                    headers=headers)
# Surface HTTP-level failures with their status code instead of an opaque KeyError below.
res.raise_for_status()

token_payload = res.json()
if 'access_token' not in token_payload:
    # The request succeeded at the HTTP level but no token was issued.
    raise RuntimeError(f'Reddit token request returned no access_token: {token_payload}')
TOKEN = token_payload['access_token']

headers['Authorization'] = f'bearer {TOKEN}'

# Generate File Structure for Project

In [31]:
import os
# Sub-folder names used for the rule-abiding and rule-violating samples.
VALID, INVALID = 'valid', 'invalid'
# Working directory at the time this cell runs; the per-subreddit folders are created under it.
CWD = os.getcwd()

In [9]:
# For every subreddit: make sure <CWD>/<sr>/{valid,invalid} exist, then save the
# community rules to <CWD>/<sr>/rules.txt as "Rule <index>) <description>" paragraphs.
for sr in subreddits:
    # exist_ok=True also repairs a partially created tree (e.g. the subreddit
    # folder exists but one of its sub-folders is missing).
    for leaf in (VALID, INVALID):
        os.makedirs(os.path.join(CWD, sr, leaf), exist_ok=True)

    # Fetch before opening the file so a failed request cannot leave a truncated rules.txt.
    resp = requests.get(f'https://oauth.reddit.com/{sr}/about/rules', headers=headers).json()

    rulesPrompt = "".join(
        f'Rule {ix}) {el["description"]}\n\n'
        for ix, el in enumerate(resp['rules'])
    )

    # Explicit UTF-8, matching how the post files are written elsewhere in this notebook.
    with open(os.path.join(CWD, sr, 'rules.txt'), 'w', encoding="utf-8") as file:
        file.write(rulesPrompt)

# Get good posts

In [14]:
# Get good posts: save the current "hot" listing of each subreddit as
# <CWD>/<sr>/valid/good_<n>.txt, numbered from 1.
import time

for sr in subreddits:
    resp = requests.get(f'https://oauth.reddit.com/{sr}/hot?limit=100', headers=headers).json()
    for i, el in enumerate(resp['data']['children'], start=1):
        post_details = f'Title: {el["data"]["title"]} \nPost Text: {el["data"]["selftext"]}\n'
        with open(os.path.join(CWD, sr, VALID, f'good_{i}.txt'), 'w', encoding="utf-8") as file:
            file.write(post_details)

    # Pause between subreddits so consecutive API requests are throttled
    # (the sleep previously ran only once, after the whole loop had finished).
    time.sleep(1)

# Violating Post Generation

In [50]:
from langchain.llms import OpenAI

# LangChain OpenAI LLM wrapper, authenticated with the OPENAI_API key imported from config.
llm = OpenAI(openai_api_key=OPENAI_API)
             
from langchain.prompts.chat import ChatPromptTemplate
from langchain.schema import StrOutputParser
from langchain.chains import LLMChain

In [59]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.output_parsers import PydanticOutputParser
from langchain.pydantic_v1 import BaseModel, Field, validator
from langchain.prompts import PromptTemplate, ChatPromptTemplate

# Remove dupes with chat memory

In [85]:
from langchain.memory import ConversationBufferMemory

# Field descriptions in ResponseSchema form.
# NOTE(review): nothing visible in this notebook consumes response_schemas (the
# parser below is built from the Post model instead) — confirm whether it is still needed.
response_schemas = [
    ResponseSchema(name="Title", description="Reddit post title"),
    ResponseSchema(name="Post Text", description="Reddit post main body of text")
]

# Structure the LLM reply is parsed into: a post title plus its body text.
# NOTE(review): documented with comments rather than a class docstring on purpose —
# a docstring may surface in the generated JSON schema and so in the prompt; confirm before adding one.
class Post(BaseModel):
    title: str = Field(description="Reddit post title")
    post_text: str = Field(description="Reddit post main body of text")

# Parser that turns the raw LLM text into a Post, and the instructions that tell
# the LLM which JSON shape to produce.
output_parser = PydanticOutputParser(pydantic_object=Post)
format_instructions = output_parser.get_format_instructions()
# Default rule index; the generation loop later in the notebook rebinds rule_number.
rule_number = 2

# Prompt with three runtime inputs (rule, human_input, subreddit); format_instructions
# is pre-filled through partial_variables.
prompt = PromptTemplate(
    template="""You are generating posts for the subreddit {subreddit}. Generate a post that would violate this rule in the subreddit community guidelines: {rule}. 
    \n{format_instructions}
    \n {human_input}""",
    input_variables=["rule","human_input", "subreddit"],
    partial_variables={"format_instructions": format_instructions}
)

# NOTE(review): this memory object is not passed to the LLMChain below and the
# template has no {chat_history} slot, so each chain call appears to be stateless —
# confirm whether de-duplication through chat memory was meant to be wired in.
memory = ConversationBufferMemory(memory_key = "chat_history",input_key="human_input")

# The chain returns the raw string reply; parsing into Post is done separately via output_parser.
chain = LLMChain(llm=llm, prompt=prompt, output_parser=StrOutputParser())


In [88]:
subreddit = 'r/writingprompts'
# Get Rules: one "Rule <index>) <description>" string per rule of the chosen subreddit.
rules_url = f'https://oauth.reddit.com/{subreddit}/about/rules'
resp = requests.get(rules_url, headers=headers).json()
rulesPrompt = [
    f'Rule {ix}) {el["description"]}\n\n'
    for ix, el in enumerate(resp['rules'])
]
print(rulesPrompt)

['Rule 0) * 100 words minimum for stories, 30 for poems but include [Poem]\n* Newly written by you for the prompt (no AI)\n* Plagiarism will result in a ban\n* No joke, copypasta, or AI-generated responses\n* For off topic/clarification, reply to the sticky comment\n\n', 'Rule 1) * No explicit sexual responses / erotica\n* Includes, but is not limited to, forms of pedophilia, bestiality, incest, rape, suicide, violence against children, and explicit torture\n* Avoid racism and detailed uses of suicide, mental health stereotypes, and political debate\n* Use your best judgment, but mods have final say\n\n', "Rule 2) * Users are held to a higher standard here. Think before posting\n* We are here to encourage writing\n* Understand that people here are at all different levels:\n * New writers\n * Experienced writers\n * Professional writers\n * Users who are just learning English\n* Typos and grammatical errors are possible.  Be nice if you point it out\n* Feedback and critiques are encoura

In [87]:
from tqdm import tqdm
# Local import: the parse-failure exception type is only needed by this cell.
from langchain.schema import OutputParserException

# For each targeted rule index, ask the LLM for 10 posts that violate that rule and
# store them as <CWD>/<subreddit>/invalid/rule_<rule>_<i>.txt.
for rule_number in [0,5, 6]:
    for i in tqdm(range(10)):
        output = chain.predict(human_input= "", rule=rulesPrompt[rule_number], subreddit=subreddit)
        try:
            op = output_parser.parse(output)
        except OutputParserException as err:
            # The LLM reply is not always valid JSON; skip that sample instead of aborting the run.
            print(f'Skipped rule {rule_number} sample {i}: {err}')
        else:
            print(f'Title: {op.title}')
            # Same folder root (CWD), encoding and record layout as the valid samples, so the
            # two classes do not differ by formatting alone.
            with open(os.path.join(CWD, subreddit, INVALID, f'rule_{rule_number}_{i}.txt'), 'w', encoding="utf-8") as file:
                file.write(f'Title: {op.title} \nPost Text: {op.post_text}\n')
        time.sleep(1)

  0%|          | 0/10 [00:00<?, ?it/s]

Title: Breaking the Rules!


 10%|█         | 1/10 [00:02<00:23,  2.57s/it]

Title: What's Going On??


 20%|██        | 2/10 [00:04<00:18,  2.29s/it]

Title: Breaking the Rules??!!


 30%|███       | 3/10 [00:07<00:16,  2.38s/it]

Title: !!!Breaking News!!!


 40%|████      | 4/10 [00:09<00:13,  2.22s/it]

Title: Weird and Wacky News From Around the World!


 50%|█████     | 5/10 [00:11<00:11,  2.29s/it]

Title: Breaking the Rules: Titles Must Be Unbiased!


 60%|██████    | 6/10 [00:14<00:10,  2.60s/it]

Title: Breaking Rule 0: Unbiased and Coherent Titles??


 70%|███████   | 7/10 [00:17<00:07,  2.58s/it]

Title: Breaking News: Unbiased and Incoherent!


 80%|████████  | 8/10 [00:19<00:04,  2.49s/it]

Title: ???!?!?!?!


 90%|█████████ | 9/10 [00:21<00:02,  2.33s/it]

Title: Breaking the Rules Here!!


100%|██████████| 10/10 [00:23<00:00,  2.38s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Title: Help! I'm Out of the Loop!


 10%|█         | 1/10 [00:02<00:20,  2.23s/it]

Title: Question about something


 20%|██        | 2/10 [00:04<00:18,  2.35s/it]

Title: Help me out!


 30%|███       | 3/10 [00:06<00:15,  2.28s/it]

Title: Help! I'm Out of the Loop!


 40%|████      | 4/10 [00:09<00:13,  2.31s/it]

Title: Help me out of the loop!


 50%|█████     | 5/10 [00:11<00:11,  2.36s/it]

Title: Help me understand this!


 60%|██████    | 6/10 [00:14<00:09,  2.42s/it]

Title: Help! I'm Out of the Loop!


 70%|███████   | 7/10 [00:16<00:07,  2.38s/it]

Title: What's going on?


 80%|████████  | 8/10 [00:18<00:04,  2.33s/it]

Title: Help! I'm Out Of the Loop!


 90%|█████████ | 9/10 [00:21<00:02,  2.39s/it]

Title: What's going on?


100%|██████████| 10/10 [00:23<00:00,  2.32s/it]


In [48]:
# Show the parser's format instructions, i.e. the text supplied to the prompt
# as {format_instructions}.
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"title": {"title": "Title", "description": "Reddit post title", "type": "string"}, "post_text": {"title": "Post Text", "description": "Reddit post main body of text", "type": "string"}}, "required": ["title", "post_text"]}
```


In [18]:


# print(out)

OutputParserException: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)

Limitations:
Handling links would require recursively following them and operating only on the text of the linked webpages, and parsing webpages is not simple.
