# Subreddits to explore

In [29]:
subreddit = 'r/explainlikeimfive'
subreddits = ['r/explainlikeimfive', 'r/askscience', 'r/writingprompts', 'r/changemyview','r/outoftheloop','r/jokes']

# Get auth headers from Reddit

In [5]:
import requests
from config import CLIENT_ID, SECRET_KEY
from config import OPENAI_API
from config import reddit_auth_data as data

auth = requests.auth.HTTPBasicAuth(CLIENT_ID, SECRET_KEY)

headers = {
    'User-Agent': 'RedditAutomod/0.0.1'
}

res = requests.post('https://www.reddit.com/api/v1/access_token',
                    auth=auth,
                    data=data,
                    headers=headers)
TOKEN = res.json()['access_token']

headers['Authorization'] = f'bearer {TOKEN}'

# Generate File Structure for Project

In [31]:
import os
VALID = 'valid'
INVALID = 'invalid'
CWD = os.getcwd()

In [9]:
for sr in subreddits:
    if not os.path.exists(os.path.join(os.getcwd(),sr)):
        os.makedirs(os.path.join(CWD,sr))
        os.makedirs(os.path.join(CWD,sr,VALID))
        os.makedirs(os.path.join(CWD,sr,INVALID))

    with open(os.path.join(CWD,sr,'rules.txt'),'+w') as file:
        resp = requests.get(f'https://oauth.reddit.com/{sr}/about/rules', headers=headers).json()
        # print(resp)
        rulesPrompt = ""

        for ix,el in enumerate(resp['rules']):
            
            ruleDesc = (el['description'])
            singleRuleTemplate = f'Rule {ix}) {ruleDesc}\n\n'
            rulesPrompt += singleRuleTemplate
        file.write(rulesPrompt)

# Get good posts

In [14]:
# Get good posts
import time
for sr in subreddits:
    resp = requests.get(f'https://oauth.reddit.com/{sr}/hot?limit=100', headers=headers).json()
    i = 0
    for el in resp['data']['children']:
        i += 1
        post_details = f'Title: {el["data"]["title"]} \nPost Text: {el["data"]["selftext"]}\n'
        with open(os.path.join(CWD,sr,VALID,f'good_{i}.txt'), '+w', encoding="utf-8") as file:
            file.write(post_details)

time.sleep(1)

# Violating Post Generation

In [50]:
from langchain.llms import OpenAI

llm = OpenAI(openai_api_key=OPENAI_API)
             
from langchain.prompts.chat import ChatPromptTemplate
from langchain.schema import StrOutputParser
from langchain.chains import LLMChain

In [59]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.output_parsers import PydanticOutputParser
from langchain.pydantic_v1 import BaseModel, Field, validator
from langchain.prompts import PromptTemplate, ChatPromptTemplate

# Remove dupes with chat memory

In [85]:
from langchain.memory import ConversationBufferMemory

response_schemas = [
    ResponseSchema(name="Title", description="Reddit post title"),
    ResponseSchema(name="Post Text", description="Reddit post main body of text")
]

class Post(BaseModel):
    title: str = Field(description="Reddit post title")
    post_text: str = Field(description="Reddit post main body of text")

output_parser = PydanticOutputParser(pydantic_object=Post)
format_instructions = output_parser.get_format_instructions()
rule_number = 2

prompt = PromptTemplate(
    template="""You are generating posts for the subreddit {subreddit}. Generate a post that would violate this rule in the subreddit community guidelines: {rule}. 
    \n{format_instructions}
    \n {human_input}""",
    input_variables=["rule","human_input", "subreddit"],
    partial_variables={"format_instructions": format_instructions}
)

memory = ConversationBufferMemory(memory_key = "chat_history",input_key="human_input")

chain = LLMChain(llm=llm, prompt=prompt, output_parser=StrOutputParser())


In [6]:
subreddit = 'r/nostupidquestions'
# Get Rules
resp = requests.get(f'https://oauth.reddit.com/{subreddit}/about/rules', headers=headers).json()
# print(resp)
rulesPrompt = []

for ix,el in enumerate(resp['rules']):
    
    ruleDesc = (el['description'])
    singleRuleTemplate = f'Rule {ix}) {ruleDesc}\n\n'
    rulesPrompt.append(singleRuleTemplate)
    print(singleRuleTemplate)
# print(rulesPrompt)

Rule 0) All direct answers to a post must make a genuine attempt to answer the question. Joke responses at the parent-level will be removed. Accounts using AI to generate answers will be banned. Follow-up questions at the top level are allowed. (Link only responses are permitted if they are asking for a subreddit.)

Do not answer by only dropping a link and do not tell users they should "google it." Include a summary of the link or answer the question yourself.


Rule 1) Try to keep repeat posts to a minimum. As much as we love answering questions here, it doesn't help when we see repeat questions on the front page every day. You can find the questions that have come up here again and again in our [wiki](https://www.reddit.com/r/NoStupidQuestions/wiki/index).


Rule 2) Be polite and respectful in your exchanges. NSQ is supposed to be a helpful resource for confused redditors. Civil disagreements can happen, but insults should not. Personal attacks, slurs, bigotry, etc. are not permitte

In [89]:
from tqdm import tqdm
for rule_number in [0,5, 6]:
    for i in tqdm(range(10)):
        output = chain.predict(human_input= "", rule=rulesPrompt[rule_number], subreddit=subreddit)
        op = output_parser.parse(output)
        # op = chain.predict(subreddit=subreddit,rules=rulesPrompt,text="", rule_number=2)
        with open(os.path.join(os.getcwd(), subreddit, INVALID, f'rule_{rule_number}_{i}.txt'),'+w') as file:
            print(f'Title: {op.title}')
            file.write(f'Title: {op.title}\nPost Text:{op.post_text}')
        time.sleep(1)

  0%|          | 0/10 [00:00<?, ?it/s]

Title: No Work, All Play


 10%|█         | 1/10 [00:03<00:33,  3.71s/it]

Title: AI-Generated Poem


 20%|██        | 2/10 [00:07<00:28,  3.50s/it]

Title: Short Poem - No Newly Written Content


 30%|███       | 3/10 [00:09<00:21,  3.13s/it]

Title: My Life in 30 Words


 40%|████      | 4/10 [00:12<00:17,  2.90s/it]

Title: My Plagiarized Story


 50%|█████     | 5/10 [00:15<00:14,  2.98s/it]

Title: My AI-generated story


 60%|██████    | 6/10 [00:17<00:11,  2.80s/it]

Title: Copy and Paste Writing Prompt


 70%|███████   | 7/10 [00:20<00:08,  2.88s/it]

Title: AI Writing Prompts?


 80%|████████  | 8/10 [00:23<00:05,  2.89s/it]

Title: A Short Poem About Writing Prompts


 90%|█████████ | 9/10 [00:26<00:02,  2.77s/it]

Title: A Joke Response


100%|██████████| 10/10 [00:28<00:00,  2.82s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Title: Help Me Pick a Title For My Writing Prompt - [MP]


 10%|█         | 1/10 [00:02<00:23,  2.64s/it]

Title: Need help with my writing prompt!


 20%|██        | 2/10 [00:05<00:20,  2.52s/it]

Title: Help With Writing Services?


 30%|███       | 3/10 [00:07<00:17,  2.49s/it]

Title: Need help with a writing project?


 40%|████      | 4/10 [00:10<00:15,  2.65s/it]

Title: Help me pick a title for my story!


 50%|█████     | 5/10 [00:13<00:13,  2.73s/it]

Title: Writing Prompt: Create a Story Using Only Autocorrect and Autocomplete Words!


 60%|██████    | 6/10 [00:15<00:10,  2.71s/it]

Title: Help me pick a title for my story!


 70%|███████   | 7/10 [00:18<00:07,  2.59s/it]

Title: Need help with a writing prompt title!


 80%|████████  | 8/10 [00:20<00:05,  2.52s/it]

Title: Need help with writing a story about horror and romance!


 90%|█████████ | 9/10 [00:24<00:02,  2.77s/it]

Title: 🤔 Need Help Writing My Story 🤔





UnicodeEncodeError: 'charmap' codec can't encode character '\U0001f914' in position 7: character maps to <undefined>

In [48]:
# op = output_parser.parse(output)
# output
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"title": {"title": "Title", "description": "Reddit post title", "type": "string"}, "post_text": {"title": "Post Text", "description": "Reddit post main body of text", "type": "string"}}, "required": ["title", "post_text"]}
```


In [18]:


# print(out)

OutputParserException: Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)

Limitations:
would need to recursively follow links and only operate on text of webpages - parsing webpages is not simple. 
