In [1]:
# Pre-Process CMV Data
# ====================

import pandas as pd
import json
import re
from IPython.display import Markdown

In [2]:
# Clean Post
def cleanup(cmv_post):
    lines = [
        line for line in cmv_post.splitlines()
        if not line.lstrip().startswith("&gt;")
        and not line.lstrip().startswith("____")
        and "edit" not in " ".join(line.lower().split()[:2])
    ]
    return "\n".join(lines)

# Display Post IPython Markdown
def show_post(title, cmv_post, counter):
    cmv_post = cleanup(cmv_post)
    md_format = "**{}** \n \n {} \n \n **Counter** \n \n {}".format(title, cmv_post, counter)
    md_format = "\n".join(["> " + line for line in md_format.splitlines()])

    return Markdown(md_format)

# Clean Text
def full_clean(data):
    # TODOs: Consider .strip()
    cleaned = []

    for i in data:
        # CMV clean-up
        clean = cleanup(i)

        # RegEx clean-up
        clean = re.sub("CMV:", " ", clean)
        clean = re.sub("CMV", " ", clean)
        clean = re.sub(r"http\S+", "", clean)
        clean = re.sub(r"\n", "", clean)
        clean = re.sub(r'(?<=[a-z])\'(?=[a-z])', '', clean)
        clean = re.sub('([^a-zA-Z\s.!?])', "", clean)
        clean = re.sub('\s+', ' ', clean)

        clean = re.sub(r"www\S+", "", clean)
        cleaned.append(clean.strip())

        #clean = re.sub("^\s", "", clean)

    return cleaned

In [3]:
# LOAD

arg_load = []
for line in open('../data/train_cmv.jsonlist', 'r'):
    arg_load.append(json.loads(line))

args = pd.DataFrame(arg_load)
titles = args["op_title"]
props = args["op_text"]
id = args["op_name"]

wins = [
    args["positive"][i]["comments"][0]["body"] for i in range(0, len(args))
]

debate = {
    "id": id,
    "Titles": titles,
    "Arguments": props,
    "Counters": wins
}

debate = pd.DataFrame(data = debate, columns = ["id", "Titles", "Arguments", "Counters"]).astype(str)

In [4]:
debate

Unnamed: 0,id,Titles,Arguments,Counters
0,t3_2ro9ux,CMV: Anything that is man-made is natural.,I can't remember the topic that spurred this d...,"Look at the definition you provided, if we rem..."
1,t3_2ro0ti,CMV:Essential Oils are bullshit,My wife has recently gotten deeply involved/ob...,[This answer in /r/askscience does a pretty go...
2,t3_2rnr30,CMV: I think the Paris shooting makes a good c...,Thinking about today's news: [BBC](http://www....,I guess I look at it this way- there have abso...
3,t3_2rnfn0,"CMV: Explaining causation is not ""blaming"" the...",I've been thinking about this issue for a whil...,I would say that the difficulty here is in ass...
4,t3_2rnfn0,"CMV: Explaining causation is not ""blaming"" the...",I've been thinking about this issue for a whil...,"I think you're correct for the most part, but ..."
...,...,...,...,...
3451,t3_2rsgv3,CMV: Drawing images of Mohammed and posting th...,"In opposing injustice, we must strive not to p...",It's an issue of context.\n\nIf the purpose is...
3452,t3_2rs57a,CMV: The rate at which one's income is taxed s...,The rate at which income is taxed (at least in...,Their take-home pay will still be directly tie...
3453,t3_2rs57a,CMV: The rate at which one's income is taxed s...,The rate at which income is taxed (at least in...,This is a common misconception. The system is...
3454,t3_2rqvf8,CMV: School hours should be 9am to 5pm to matc...,It seems logical to me that school hours shoul...,&gt;The extra hours of school need not be fill...


In [7]:
# Search Keywords
keyword = "Philosophy"
debate[debate['Counters'].str.contains(keyword,case=False)]

Unnamed: 0,id,Titles,Arguments,Counters
5,t3_2rmy6e,CMV: Philosophy has no tangible value as an ac...,"As an independent form of study, philosophy do...",&gt;What do you gain from studying philosophy ...
6,t3_2rmy6e,CMV: Philosophy has no tangible value as an ac...,"As an independent form of study, philosophy do...","Let me take a crack at this, as someone with a..."
225,t3_2n6s5w,CMV: Human civilization reached a high point d...,The Enlightenment Period brought the most fund...,I'd argue that the defense you're making of en...
227,t3_2n4aay,"CMV: If a person is triggered by something, th...",I say this as someone who has spent much of he...,"I'm arachnophobic. I have a serious, debilitat..."
354,t3_2k19cr,CMV: When one accepts that eating meat is immo...,"This is almost a tongue-in-cheek CMV, but the ...",Most vegetarians think that eating meat is imm...
...,...,...,...,...
3083,t3_30bmbr,"CMV:I feel like math, philosophy, first and se...","Firstly, mathematics, first and second languag...","Firstly, I would argue that just as important ..."
3084,t3_30bmbr,"CMV:I feel like math, philosophy, first and se...","Firstly, mathematics, first and second languag...",IT is taught to an extent. They're taught how ...
3085,t3_30bmbr,"CMV:I feel like math, philosophy, first and se...","Firstly, mathematics, first and second languag...","Couple things here:\n\nFirst, do you have kids..."
3337,t3_2unn7n,CMV:Modernity is evil.,What I am referring to when I say modernity is...,The problem is that the categories you can use...


In [8]:
# Explore Post
show_post(debate["Titles"][5], debate["Arguments"][5], debate["Counters"][5])

> **CMV: Philosophy has no tangible value as an academic field of study.** 
>  
>  As an independent form of study, philosophy doesn't seem to have any practical applications. What value does philosophy have in the modern age, right now, aside from contemplating things. Is it truly worth it to invest a significant amount of time and money studying this field? 
> 
> There do not seem to be any tangible applications or appreciable benefits from studying philosophy aside from personal growth and the expansion of one's intellectual perspective, which I argue can be gained without studying philosophy in a rigorously academic manner.  
> 
> I often have read the argument that it is impossible to argue that philosophy is useless without using philosophy, or something along those lines.
> 
> I acknowledge this. Yes, I am engaging in the use of philosophy right now, at this very moment. However, this does not provide an argument as to why it would be worthwhile to STUDY philosophy. 
> 
> What do you gain from studying philosophy that could not be gained from thoughtful introspection? 
> 
> Certainly, important tools have originated from philosophical study, such as the scientific method, and science could be described as a subset of philosophy itself but that isn't an argument against the lack of tangible benefits to be gained from studying philosophy.
> 
> You don't need to study philosophy to become a capable scientist. You shouldn't need to study philosophy to cultivate a reasonable set of moral principles, or to be thoughtful about the circumstances and situations you encounter in your life. 
> 
> 
> 
>  
>  
>  **Counter** 
>  
>  &gt;What do you gain from studying philosophy that could not be gained from thoughtful introspection?
> 
> Two answers. #1 rigor and #2 it saves us from reinventing the wheel.
> 
> When you ask the question of whether philosophy is useful, you give us an example of a practical application of philosophy. We're on the same page there. To expand on that, we face a lot of practical questions about what things have value. When we create school curricula we're making value judgments, when we allot budgets. These are, at least in one facet, philosophical questions, and important ones.
> 
> But you suggest we can answer these questions without studying philosophy. Can we answer high level economic questions without studying economics and math? Can we figure out plumbing questions without training in plumbing?
> 
> These value decisions (and value is just one kind of philosophical question we deal with constantly) can be based on valid or invalid arguments. If no one studies arguments of value, why do you believe that we can naturally recognize these with no study? Why do you think we should start from scratch in all value decisions rather than seeking to understand the work that has been done in the past?
> 
> A lot of philosophy seems to be common sense just because it's been so thoroughly integrated into our culture. The place that we're currently at though is the result of a lot of work in ethics, epistemology and metaphysics. To assume we can arrive at the best answers just by thinking hard is ignoring the work that got us where we are, both in failing to understand how much labor was required to get us here, and failing to challenge some of our current baseline.
> 
> Not everyone needs to study philosophy for their day to day lives, but everyone consumes it whether they know it or not. If no one studies it, then no one will be able to teach it. If no one is able to teach it, then everyone's philosophical positions will have no rigor, will make no use of the work that has already been done before.
> 
> All our philosophical questions then may as well be answered by /r/showerthoughts or /r/trees. Without rigor and background, that's where introspection gets you.

In [9]:
titles_clean = full_clean(titles)
args_clean = full_clean(props)
counters_clean = full_clean(wins)

debate_clean = {
    "id": id,
    "Titles": titles_clean,
    "Arguments": args_clean,
    "Counters": counters_clean
}

debate_clean = pd.DataFrame(debate_clean)
debate_clean

Unnamed: 0,id,Titles,Arguments,Counters
0,t3_2ro9ux,Anything that is manmade is natural.,I cant remember the topic that spurred this di...,Look at the definition you provided if we remo...
1,t3_2ro0ti,Essential Oils are bullshit,My wife has recently gotten deeply involvedobs...,This answer in raskscience does a pretty good ...
2,t3_2rnr30,I think the Paris shooting makes a good case f...,Thinking about todays news BBC WikipediaI thin...,I guess I look at it this way there have absol...
3,t3_2rnfn0,Explaining causation is not blaming the victim...,Ive been thinking about this issue for a while...,I would say that the difficulty here is in ass...
4,t3_2rnfn0,Explaining causation is not blaming the victim...,Ive been thinking about this issue for a while...,I think youre correct for the most part but st...
...,...,...,...,...
3451,t3_2rsgv3,Drawing images of Mohammed and posting them on...,In opposing injustice we must strive not to pe...,Its an issue of context.If the purpose is to s...
3452,t3_2rs57a,The rate at which ones income is taxed should ...,The rate at which income is taxed at least in ...,Their takehome pay will still be directly tied...
3453,t3_2rs57a,The rate at which ones income is taxed should ...,The rate at which income is taxed at least in ...,This is a common misconception. The system is ...
3454,t3_2rqvf8,School hours should be am to pm to match offic...,It seems logical to me that school hours shoul...,They already have these. Every school in my lo...


In [10]:
# Explore Post
show_post(debate_clean["Titles"][5], debate_clean["Arguments"][5], debate_clean["Counters"][5])

> **Philosophy has no tangible value as an academic field of study.** 
>  
>  As an independent form of study philosophy doesnt seem to have any practical applications. What value does philosophy have in the modern age right now aside from contemplating things. Is it truly worth it to invest a significant amount of time and money studying this field? There do not seem to be any tangible applications or appreciable benefits from studying philosophy aside from personal growth and the expansion of ones intellectual perspective which I argue can be gained without studying philosophy in a rigorously academic manner. I often have read the argument that it is impossible to argue that philosophy is useless without using philosophy or something along those lines.I acknowledge this. Yes I am engaging in the use of philosophy right now at this very moment. However this does not provide an argument as to why it would be worthwhile to STUDY philosophy. What do you gain from studying philosophy that could not be gained from thoughtful introspection? Certainly important tools have originated from philosophical study such as the scientific method and science could be described as a subset of philosophy itself but that isnt an argument against the lack of tangible benefits to be gained from studying philosophy.You dont need to study philosophy to become a capable scientist. You shouldnt need to study philosophy to cultivate a reasonable set of moral principles or to be thoughtful about the circumstances and situations you encounter in your life. 
>  
>  **Counter** 
>  
>  Two answers. rigor and it saves us from reinventing the wheel.When you ask the question of whether philosophy is useful you give us an example of a practical application of philosophy. Were on the same page there. To expand on that we face a lot of practical questions about what things have value. When we create school curricula were making value judgments when we allot budgets. These are at least in one facet philosophical questions and important ones.But you suggest we can answer these questions without studying philosophy. Can we answer high level economic questions without studying economics and math? Can we figure out plumbing questions without training in plumbing?These value decisions and value is just one kind of philosophical question we deal with constantly can be based on valid or invalid arguments. If no one studies arguments of value why do you believe that we can naturally recognize these with no study? Why do you think we should start from scratch in all value decisions rather than seeking to understand the work that has been done in the past?A lot of philosophy seems to be common sense just because its been so thoroughly integrated into our culture. The place that were currently at though is the result of a lot of work in ethics epistemology and metaphysics. To assume we can arrive at the best answers just by thinking hard is ignoring the work that got us where we are both in failing to understand how much labor was required to get us here and failing to challenge some of our current baseline.Not everyone needs to study philosophy for their day to day lives but everyone consumes it whether they know it or not. If no one studies it then no one will be able to teach it. If no one is able to teach it then everyones philosophical positions will have no rigor will make no use of the work that has already been done before.All our philosophical questions then may as well be answered by rshowerthoughts or rtrees. Without rigor and background thats where introspection gets you.

In [11]:
# Search Keywords; Assert Clean (URLs: 'http', 'www')
keyword = "women"
debate_clean[debate_clean['Counters'].str.contains(keyword,case=False)]

Unnamed: 0,id,Titles,Arguments,Counters
25,t3_2rawgb,I believe that regularly browsing chan will ma...,Because of the anonymity on chan people are mo...,To be fair thats not specific to chan and that...
40,t3_2r1qfw,Women who are antifeminism do not have interna...,First let me clarify that antifeminism isnt ne...,You cannot be antifeminism without being misog...
48,t3_2r07cv,There is no sound biblical argument that makes...,Im not some hippy liberal christian Im a serio...,Before I start here I want to say that Im an a...
54,t3_2qtorn,Femaleonly gyms and womens shelters are not di...,Well while I was discussing the effects of fem...,I do a little volunteer work with a womens she...
65,t3_2qnu4z,The highest form of relationship between men a...,Maybe it has to do with being an adolescent an...,Well first off theres the fact that most peopl...
...,...,...,...,...
3327,t3_2uyc2l,Sexual education should not be limited to safe...,I reiterate The message of sex ed shouldnt be ...,A lot of people in this thread are taking your...
3329,t3_2uudze,Identifying as a gamer is meaningless,This is my first time posting and Im not very ...,The issues come from when gamer is used as mis...
3346,t3_2uk6fx,I dont believe that identifying as a gender ac...,I believe that gender is a simple matter of se...,Generally we separate sex which is your biolog...
3389,t3_2tj4pv,The Media should not be allowed to report on a...,Hi !Over here in the UK theres been many alleg...,I agree with your primary point but would like...


In [12]:
# Search Keywords; Assert missing values at start of sentence
keyword = "harassment"
debate_clean[debate_clean['Titles'].str.contains(keyword,case=False)]

Unnamed: 0,id,Titles,Arguments,Counters
279,t3_2lqcby,Catcalling and Street Harassment should be a t...,With the recent discussion of Street Harassmen...,Well theres a pretty big free speech problem. ...
282,t3_2lorq2,Calling compliments and greetings harassment s...,You may have seen recent viral video that aims...,Sexual harassment Sexual harassment includes u...
3193,t3_2ybjc1,Complimenting women are not sexual harassment,So this story broke yesterday. Pretty much its...,Are you asking this question in general or as ...


In [12]:
# Output .csv
# pd.DataFrame.to_csv(debate_clean, "../data/debate_data.csv", index=False)

In [13]:
# Output JSON List .jsonl
import json

data = []
for idx, row in debate_clean.iterrows():
    data.append({
        "id": row["id"], "titles": row["Titles"], "arguments": row["Arguments"], "counters": row["Counters"]
    })

with open("../data/train_cmv_cleaned.jsonl", "w", encoding='utf-8') as f:
    for d in data:
        f.write(json.dumps(d))
        f.write("\n")