In [6]:
### TODOs ###

# TODOs: Mine Args
# TODO: Enhance Stance Module; Determine stance over entire argument. Only implicate stance for Noun
# TODOs: Mine Counters
# TODOs: Add Concepts
# TODOs: Commonsense Query and Concept Expansion: Topics, Concepts, Synonyms
# TODOs: Parallel process
# TODOs: Prior tokenization and sentence segmentation to speed processing
# TODOs: Domain Restrict. Polarising social and political debate (Class labelling) only for higher-quality argument-knowledge set

In [8]:
# TODOs: Adu, Counter + KP Extraction as 'Argument Mining' preprocessing module
# TODOs: Implement Query Expansion at Query-time
# TODOs: Manage Duplicate Keywords
# DONE: Sentential Ranking
# DONE: Include Topic Label
# DONE: Include Concept Label
# DONE: Add News
# TODOs: Targeted Retreival with Semantic Graphs
# TODOs: Target Argumentative Content Only
# TODOs: Targeted Argument Content: Adus + Extractive Summary
# TODOs: Query Expansion
# TODOs: Multi-Field Search
# TODOs: Additional News and Knowledge Sources

In [7]:
### INIT LOGGING ###
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ARGUMENT-EXTRACTOR")

In [8]:
### NLP FUNCTIONS ###
from src.utils_.utils import tokeniser, sentences_segment

print(tokeniser("hello, my name is Josh!"))
print(sentences_segment("hello, my name is Josh! How are you doing today? I'm curious ... will this line seperate? I'm not so sure Dr. Evil"))

['hello', ',', 'my', 'name', 'is', 'Josh', '!']
['hello, my name is Josh!', 'How are you doing today?', "I'm curious ... will this line seperate?", "I'm not so sure Dr.", 'Evil']


In [9]:
### LOAD DATASETS ###
import json
import random

args = [json.loads(ln) for ln in open("../data/cmv_processed.jsonl")]
topics = [json.loads(ln) for ln in open("../data/argument_topic_concept.jsonl")]
concepts = [json.loads(ln) for ln in open("../data/argument_concept.jsonl")]

In [10]:
len(topics), len(concepts), len(args)

(5990, 5990, 10303)

In [11]:
### ASSERT BLANKS ###
args_ = [json.loads(ln)["argument"]["argument"] for ln in open("../data/cmv_processed.jsonl")]
ids = [json.loads(ln)["id"] for ln in open("../data/cmv_processed.jsonl")]

for j, k in zip(args_, ids):
    if j == "":
        print("blanks", j, k)

blanks  t3_3cm6jy
blanks  t3_1egv4k
blanks  t3_1egv4k
blanks  t3_5wjdve


In [12]:
### SUBJECT ARG ###
import random
sample = random.randint(0, 99)

arg = args[sample]["argument"]["argument"]
claim = args[sample]["claim"]

print(sample, "\n")
print(claim, "\n")
print(arg, "\n")

16 

Microsoft Windows should not be used in schools 

This view is probably a bit controversial given the wide use of Microsoft Windows in schools in the US and quite a few countries all over the world and might seem as a bit of a rant against Microsoft but bear with me. Im definitely not a Microsoft fan but Im not exactly an Apple or Linux fanboy either. Having an emphasis on Microsoft Office in school is geared to a desk job that uses Office. And you dont even learn Office properlyYoure ignoring the programmers engineers arts and literally any one who does not use Office on a regular basis. Except engineers who might use Excel a ton. 



In [13]:
### EXTRACTORS ###
from src.utils_.keyphrase_extraction import yake_extract_keyphrase, summa_extract_keyphrase

test = "Brazil's minimum income has increasingly been accepted."
ev_kp = yake_extract_keyphrase(test)
ev_kp_ = summa_extract_keyphrase(test)

test_2 = " "
ev_kp_2 = yake_extract_keyphrase(test_2)
ev_kp_2_ = summa_extract_keyphrase(test_2)

print(ev_kp)
print(ev_kp_)

# Can Handel Blanks
print(ev_kp_2)
print(ev_kp_2_)

['Brazil minimum income', 'Brazil minimum', 'increasingly been accepted', 'minimum income', 'income has increasingly']
['minimum']
[]
[]


In [14]:
from tqdm.notebook import tqdm
from src.detection.stance_classifier import sentence_stance, compare_stance
from src.utils_.word_net_expansion import expand_query
from src.detection.stance_classifier import sentence_stance
import multiprocessing
import json
import time

# Disable Huggingface Logging
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

topic_ids = [json.loads(ln)["id"] for ln in open("../data/argument_topic_concept.jsonl")]
concept_ids = [json.loads(ln)["id"] for ln in open("../data/argument_concept.jsonl")]

# Where notion == topic or concept
def get_notion(notions_ids, notions_lst, arg_id, label):
    notion_id = notions_ids.index(arg_id)
    notion = notions_lst[notion_id][label]
    return str(notion) if notion else None

# Extract Argument Discourse as Sentences, Keyphrases, Topics and Concepts
def extract_adus(arg_):

    id_ = arg_["id"]
    arg = arg_["argument"]["argument"]

    print("\n", id_)

    topic = get_notion(topic_ids, topics, id_, "topic_label")
    concept = get_notion(concept_ids, concepts, id_, "concept_label")

    adu_sents = sentences_segment(arg)

    extract_adus = []
    for _ in adu_sents:
        if len(tokeniser(_)) <= 5:
            continue

        try:
            kp = yake_extract_keyphrase(_)
        except:
            kp = [" "]

        print(kp)

        aspect = " " if kp == [] else kp[0]

        try:
            stance = sentence_stance(_, aspect)
        except:
            stance = " "

        adu = {"sentence": _, "kp": [i for i in kp], "stance": stance, "aspect": aspect, "topic": topic, "concept": aspect}

        extract_adus.append(adu)

    return ({
        "id": id_,
        "argument": [i for i in extract_adus]
    })

#SAMPLE = args[0:1000]

SAMPLE = args
STEPS = 10
STEP = max(int(len(SAMPLE) / STEPS), 1)
BATCHES = [args[i:i + STEP] for i in range(0, len(SAMPLE), STEP)]

mined_args = []
for idx, batch in enumerate(BATCHES):
    print('-' * 25 + 'Batch %d/%d' % (idx + 1, len(BATCHES)) + '-' * 25)

    with multiprocessing.Pool(8) as pool:
        with tqdm(total=(len(batch))) as pbar:
            for arg in batch:
                mined_args.append(extract_adus(arg))
                pbar.update()

-------------------------Batch 1/11-------------------------


  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_30oi71
['Income Increasingly Popular', 'Basic Income Increasingly', 'Increasingly Popular', 'Basic Income', 'Income Increasingly']
['Basic income', 'broad support', 'progressive left', 'left and libertarian', 'Basic']
['including Paul Krugman', 'economists including Paul', 'Centerleft economists including', 'Paul Krugman', 'including Paul']
['effective antipoverty measure', 'antipoverty measure', 'effective antipoverty', 'measure', 'effective']
['capital to labor', 'reduces inequality', 'inequality by redistributing', 'redistributing income', 'income from capital']

 t3_30oi71
['Income Increasingly Popular', 'Basic Income Increasingly', 'Increasingly Popular', 'Basic Income', 'Income Increasingly']
['Basic income', 'broad support', 'progressive left', 'left and libertarian', 'Basic']
['including Paul Krugman', 'economists including Paul', 'Centerleft economists including', 'Paul Krugman', 'including Paul']
['effective antipoverty measure', 'antipoverty measure', 'effective antipov

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_2qia6o
['standard hiring method', 'amp merits relevant', 'position amp merits', 'standard hiring', 'hiring method']
['applicant', 'occasionally', 'jobs', 'require', 'maintaining']
['functionally irrelevant', 'vast majority', 'majority of jobs', 'sales', 'irrelevant']

 t3_3bd8kh
['Disney cartoons', 'sounds kind', 'kind of stupid', 'Disney', 'cartoons']
['theyre thinking feeling', 'thinking feeling creatures', 'theyre thinking', 'thinking feeling', 'feeling creatures']
['literally asserting dominance', 'asserting dominance', 'dog leaping', 'expressing affection', 'affection its literally']
['eager to play', 'play its showing', 'follow the pack', 'pack leader', 'dog dragging']
['dog making eye', 'making eye contact', 'dog making', 'asserting dominanceread', 'dominanceread not affectionately']

 t3_3bd8kh
['Disney cartoons', 'sounds kind', 'kind of stupid', 'Disney', 'cartoons']
['theyre thinking feeling', 'thinking feeling creatures', 'theyre thinking', 'thinking feeling', 'feeling 

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_5zq686
['good mind exercise', 'mind exercise', 'make this claim', 'claim I hope', 'honestly doubt']
['made any logical', 'logical sense', 'people deny', 'religions and simply', 'simply choose']
['makes no sense', 'illogical and makes', 'sense', 'illogical', 'makes']
['religions', 'deny']
['logical manner', 'work of studying', 'religions before choosing', 'manner', 'religions']

 t3_4z2wcn
['issue is framed', 'election season', 'shone a light', 'light on race', 'race and racism']
['negative defensive remark', 'meaningful conversation', 'actual members', 'members of supremacist', 'supremacist groups']
['thing doesnt automatically', 'doesnt automatically change', 'Presumption saying believing', 'change a persona', 'thing doesnt']
['stopped calling people', 'calling people racists', 'racist words actions', 'stupid thing', 'doesnt change']

 t3_2ixatz
['alien invasion', 'scenarios', 'alien', 'invasion', 'outgunned']
['high risk low', 'risk low probability', 'low probability scenarios',

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_21aaav
['define my terms', 'evolutionary deadend', 'relatives passing', 'genes No kinselection', 'genes nor contributing']
['future genetic contribution.A', 'genetic contribution.A hypocrite', 'view impact', 'neutralpositivenegative impact', 'impact on future']
['children are awful', 'charitable description', 'belief that children', 'awful', 'charitable']
['relatives genetic impact', 'offspring genetic impact', 'genetic impact', 'childfree individual', 'emphasize selfdetermination']
['evolutionary deadend', 'instances said childfree', 'childfree individual', 'deadend', 'instances']

 t3_65p2od
['celebrating Good Friday', 'worth celebrating Good', 'Christianity.The central tenet', 'Good Friday', 'Christian to agree']
['accepting God', 'Jesus sin', 'Lamb of God', 'ritual sacrifice', 'sacrifice of animalsNow']
['sacrificial lamb.But', 'lamb.But now imagine', 'imagine the early', 'early days', 'Jesus']
['Jesus and believed', 'Jesus', 'People', 'heavensent', 'believed']
['mortal men su

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_5ttr0o
['hypocrisy surrounding rape', 'WORSE THAN DEATH', 'society these days', 'engulfs society', 'dont condone']
['worse than death', 'rape is worse', 'death', 'people', 'rape']
['people', 'outrage']
['worse than death', 'life without freedom', 'constant pain', 'death', 'life']

 t3_5rp57h
['turns violent people', 'attacked property destroyed', 'protest turns violent', 'turns violent', 'violent people']
['hostile', 'protesters', 'protesting', 'people', 'side']
['violent that doesnt', 'doesnt make', 'make you wrong', 'sudden', 'people']
['political affiliation', 'wrong', 'case', 'political', 'affiliation']
['riots or violence', 'arguing against peaceful', 'peaceful protest', 'advocating for riots', 'violence']

 t3_33fzqf
['United Nations Human', 'Saudi Arabia violate', 'United Nations', 'Saudi Arabia', 'World War']
['Sharia Law set', 'Saudi Arabian government', 'Arabian government violates', 'government violates Article', 'Sharia Law']
['vaguely defined reasons', 'Saudi Arabia',

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_3ec84d
['population replacement rates', 'population replacement', 'stagnation things', 'replacement rates', 'future']
['automation continues', 'continues to improve', 'improve less jobs', 'automation', 'continues']
['nationwide scale decreasing', 'scale decreasing population', 'nationwide scale', 'scale decreasing', 'decreasing population']
['food water fuel', 'Natural resources', 'scarcer everyday', 'resources of food', 'food water']
['global scale', 'scale less population', 'global', 'scale', 'population']

 t3_3ec84d
['population replacement rates', 'population replacement', 'stagnation things', 'replacement rates', 'future']
['automation continues', 'continues to improve', 'improve less jobs', 'automation', 'continues']
['nationwide scale decreasing', 'scale decreasing population', 'nationwide scale', 'scale decreasing', 'decreasing population']
['food water fuel', 'Natural resources', 'scarcer everyday', 'resources of food', 'food water']
['global scale', 'scale less populati

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_2a9dnv
['Roths child suggests', 'Roths child', 'Rothschild as Roths', 'cases in point', 'point The USEnglish']
['Rothschild meaning red', 'meaning red shield', 'sch in Germanic', 'Germanic languages', 'Rothschild meaning']
['lines of rothshild', 'proper pronunciation', 'rothshild', 'proper', 'pronunciation']
['Latin phrases show', 'speakers complete ignorance', 'pronunciations of Latin', 'Latin phrases', 'USEnglish pronunciations']
['alveolar trill rolled', 'easily learned', 'excuse the inability', 'inability to pronounce', 'pronounce the alveolar']

 t3_2a9dnv
['Roths child suggests', 'Roths child', 'Rothschild as Roths', 'cases in point', 'point The USEnglish']
['Rothschild meaning red', 'meaning red shield', 'sch in Germanic', 'Germanic languages', 'Rothschild meaning']
['lines of rothshild', 'proper pronunciation', 'rothshild', 'proper', 'pronunciation']
['Latin phrases show', 'speakers complete ignorance', 'pronunciations of Latin', 'Latin phrases', 'USEnglish pronunciations'

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_50ht8b
['clear a couple', 'couple of things', 'clear', 'couple', 'things']
['Lack of education', 'Lack', 'work', 'education', 'teachersparents']
['wont learn', 'kid really doesnt', 'learn', 'kid', 'doesnt']
['amount of education', 'education thrown', 'entered the mind', 'amount', 'education']
['formal school setting', 'Educationtrainingpractice doesnt', 'formal school', 'school setting', 'Educationtrainingpractice']

 t3_50ht8b
['clear a couple', 'couple of things', 'clear', 'couple', 'things']
['Lack of education', 'Lack', 'work', 'education', 'teachersparents']
['wont learn', 'kid really doesnt', 'learn', 'kid', 'doesnt']
['amount of education', 'education thrown', 'entered the mind', 'amount', 'education']
['formal school setting', 'Educationtrainingpractice doesnt', 'formal school', 'school setting', 'Educationtrainingpractice']

 t3_50ht8b
['clear a couple', 'couple of things', 'clear', 'couple', 'things']
['Lack of education', 'Lack', 'work', 'education', 'teachersparents']


  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_35fjb1
['people Persons', 'Persons', 'people', 'live']
['Person', 'citizen', 'born']
['huge idiot', 'idiot and dont', 'happen', 'huge', 'idiot']
['years and registering', 'vote with literally', 'literally no requirements', 'surviving for years', 'registering']
['Albert freaking Einstein', 'basically Albert freaking', 'freaking Einstein', 'basically Albert', 'Albert freaking']

 t3_35fjb1
['people Persons', 'Persons', 'people', 'live']
['Person', 'citizen', 'born']
['huge idiot', 'idiot and dont', 'happen', 'huge', 'idiot']
['years and registering', 'vote with literally', 'literally no requirements', 'surviving for years', 'registering']
['Albert freaking Einstein', 'basically Albert freaking', 'freaking Einstein', 'basically Albert', 'Albert freaking']

 t3_35fjb1
['people Persons', 'Persons', 'people', 'live']
['Person', 'citizen', 'born']
['huge idiot', 'idiot and dont', 'happen', 'huge', 'idiot']
['years and registering', 'vote with literally', 'literally no requirements', 'sur

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_2idrbt
['Dove Real Beauty', 'Real Beauty campaign', 'Dove Real', 'Real Beauty', 'Beauty campaign']
['commercials portray women', 'commercials portray', 'portray women', 'supposed to represent', 'represent what real']
['women featured', 'average height', 'height and vary', 'vary in skin', 'skin tones']
['skinny or thin', 'thin', 'people', 'skinny']
['athletic or incredibly', 'incredibly fit', 'obese', 'dont', 'label']

 t3_5g4ugl
['Nationalism puts people', 'Nationalism puts', 'arbitrary reasons', 'puts people', 'people in groups']
['group', 'sold', 'individuals', 'pride', 'contribute']
['taxation is compulsory', 'nation by paying', 'paying taxes', 'compulsory', 'people']
['pride is dangerous', 'psychological crutch', 'crutch for people', 'individual achievements', 'achievements hence preventing']

 t3_4v1li0
['candidate adheres to.However', 'United States', 'playing ground', 'bestEach party', 'set philosophy']
['personality and party', 'elections would focus', 'party', 'elections'

  0%|          | 0/3 [00:00<?, ?it/s]


 t3_1tqlde
['Santa Claus', 'Claus is real', 'child that Santa', 'number of discussions', 'Santa']
['opinions differ', 'differ the prevailing', 'prevailing mindset', 'hurt', 'opinions']
['force religious beliefs', 'Grinches and Scrooges', 'magic of Christmas', 'Christmas or whatnot', 'terms more Grinches']
['children of believers', 'morals and judgment.Please', 'nuanced and tolerant', 'tolerant perspective', 'read these debates']

 t3_1qiccr
['acquiring equal legal', 'acquiring equal', 'equal legal', 'Feminism', 'women']
['West', 'feminism', 'thing']
['women face prejudice', 'understand that women', 'women face', 'face prejudice', 'guys albeit']
['eliminating intangible prejudice', 'eliminating intangible', 'intangible prejudice', 'eliminating', 'intangible']
['longer a thing', 'thing', 'feminism', 'looked', 'longer']

 t3_1bc54q
['life is deterministic', 'convinced that life', 'deterministic', 'thinking', 'convinced']
['chemical composure', 'bit of influence', 'chemical', 'composure',

In [15]:
# mined_args
print(len(mined_args))

import random
_ = random.randint(0, len(SAMPLE))
example = mined_args[_]
example

10303


{'id': 't3_2kgrtb',
 'argument': [{'sentence': 'No need for px insanity paleo keto atkins etc.I just need to go to a caloric calculator meet that goal and walk my dog like mins a day.',
   'kp': ['insanity paleo keto',
    'paleo keto atkins',
    'keto atkins etc.I',
    'caloric calculator meet',
    'mins a day'],
   'stance': 'CON',
   'aspect': 'insanity paleo keto',
   'topic': None,
   'concept': 'insanity paleo keto'},
  {'sentence': 'I have been trying to lose weight for a while and the only time I was successful was when I tried Paleo.But that only lasted a short time as I didnt have the discipline nor will to never touch bread pasta or rice again.I have tried doing px and insanity and while awesome and fulfilling I never finished.',
   'kp': ['touch bread pasta',
    'lose weight',
    'lasted a short',
    'touch bread',
    'bread pasta'],
   'stance': 'PRO',
   'aspect': 'touch bread pasta',
   'topic': None,
   'concept': 'touch bread pasta'},
  {'sentence': 'My schedule

In [16]:
# STORE DEEP-COPY
import copy
mined_args_ = copy.deepcopy(mined_args)

In [17]:
### COUNTER-ARGS ###
def extract_counters(arg_):

    id_ = arg_["id"]
    counter = arg_["counter"]["counter"]

    print("\n", id_)

    counter_sents = sentences_segment(counter)

    extract_counters = []
    for _ in counter_sents:
        if len(tokeniser(_)) <= 5:
            continue

        try:
            kp = yake_extract_keyphrase(_)
        except:
            kp = [" "]

        print(kp)

        aspect = " " if kp == [] else kp[0]

        try:
            stance = sentence_stance(_, aspect)
        except:
            stance = " "

        counter_unit = {"sentence": _, "kp": [i for i in kp], "stance": stance, "aspect": aspect}

        extract_counters.append(counter_unit)

    return ({
        "id": id_,
        "counter": [i for i in extract_counters]
    })

STEPS = 10
STEP = max(int(len(SAMPLE) / STEPS), 1)
BATCHES = [args[i:i + STEP] for i in range(0, len(SAMPLE), STEP)]

mined_counters = []
for idx, batch in enumerate(BATCHES):
    print('-' * 25 + 'Batch %d/%d' % (idx + 1, len(BATCHES)) + '-' * 25)

    with multiprocessing.Pool(8) as pool:
        with tqdm(total=(len(batch))) as pbar:
            for counter in batch:
                mined_counters.append(extract_counters(counter))
                pbar.update()

-------------------------Batch 1/11-------------------------


  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_30oi71
['Basic Income requires', 'rejects progressive taxation', 'Basic Income', 'Income requires', 'points seem predicated']
['worded incorrectly', 'conservatives or rightlibertarian', 'prefer those outcomes', 'liberals and leftlibertarians', 'not.The title']
['Basic Income', 'replaced.If that doesnt', 'doesnt change', 'change your view', 'Basic']
['perfectly shot arrow', 'shot arrow aimed', 'wrong target', 'written and laid', 'perfectly shot']

 t3_30oi71
['time search google', 'search google images', 'cliff unemployment trap', 'welfare marginal withdrawal', 'welfare cliff unemployment']
['surprised', 'evidence']
['effect youd', 'significant', 'dont', 'effect', 'youd']
['consensus they exist.Note', 'apply to individuals', 'individuals and households', 'households some benefits', 'left']
['UKs housing benefit', 'UKs housing', 'housing benefit', 'benefit are allocated', 'myriad systems']

 t3_30oi71
['attacking resource taxes', 'fund resource dividends', 'basic income', 'funding i

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_2qia6o
['challenge your view', 'view that networking', 'networking is unfair', 'challenge', 'view']
['challenge your view', 'discouraged or eliminated.Businesses', 'effective', 'challenge', 'view']
['secondary goal', 'goal', 'fair', 'secondary']
['encourages business practices', 'companysociety which encourages', 'encourages business', 'business practices', 'quickly be outcompeted']
['exceptions that justify', 'justify regulation', 'environmental or fraud', 'fraud protection', 'find employees']

 t3_3bd8kh
['kids assign feelings', 'owners assign feelings', 'assign feelings', 'simply projecting', 'kids assign']
['Dog sees owner', 'Dog', 'amount of time', 'time', 'havent']
['dog sounds', 'overwhelmed with emotion', 'dog', 'sounds', 'overwhelmed']
['owners grave', 'years', 'dogs', 'fed', 'care']

 t3_3bd8kh
['Time articleFurthermore attributing', 'pack leaders hierarchies', 'leaders hierarchies alphas', 'Time articleFurthermore', 'information about pack']
['Huffington Post article', 

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_5zq686
['illogical automatically make', 'make one foolish', 'illogical automatically', 'automatically make', 'foolish']
['God', 'born', 'culture', 'fool', 'heart']
['considered foolish', 'rejecting this God', 'God', 'foolish', 'rejecting']

 t3_4z2wcn
['individual persons job', 'light bulb', 'job to change', 'youve heard', 'heard the joke']
['racist persons mind', 'persons mind requires', 'discussion most times', 'change a racist', 'racist persons']
['victim blaming experience', 'personofcolor suggesting', 'suggesting I spend', 'spend some effort', 'attempt to engage']
['put']
['fat shaming people', 'change their behavior', 'thread make', 'make a comparison', 'comparison of calling']

 t3_2ixatz
['crazy building weapons', 'nuclear weapons began', 'overwhelming threat', 'curious what makes', 'dont already exist.I']

 t3_2ixatz
['sake of argument', 'aliens exist', 'interstellar travel', 'travel is physically', 'sake']
['Earth earthquakes volcanoes', 'asteroid impacts comets', 'impac

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_21aaav
['feel']
['evolutionary dead end', 'telling people Youre', 'dead end', 'people Youre', 'telling people']
['wrong', 'rude']
['kids', 'wrong', 'wanting']

 t3_65p2od
['Jesus was killed', 'killed he descended', 'descended into HadesHell', 'HadesHell and saved', 'saved the souls']
['Harrowing of Hell', 'Holy Saturday', 'resurrected on Sunday', 'called the Harrowing', 'celebrated on Holy']
['Good Friday Jesus', 'Friday Jesus died', 'Easter Sunday', 'Earth on Easter', 'Heaven on Easter']
['us.A modern corollary', 'billion people alive', 'years ago', 'ago but today', 'people alive']
['enslaved were unshackled', 'nice that future', 'future generations', 'longer be slaved', 'wrongfully enslaved']

 t3_65p2od
['Christs resurrection', 'resurrection is theologically', 'theologically significant', 'Christs', 'reasons']
['sin of Adam', 'eternal life', 'resurrection ensures', 'born into eternal', 'Adam']
['living proof', 'dead', 'fact', 'living', 'proof']
['Corinthians Paul states', 'Cori

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_5ttr0o
['worse than death', 'traumatized by rape', 'dont understand', 'understand the argument', 'compare any number']
['irrelevant.For instance imagine', 'children in Africa', 'objectively correct', 'irrelevant.For instance', 'instance imagine']
['fix their problems', 'make the child', 'child feel', 'problems', 'knowing']
['sadness to boot', 'feel worse', 'feel guilty', 'boot', 'make them feel']
['child in Africa', 'obvious reasons', 'dont have younger', 'children whos parents', 'parents died']

 t3_5rp57h
['youve chosen', 'chosen the wrong', 'wrong argument', 'youve', 'chosen']
['protest actions', 'turns violent', 'position or argument', 'protestors are basing', 'protest']
['true.However a protest', 'true.However', 'protest']
['turns violent', 'definition a bad', 'bad protest', 'protest that turns', 'protest']
['protestors', 'difference', 'position', 'behaviour']

 t3_33fzqf
['friend who lives', 'punishments', 'friend', 'lives']
['extreme punishments', 'population', 'govt', 'ext

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_3ec84d
['nonworking elderly population', 'proprtionately large nonworking', 'large nonworking elderly', 'elderly population', 'proprtionately large']
['maintain the tax', 'tax base', 'caresupport', 'maintain', 'tax']

 t3_3ec84d
['worried about declining', 'population are worried', 'declining population', 'worried', 'reasons']
['absolute populationeconomic size', 'populationeconomic size', 'nations power', 'power is attributed', 'absolute populationeconomic']
['world of growing', 'world of declining', 'systems set', 'longer be sustainable', 'growing population']
['context Social Security', 'Social Security retirement', 'Security retirement SSR', 'working age people', 'Social Security']

 t3_3ec84d
['Shrinking populations', 'dangerous things', 'disruptive and dangerous', 'Shrinking', 'things']
['direct effects', 'basically boils', 'economic activity', 'activity is bad.First', 'effects']
['families to fill', 'houses and families', 'house to accommodate', 'family', 'families']
['mate

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_2a9dnv
['native English speaker', 'pronouncing foreign words', 'native English', 'English speaker', 'English words']
['expect the person', 'person listening', 'find your accent', 'accent more ridiculous', 'ridiculous when attempting']
['expect to hear', 'speaks the language', 'language the word', 'word is derived', 'Theyre']
['native English speakers', 'native English', 'English speakers', 'conversation partner', 'flex their worldliness']

 t3_2a9dnv
['legal jargon', 'lawyer.corpus delicti', 'fortiori have accepted', 'accepted pronounciations', 'pronounciations as legal']
['immediately understood', 'lawyers', 'pronunciations', 'phrases', 'immediately']
['pronunciations which wouldnt', 'pronunciations', 'wouldnt']
['pronunciation isnt correct', 'spoken Latin', 'make my meaning', 'meaning clear', 'pronunciation isnt']
['wont immediately understand', 'people Im speaking', 'speaking to wont', 'wont immediately', 'immediately understand']

 t3_6sz7w1
['Material willful deceit', 'party 

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_50ht8b
['coming from.I', 'understand', 'view', 'coming', 'from.I']
['morning person', 'person', 'morning']
['night person', 'person', 'procrastinator', 'night']
['time to spare', 'things done early', 'spare', 'organized', 'tend']
['good study habits', 'lot of clutter', 'study habits', 'good study', 'clutter']

 t3_50ht8b
['rough simulation', 'simulation', 'website', 'rough']
['Dyslexic people', 'finding the pattern', 'past knowledge', 'language and finding', 'Dyslexic']
['Imagine learning', 'learning to read', 'Imagine', 'learning', 'read']
['brain isnt taking', 'disorder and Turing', 'brain isnt', 'recognizable and scrambling', 'isnt taking']

 t3_50ht8b
['dyslexic undergraduate student', 'dyslexic person', 'undergraduate student', 'student of biological', 'unrealistic view']
['words are jumbled', 'page', 'error', 'thinking', 'adapt']
['case']
['jumbling of words', 'words and letters', 'consistent', 'order', 'adapt']
['equate that flawed', 'flawed symbol', 'correct word', 'word',

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_35fjb1
['Person', 'citizen']
[]

 t3_35fjb1
['jury duty', 'civic duties', 'Person', 'duty', 'civic']
['Person', 'constitution']
['Person A doesnt', 'Person', 'amend', 'doesnt', 'test']

 t3_35fjb1
['Freedom', 'vote']
['ruling elite', 'easily corrupted', 'elite', 'feel', 'idea']

 t3_35fjb1
['practical argument', 'Ill', 'practical', 'argument', 'philosophical']
['testing', 'carries']
['submits the ballot', 'person who passed', 'passed the test', 'ballot', 'ensure']

 t3_35fjb1
['Undereducated', 'idiot', 'person']

 t3_6fmzlf
[]
['content is Askreddit', 'jokes and stories', 'comments both jokes', 'Askreddit', 'stories']
['lack of explanation', 'actual argument', 'argument to put', 'gut reaction', 'case they absolutely']
['people stopped downvoting', 'wrong reasons explanations', 'people stopped', 'stopped downvoting', 'wrong reasons']

 t3_6fmzlf
['couple of thoughts.', 'good points', 'thoughts.', 'bring', 'good']
['unclear', 'arguing', 'reddit']
['lighterless argumentative.', 'majo

  0%|          | 0/1030 [00:00<?, ?it/s]


 t3_2idrbt
['drops run halfmarathons', 'actively promotes conflating', 'lbs as long', 'svelte or bodybuilding', 'drops run']
['inevitably includes larger', 'includes larger women', 'build indiscriminate muscle', 'indiscriminate muscle mass', 'varying body types']

 t3_5g4ugl
['productive and destructive', 'Nationalism', 'destructive', 'pointed', 'productive']
['bunch of people', 'people coming', 'ill', 'bunch', 'people']
['necessarily destructive', 'define nationalism', 'group identity', 'identity is necessarily', 'positives']
['dangerous human trait', 'change it.Group identity', 'single most dangerous', 'dangerous human', 'human trait']

 t3_4v1li0
['group of people', 'purpose of democracy', 'foster change', 'encourage a policybased', 'policybased debate']
['basic']
['President Senator Representative', 'leaders President Senator', 'individual leaders President', 'Simply cast votes', 'President Senator']
['proportionally distribute parliamentary', 'distribute parliamentary seats', 'Ca

  0%|          | 0/3 [00:00<?, ?it/s]


 t3_1tqlde
['entire religious aspect', 'aspect of Christmas', 'Christmas which isnt', 'remove the entire', 'entire religious']
['obligated to follow', 'unfortunate when social', 'social contracts', 'opposing and valid', 'valid viewpoints']
['happen', 'child', 'secret']
['lauded for letting', 'secret', 'child', 'lauded', 'letting']
['believing', 'child', 'ostracized']

 t3_1qiccr
['stereotyped or mistreated', 'mistreated based', 'based on gender', 'gender as well.People', 'hire men']
['identical resumes', 'companies identical', 'Studies', 'female', 'identical']
['significantly higher rate', 'higher rate', 'significantly higher', 'rate', 'male']
['entertainment media', 'bechdel test', 'test of movies', 'movies and entertainment', 'media']
['pass the test', 'test a movie', 'man', 'pass', 'test']

 t3_1bc54q
['thing', 'free', 'determinism']
['free will exists', 'determine the future', 'future due', 'things happen', 'people then respond']


In [19]:
# mined_args
print(len(mined_counters))

import random
_ = random.randint(0, len(SAMPLE))
example = mined_counters[_]
example

10303


{'id': 't3_2ppxgu',
 'counter': [{'sentence': 'I visit that subreddit from time to time and have a good laugh because there is truth with their criticism.',
   'kp': ['visit that subreddit',
    'good laugh',
    'subreddit from time',
    'criticism',
    'time'],
   'stance': 'PRO',
   'aspect': 'visit that subreddit'},
  {'sentence': 'However you have to realize that is is a circlejerk and that alone carries some drawbacks to it.',
   'kp': ['carries some drawbacks',
    'realize',
    'circlejerk',
    'carries',
    'drawbacks'],
   'stance': 'CON',
   'aspect': 'carries some drawbacks'},
  {'sentence': 'I assume you specifically mean rShitRedditSays and not any of its variations.',
   'kp': ['assume you specifically',
    'specifically mean rShitRedditSays',
    'variations',
    'assume',
    'specifically'],
   'stance': 'NEUTRAL',
   'aspect': 'assume you specifically'},
  {'sentence': 'There was a post yesterday where a girl uploaded a picture of her and her boyfriend and a c

In [18]:
import copy
mined_counters_ = copy.deepcopy(mined_counters)

In [20]:
file_name = "cmv_argument_extraction"
fout = open(f"../data/{file_name}.jsonl", "w")

# Deep_copies
mined_args_ = copy.deepcopy(mined_args)
mined_counters_ = copy.deepcopy(mined_counters)

# Working Loop
# for i in mined_counters_:
#     test = {
#         "test": [j for j in i["counter"]]
#     }

with tqdm(total=(len(mined_args))) as pbar:
    with fout:
        for original_post, mined_arg, mined_counter in zip(args, mined_args_, mined_counters_):
            # Extended pre-formatted mined object
            mined_arg["original_post"] = original_post["argument"]
            mined_arg["tgt_counter"] = [_ for _ in mined_counter["counter"]]

            fout.write(json.dumps(mined_arg))

            fout.write("\n")
            pbar.update()

logger.info(f"[{len(args)} Data Stored as {file_name}.jsonl]")

  0%|          | 0/10303 [00:00<?, ?it/s]

INFO:ARGUMENT-EXTRACTOR:[10303 Data Stored as cmv_argument_extraction.jsonl]


In [21]:
### EVALUATE OUTPUT ###
train = [json.loads(ln) for ln in open(f"../data/{file_name}.jsonl", "r")]

In [22]:
len(train)

10303

In [33]:
_ = random.randint(0, len(train))
print(train[_]["argument"], "\n")
print(train[_]["tgt_counter"], "\n")

[{'sentence': 'As a german especially in the city I live theres honestly too many turks and muslims here.', 'kp': ['city I live', 'live theres honestly', 'turks and muslims', 'german', 'city'], 'stance': 'NEUTRAL', 'aspect': 'city I live', 'topic': 'turkish citizens', 'concept': 'city I live'}, {'sentence': 'My old school was turksmuslimsguys from eastern europe which seriously lead to germans being made fun of for being german in their own damn countryBut thats not the only example when I go out I see as much if not more foreigners on the streets.', 'kp': ['school was turksmuslimsguys', 'turksmuslimsguys from eastern', 'eastern europe', 'made fun', 'damn countryBut'], 'stance': 'PRO', 'aspect': 'school was turksmuslimsguys', 'topic': 'turkish citizens', 'concept': 'school was turksmuslimsguys'}, {'sentence': 'And by foreigners I dont mean anything like scandinavians or people from developed countries for that matter I mean all the muslimsturksromanians etc.A little immigration doesnt 

In [None]:
# for i, j in zip(retrieved_ranked, sample):
#     # Add counter to the dictionary (implicitly, i)
#     i["counter"] = j["counter"]
#     fout.write(json.dumps(i))
#     fout.write("\n")