In [15]:
### INIT LOGGING ###
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ARGUMENT-EXTRACTOR")

In [1]:
### NLP FUNCTIONS ###
from src.utils_.utils import tokeniser, sentences_segment

print(tokeniser("hello, my name is Josh!"))
print(sentences_segment("hello, my name is Josh! How are you doing today? I'm curious ... will this line seperate? I'm not so sure Dr. Evil"))

['hello', ',', 'my', 'name', 'is', 'Josh', '!']
['hello, my name is Josh!', 'How are you doing today?', "I'm curious ... will this line seperate?", "I'm not so sure Dr.", 'Evil']


In [2]:
### LOAD DATASETS ###
import json
import random

args = [json.loads(ln) for ln in open("../data/cmv_processed.jsonl")]
topics = [json.loads(ln) for ln in open("../data/argument_topic_concept.jsonl")]
concepts = [json.loads(ln) for ln in open("../data/argument_concept.jsonl")]

In [3]:
len(topics), len(concepts), len(args)

(5990, 5990, 10303)

In [4]:
### ASSERT BLANKS ###
args_ = [json.loads(ln)["argument"]["argument"] for ln in open("../data/cmv_processed.jsonl")]
ids = [json.loads(ln)["id"] for ln in open("../data/cmv_processed.jsonl")]

for j, k in zip(args_, ids):
    if j == "":
        print("blanks", j, k)

blanks  t3_3cm6jy
blanks  t3_1egv4k
blanks  t3_1egv4k
blanks  t3_5wjdve


In [5]:
### SUBJECT ARG ###
import random
sample = random.randint(0, 99)

arg = args[sample]["argument"]["argument"]
claim = args[sample]["claim"]

print(sample, "\n")
print(claim, "\n")
print(arg, "\n")

98 

Rewinding time is the best superpower for daily life 

A topic that Ive had fun discussing is to imagine what could be done with a superpower. With so many choices and so many implications for each power this little game can spark long conversations on how each power would affect our daily lives. From all of these discussions Ive come away with the view that one power is better than all others by the most metrics a power that I like to call Rewind.Disclaimer If youve never found yourself wondering what your life could be like with superpowers and have no interest in starting then this topic is definitely not for you. The topic is one massive hypothetical so that better be your thing PTLDR Because damn! I wrote way too much to ask you to read it all 



In [6]:
### EXTRACT OVER UNIQUE ARGUMENTS ONLY ###
# unique = set()
# idx = set()
#
# for j, k in zip(args_, ids):
#     unique.add((j, k))
#
# unique = list(unique)
# type(unique), len(unique)
#
# unique

In [7]:
# len(unique)

In [8]:
### TODOs ###

# TODOs: Mine Args
# TODO: Enhance Stance Module; Determine stance over entire argument. Only implicate stance for Noun
# TODOs: Mine Counters
# TODOs: Add Concepts
# TODOs: Commonsense Query and Concept Expansion: Topics, Concepts, Synonyms
# TODOs: Parallel process
# TODOs: Prior tokenization and sentence segmentation to speed processing
# TODOs: Domain Restrict. Polarising social and political debate (Class labelling) only for higher-quality argument-knowledge set

In [6]:
### EXTRACTORS ###
from src.utils_.keyphrase_extraction import yake_extract_keyphrase, summa_extract_keyphrase

test = "Brazil's minimum income has increasingly been accepted."
ev_kp = yake_extract_keyphrase(test)
ev_kp_ = summa_extract_keyphrase(test)

test_2 = " "
ev_kp_2 = yake_extract_keyphrase(test_2)
ev_kp_2_ = summa_extract_keyphrase(test_2)

print(ev_kp)
print(ev_kp_)

# Can Handel Blanks
print(ev_kp_2)
print(ev_kp_2_)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cpu
INFO:KEYPHRASE_EXTRACTOR:[Test Keyphrase: ] 
 ['heathrow airport', 'environmental impact', 'aviation']


['Brazil minimum income', 'Brazil minimum', 'increasingly been accepted', 'minimum income', 'income has increasingly']
['minimum']
[]
[]


In [7]:
# def get_topic(arg_id):
#     topic_id = topic_ids.index(arg_id)
#     topic = topics[topic_id]["topic_label"]
#     return str(topic) if topic else None
#
# def get_concept(arg_id):
#     concept_id = concept_ids.index(arg_id)
#     concept = concepts[concept_id]["concept_label"]
#     return str(concept) if concept else None

In [8]:
# TODOs: Adu, Counter + KP Extraction as 'Argument Mining' preprocessing module
# TODOs: Implement Query Expansion at Query-time
# TODOs: Manage Duplicate Keywords
# DONE: Sentential Ranking
# DONE: Include Topic Label
# DONE: Include Concept Label
# DONE: Add News
# TODOs: Targeted Retreival with Semantic Graphs
# TODOs: Target Argumentative Content Only
# TODOs: Targeted Argument Content: Adus + Extractive Summary
# TODOs: Query Expansion
# TODOs: Multi-Field Search
# TODOs: Additional News and Knowledge Sources

In [9]:
from tqdm.notebook import tqdm
from src.detection.stance_classifier import sentence_stance, compare_stance
from src.utils_.word_net_expansion import expand_query
from src.detection.stance_classifier import sentence_stance
import multiprocessing
import json
import time

# Disable Huggingface Logging
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

topic_ids = [json.loads(ln)["id"] for ln in open("../data/argument_topic_concept.jsonl")]
concept_ids = [json.loads(ln)["id"] for ln in open("../data/argument_concept.jsonl")]

# Where notion == topic or concept
def get_notion(notions_ids, notions_lst, arg_id, label):
    notion_id = notions_ids.index(arg_id)
    notion = notions_lst[notion_id][label]
    return str(notion) if notion else None

# Extract Argument Discourse as Sentences, Keyphrases, Topics and Concepts
def extract_adus(arg_):

    id_ = arg_["id"]
    arg = arg_["argument"]["argument"]

    print("\n", id_)

    topic = get_notion(topic_ids, topics, id_, "topic_label")
    concept = get_notion(concept_ids, concepts, id_, "concept_label")

    adu_sents = sentences_segment(arg)

    extract_adus = []
    for _ in adu_sents:
        if len(tokeniser(_)) <= 5:
            continue

        try:
            kp = yake_extract_keyphrase(_)
        except:
            kp = [" "]

        print(kp)

        aspect = " " if kp == [] else kp[0]

        try:
            stance = sentence_stance(_, aspect)
        except:
            stance = " "

        adu = {"sentence": _, "kp": [i for i in kp], "stance": stance, "aspect": aspect, "topic": topic, "concept": aspect}

        extract_adus.append(adu)

    return ({
        "id": id_,
        "argument": [i for i in extract_adus]
    })

SAMPLE = args[0:1000]
STEPS = 10
STEP = max(int(len(SAMPLE) / STEPS), 1)
BATCHES = [args[i:i + STEP] for i in range(0, len(SAMPLE), STEP)]

mined_args = []
for idx, batch in enumerate(BATCHES):
    print('-' * 25 + 'Batch %d/%d' % (idx + 1, len(BATCHES)) + '-' * 25)

    with multiprocessing.Pool(8) as pool:
        with tqdm(total=(len(batch))) as pbar:
            for arg in batch:
                mined_args.append(extract_adus(arg))
                pbar.update()

INFO:STANCE_CLASSIFIER:[Initialised ... ]
INFO:STANCE_CLASSIFIER:[Test Stance ... ] 
 The mutual trust and understanding you share with your partner will lead to better sex, but that's not the only reason sex can be better when you're in a relationship., PRO, better sex


-------------------------Batch 1/10-------------------------


  0%|          | 0/100 [00:00<?, ?it/s]


 t3_30oi71
['Income Increasingly Popular', 'Basic Income Increasingly', 'Increasingly Popular', 'Basic Income', 'Income Increasingly']
['Basic income', 'broad support', 'progressive left', 'left and libertarian', 'Basic']
['including Paul Krugman', 'economists including Paul', 'Centerleft economists including', 'Paul Krugman', 'including Paul']
['effective antipoverty measure', 'antipoverty measure', 'effective antipoverty', 'measure', 'effective']
['capital to labor', 'reduces inequality', 'inequality by redistributing', 'redistributing income', 'income from capital']

 t3_30oi71
['Income Increasingly Popular', 'Basic Income Increasingly', 'Increasingly Popular', 'Basic Income', 'Income Increasingly']
['Basic income', 'broad support', 'progressive left', 'left and libertarian', 'Basic']
['including Paul Krugman', 'economists including Paul', 'Centerleft economists including', 'Paul Krugman', 'including Paul']
['effective antipoverty measure', 'antipoverty measure', 'effective antipov

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_2lvdpt
['Baltimore Ravens cheerleader', 'Baltimore Ravens', 'Ravens cheerleader', 'yearold boy', 'weeks ago']
['age of consent', 'plenty of instances', 'instances when older', 'older women', 'women often teachers']
['promiscuous than females', 'older men', 'men who prey', 'prey on young', 'young girls']
['species', 'cultures']
['Simply put men', 'Simply put', 'put men', 'men want sex', 'Simply']

 t3_6baulc
['wellaccepted fact', 'make money', 'money to make', 'money', 'wellaccepted']
['Tax law recognizes', 'Tax law', 'law recognizes', 'Tax', 'businesses']
['tax deductible', 'business', 'expenses', 'profit', 'tax']
['spend money commuting.For', 'money commuting.For employees', 'commuting.For employees commuting', 'spend money', 'money commuting.For']
['spend X dollars', 'work', 'spend', 'dollars', 'buscartrain']

 t3_6baulc
['wellaccepted fact', 'make money', 'money to make', 'money', 'wellaccepted']
['Tax law recognizes', 'Tax law', 'law recognizes', 'Tax', 'businesses']
['tax ded

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_5t20f8
['white American citizen', 'American citizen', 'white American', 'American', 'citizen']
['despise that term', 'term since people', 'me.I was abused', 'child constantly', 'constantly in survival']
['car we couldnt', 'couldnt afford', 'squatted with friends', 'needed food', 'food or ran']
['Family Court system', 'Court system left', 'Family Court', 'police and Family', 'Court system']
[]

 t3_5t20f8
['white American citizen', 'American citizen', 'white American', 'American', 'citizen']
['despise that term', 'term since people', 'me.I was abused', 'child constantly', 'constantly in survival']
['car we couldnt', 'couldnt afford', 'squatted with friends', 'needed food', 'food or ran']
['Family Court system', 'Court system left', 'Family Court', 'police and Family', 'Court system']
[]

 t3_2x5tun
['resist arrest', 'killed', 'clear', 'resist', 'arrest']
['substantial evidence proving', 'police officers involved', 'murder the suspect', 'resist arrest', 'killed the police']
['avoid 

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_48waq6
['Isnt the point', 'highly nuanced', 'dont understand', 'understand why people', 'people paint']
['Walter poisoning Brock', 'showing us.Sure poisoning', 'Vince Gilligan', 'Ill use Walter', 'Gilligan is showing']
['Hurting a childbringing', 'appalling notion', 'childbringing an innocent', 'drug war', 'war is generally']
['poisoning Brock', 'Brock as crossing', 'crossing a line', 'line and shun', 'factors at play']
['easier smarter choice', 'Jesse against Gus', 'Valley plant', 'turn Jesse', 'smarter choice']

 t3_4nzy6k
['Quran Doesnt Support', 'Muslims The Quran', 'Quran Doesnt', 'Doesnt Support', 'Islamic Terrorism']
['verses supporting violence', 'Quran will urn', 'cursory examination', 'supporting violence', 'terrorist groups']
['legitimize Islam', 'holy book', 'ideology which isnt', 'attempt to legitimize', 'clear connection']

 t3_3xyj6d
['World War', 'Nazis had won', 'War', 'Nazis', 'World']
['Nazi', 'meantime', 'form of society', 'form', 'society']
['views', 'youre']


  0%|          | 0/100 [00:00<?, ?it/s]


 t3_6hacgk
['visiting other lifeforms', 'managed to resolve', 'resolve the problems', 'problems that prevent', 'lifeforms']
['planet Kworsk ready', 'natural iron supplies', 'planet Kworsk', 'Kworsk ready', 'home galaxy']
['modern era due', 'ethics and politics', 'group alone.Science', 'exact clone', 'individuality no differences']
['stars close', 'stop caring', 'individual parts', 'group', 'stars']

 t3_1off27
['completely understand supporting', 'understand supporting students', 'high school', 'completely understand', 'understand supporting']
['high school degree', 'high school', 'school degree', 'incredibly vital', 'learning disorder']
['undergraduate university', 'understand helping', 'university', 'understand', 'helping']
['produce good graduates', 'matter the reason', 'responsibility to produce', 'produce good', 'good graduates']
['professional level school', 'medical school veterinary', 'veterinary school law', 'school veterinary school', 'school law school']

 t3_1off27
['compl

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_3l58uz
['Hiya havent', 'detrimental to society', 'carefully word', 'make it clear', 'surefire support']
['brought me Ive', 'education has brought', 'fulfill my end', 'burden of proof.However', 'peruse some abstracts']
['Ive seen Christian', 'Christian politicians', 'quality of life', 'edge churches', 'churches being taxfree']
['understand the individual', 'individual benefits', 'shaky source', 'source once told', 'faith can increase']
['religions effect', 'effect on society', 'society today', 'today would expand', 'expand my mind.If']

 t3_61u63t
['Reddit please change', 'change my view', 'young child', 'Reddit', 'Note']
['electronic device', 'iPad an iPhone', 'parent letting', 'letting their child', 'phone I die']
['shows irresponsibility', 'irresponsibility', 'shows']
['bad parenting', 'device is bought', 'parenting', 'device', 'bought']
['pay attention', 'easy', 'distract', 'dont', 'pay']

 t3_4zoqt4
['Abrahamic religions', 'acid opinion', 'Abrahamic', 'religions', 'acid']
['ag

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_5tlcgx
['confessing unrequited love', 'unrequited love', 'purpose to confessing', 'confessing unrequited', 'love']
['potentially make', 'make you feel', 'produce any positive', 'positive gain', 'potentially']
['crush on Person', 'Person', 'scenarioPerson', 'crush']
['big secret burden', 'surface level meaning', 'straight.Person A confesses', 'romantic feelings', 'incapable of reciprocating.Person']

 t3_5tlcgx
['confessing unrequited love', 'unrequited love', 'purpose to confessing', 'confessing unrequited', 'love']
['potentially make', 'make you feel', 'produce any positive', 'positive gain', 'potentially']
['crush on Person', 'Person', 'scenarioPerson', 'crush']
['big secret burden', 'surface level meaning', 'straight.Person A confesses', 'romantic feelings', 'incapable of reciprocating.Person']

 t3_31n763
['return to society', 'rehabilitate individuals', 'society', 'prison', 'rehabilitate']
['dont understand', 'understand why locking', 'life is beneficial', 'dont', 'understand

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_5d6hhq
['dictionary.com definitions adapted', 'elements mosquitoes minor', 'mosquitoes minor injures', 'comprehend the usefulness', 'dictionary.com definitions']
['rank and allegiance', 'methods of camouflage', 'intimidate foes', 'denoting rank', 'allegiance']
['places']
['street crips Tattoos', 'fiftyfifth street crips', 'crips Tattoos', 'wanted to show', 'show my allegiance']
['gave rank regiment', 'military tattoos', 'tattoos that gave', 'gave rank', 'rank regiment']

 t3_5d6hhq
['dictionary.com definitions adapted', 'elements mosquitoes minor', 'mosquitoes minor injures', 'comprehend the usefulness', 'dictionary.com definitions']
['rank and allegiance', 'methods of camouflage', 'intimidate foes', 'denoting rank', 'allegiance']
['places']
['street crips Tattoos', 'fiftyfifth street crips', 'crips Tattoos', 'wanted to show', 'show my allegiance']
['gave rank regiment', 'military tattoos', 'tattoos that gave', 'gave rank', 'rank regiment']

 t3_5s102j
['Emoluments Clause', 'remov

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_24sevz
['hurdles to overcome', 'fundamental hurdles', 'overcome', 'read', 'suggests']
['computations its making', 'understand its impossible', 'impossible to measure', 'measure a quantum', 'quantum system']
['extremely low temperatures', 'temperatures to work', 'passed that.They', 'extremely low', 'low temperatures']
['fundamentally flawed concept', 'end I predict', 'them.Quantum computing', 'fundamentally flawed', 'flawed concept']

 t3_1tjhom
['Prescriptions require access', 'Prescriptions require', 'require access', 'Prescriptions', 'doctor']
['privately managed.', 'represents a significant', 'significant barrier', 'barrier to treatment', 'healthcare is privately']
['drug addict robs', 'helping make drugs', 'legally obtain', 'addict robs', 'robs a pharmacy']
['personal decision', 'high addict', 'happy addict', 'addict', 'high']

 t3_6so9sk
['information we obtain', 'comprehend every piece', 'piece of information', 'Language', 'obtain']
['thinking silently', 'thinking using word

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_1udlql
['baths to showers', 'young age', 'time you switch', 'switch from baths', 'showers']
['saves water', 'varies from person', 'person to person.I', 'theory it saves', 'water']
['toilet flush', 'drain with running', 'running water', 'address the gross', 'gross factor']
['shower then dont', 'dont', 'pee', 'shower']
['anything.I dont', 'doesnt hurt', 'force', 'anything.I', 'dont']

 t3_1udlql
['baths to showers', 'young age', 'time you switch', 'switch from baths', 'showers']
['saves water', 'varies from person', 'person to person.I', 'theory it saves', 'water']
['toilet flush', 'drain with running', 'running water', 'address the gross', 'gross factor']
['shower then dont', 'dont', 'pee', 'shower']
['anything.I dont', 'doesnt hurt', 'force', 'anything.I', 'dont']

 t3_48choe
['rpolitics about Sanders', 'Sanders chances', 'win the nomination', 'lot of discussion', 'discussion on political']
['stands that Sanders', 'Sanders is hopeful', 'win the nomination', 'consensus stands', 'Sa

In [10]:
# mined_args
print(len(mined_args))

import random
_ = random.randint(0, len(SAMPLE))
example = mined_args[_]
example

1000


{'id': 't3_3bq7f3',
 'argument': [{'sentence': 'I am American and have used toilet paper my whole life.',
   'kp': ['toilet paper', 'American', 'life', 'toilet', 'paper'],
   'stance': 'NEUTRAL',
   'aspect': 'toilet paper',
   'topic': None,
   'concept': 'toilet paper'},
  {'sentence': 'Ive had to use a bidet the past week for medical reasons and I think that toilet paper is the far superior option.CleanlinessIn this area I prefer toilet paper for a few reasons.',
   'kp': ['prefer toilet paper',
    'toilet paper',
    'bidet the past',
    'past week',
    'week for medical'],
   'stance': 'PRO',
   'aspect': 'prefer toilet paper',
   'topic': None,
   'concept': 'prefer toilet paper'},
  {'sentence': 'First of all there is actual visual confirmation that youve got it all.',
   'kp': ['actual visual confirmation',
    'actual visual',
    'visual confirmation',
    'confirmation that youve',
    'actual'],
   'stance': 'NEUTRAL',
   'aspect': 'actual visual confirmation',
   'topic

In [11]:
### COUNTER-ARGS ###
def extract_counters(arg_):

    id_ = arg_["id"]
    counter = arg_["counter"]["counter"]

    print("\n", id_)

    counter_sents = sentences_segment(counter)

    extract_counters = []
    for _ in counter_sents:
        if len(tokeniser(_)) <= 5:
            continue

        try:
            kp = yake_extract_keyphrase(_)
        except:
            kp = [" "]

        print(kp)

        aspect = " " if kp == [] else kp[0]

        try:
            stance = sentence_stance(_, aspect)
        except:
            stance = " "

        adu = {"sentence": _, "kp": [i for i in kp], "stance": stance, "aspect": aspect}

        extract_counters.append(adu)

    return ({
        "id": id_,
        "counter": [i for i in extract_counters]
    })

STEPS = 10
STEP = max(int(len(SAMPLE) / STEPS), 1)
BATCHES = [args[i:i + STEP] for i in range(0, len(SAMPLE), STEP)]

mined_counters = []
for idx, batch in enumerate(BATCHES):
    print('-' * 25 + 'Batch %d/%d' % (idx + 1, len(BATCHES)) + '-' * 25)

    with multiprocessing.Pool(8) as pool:
        with tqdm(total=(len(batch))) as pbar:
            for counter in batch:
                mined_counters.append(extract_counters(counter))
                pbar.update()

-------------------------Batch 1/10-------------------------


  0%|          | 0/100 [00:00<?, ?it/s]


 t3_30oi71
['Basic Income requires', 'rejects progressive taxation', 'Basic Income', 'Income requires', 'points seem predicated']
['worded incorrectly', 'conservatives or rightlibertarian', 'prefer those outcomes', 'liberals and leftlibertarians', 'not.The title']
['Basic Income', 'replaced.If that doesnt', 'doesnt change', 'change your view', 'Basic']
['perfectly shot arrow', 'shot arrow aimed', 'wrong target', 'written and laid', 'perfectly shot']

 t3_30oi71
['time search google', 'search google images', 'cliff unemployment trap', 'welfare marginal withdrawal', 'welfare cliff unemployment']
['surprised', 'evidence']
['effect youd', 'significant', 'dont', 'effect', 'youd']
['consensus they exist.Note', 'apply to individuals', 'individuals and households', 'households some benefits', 'left']
['UKs housing benefit', 'UKs housing', 'housing benefit', 'benefit are allocated', 'myriad systems']

 t3_30oi71
['attacking resource taxes', 'fund resource dividends', 'basic income', 'funding i

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_2lvdpt
['true on average', 'average', 'true']
['properties are distributions', 'reality these properties', 'distributions', 'reality', 'properties']
['promiscuity or physical', 'physical strength', 'plot the curves', 'curves overlap.There', 'curves for promiscuity']
['race.On average Africans', 'stronger than Asians', 'average Africans', 'Africans are physically', 'unjust make']
['assaults an African', 'Asian who assaults', 'assault an Asian.Different', 'punishing a crime', 'receive less punishment']

 t3_6baulc
['Youve brought', 'brought up helping', 'helping the poorworking', 'poorworking class', 'class a lot']
['federal tax deductions', 'federal tax', 'tax deductions', 'federal', 'tax']
['poorworking class people', 'class people itemize', 'itemize their taxes', 'poorworking class', 'class people']
['tax deduction completely', 'deduction completely worthless', 'makes this tax', 'tax deduction', 'deduction completely']
['marginal tax rate', 'tax deductions increase', 'tax rate', 

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_5t20f8
['brown skin lacks.To', 'white privilege basically', 'darker skin colors', 'brown skin', 'black or brown']
['illegal black Hispanic', 'overt discrimination made', 'discrimination made illegal', 'made illegal black', 'finances shown fewer']
['factor your race', 'race working', 'didnt', 'factor', 'race']
['life things', 'white', 'problems', 'life', 'things']
['white privilege', 'white', 'privilege']

 t3_5t20f8
['privileged']
['women rich people', 'poor people white', 'minorities educated people', 'uneducated people young', 'ugly people healthy']
['youll be treated', 'privileged qualities', 'treated', 'privileged', 'qualities']
['metrics.The specific problems', 'metrics.The specific', 'specific problems', 'problems that people', 'people face']
['poor and abused', 'problems are related', 'poor', 'abused', 'problems']

 t3_2x5tun
['criminal act', 'arrest has committed', 'committed a criminal', 'act', 'presuming']
['arrest isnt valid', 'cop is abusing', 'abusing his power', 'isn

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_48waq6
['respect his motivations', 'strange premise', 'monster or irredeemably', 'irredeemably evil', 'understand his perspective']
['evil character', 'nuance', 'imply', 'evil', 'character']

 t3_4nzy6k
['mischaracterization', 'dishonest']
['enemy.The basic fundamental', 'basic fundamental principle', 'terrorism with Islam', 'frame an argument', 'thwart the intentions']
['wording arent avoiding', 'arent avoiding connecting', 'avoiding connecting terrorism', 'West and Christianity', 'moderate Muslims']
['middle.Obama frequently talks', 'perverted Islam', 'middle.Obama frequently', 'frequently talks', 'talks about terrorists']
['factual accuracy', 'answer both tactically', 'matter of factual', 'accuracy', 'middle']

 t3_3xyj6d
['personal views', 'personal', 'views', 'reductive', 'extreme']
['real reason lies', 'real reason', 'reason lies', 'real', 'reason']
['origin lies', 'bit too much.I', 'individual more depends', 'environment', 'depends']
['argue that Man', 'inculcated by societ

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_6hacgk
['Hypothetical territory', 'Hypothetical', 'arent kidding', 'territory', 'arent']
['Iron isnt rare', 'Iron isnt', 'isnt rare', 'Iron', 'isnt']
['scarce element', 'inhabited planet', 'absolute stupidest', 'aquisition', 'scarce']
['Korean ESports athletes', 'uninhabbited asteroids moons', 'inhabitable world can.Invaders', 'arent ethically opposed', 'ethics havent stopped']

 t3_1off27
['Americans with Disabilities', 'arent called exceptions', 'called exceptions theyre', 'exceptions theyre called', 'theyre called accommodations']
['undergraduates and graduate', 'graduate students.Theres', 'students.Theres a common', 'common misconception', 'accommodations offered']
['laws applying', 'laws', 'applying', 'completely']
['collegeuniversity level accommodations', 'provide equal access', 'collegeuniversity level', 'level accommodations', 'provide equal']

 t3_1off27
['learning disabilities', 'wife has learning', 'disabilities', 'wife', 'learning']
['demonstrates exceptionally high',

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_3l58uz
['August President George', 'Bush set limits', 'stem cell research', 'religious fundamentalist wing', 'Republican Party']
['consistently true', 'consistently', 'true']
['Religious people cast', 'Intelligent Design', 'people cast doubt', 'blockslow scientific progress.Theres', 'Evolution denialists']

 t3_61u63t
['huge benefits', 'benefits', 'parent', 'huge']
['address the elephant', 'room as parents', 'address', 'elephant', 'room']
['important oneonone time', 'parents do stuff', 'stuff like work', 'work have grownup', 'grownup conversations']
['open up worlds', 'simply not reasonable', 'healthy or desirable', 'constantly focused', 'focused on them.Second']
['lives work', 'worlds', 'lives', 'work']

 t3_4zoqt4
['Death', 'happen', 'accept']
['solution']

 t3_4zoqt4
['intentional action leading', 'Suicide is intentional', 'desired death', 'intentional action', 'action leading']
['desire death', 'intentional action', 'Aging', 'death', 'intentional']

 t3_4zoqt4
['escape death',

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_5tlcgx
['benefit.My personal argument', 'making that choice', 'friendship.Perhaps confessing', 'denying telling', 'reasons.confessing your affections']

 t3_5tlcgx
['extreme stance Believing', 'fairly extreme stance', 'stance Believing', 'fairly extreme', 'extreme stance']
['ability to deal', 'deal with frankly', 'underestimating Bs ability', 'deal', 'life']
['overprotective']

 t3_31n763
['time to appeal', 'innocent', 'time', 'appeal']
['people who didnt', 'didnt commit', 'commit the crime.It', 'certainty', 'beneficial']
['bit of money', 'innocent people', 'ready to kill', 'save a bit', 'money']

 t3_3c79ax
['Prisoners Dilemma', 'optimisation.Take the Prisoners', 'game theory', 'theory of optimisation.Take', 'Dilemma']
['bothconfess situation', 'individual result', 'betrayal ensures', 'cartel breaks', 'things being equal']
['pursuing short term', 'multiparty cartel agreeing', 'gulls doesnt advantage', 'greater personal reward', 'short term personal']

 t3_1bc7uw
['police officers

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_5d6hhq
['Theyre purely decorative.But', 'things genital piercings', 'Theyre purely', 'practical purpose', 'increased sensation']
['tongue piercings', 'tongue', 'piercings']
['modern world', 'world will rarely', 'risks of injury', 'People', 'piercings']
['incredibly minor', 'damage is incredibly', 'minor', 'damage', 'incredibly']
['add a negligible', 'negligible amount', 'amount of extra', 'blunt trauma', 'extra trauma']

 t3_5d6hhq
['nipples pierced', 'pierced', 'recently', 'nipples']
['sexual purposes', 'enjoy the aesthetic', 'aesthetic enjoyed', 'enjoyed the endorphin', 'endorphin rush']
['rite of passage', 'passage for pain', 'pain tolerance.Further', 'tribes and cultures', 'piercings']
['bottom lip pierced', 'bottom lip', 'lip pierced', 'pierced and stretched', 'women']

 t3_5s102j
['obvious repugnance Trump', 'repugnance Trump possesses', 'Pence is awful', 'repugnance Trump', 'Trump possesses']
['lessen considerably leading', 'Trumps administration', 'leftist resistance', 'le

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_24sevz
['Reading your post', 'basic misunderstandings', 'Reading', 'misunderstandings', 'post']
['Quantum systems', 'systems are measured', 'Quantum', 'systems', 'measured']
['quantum algorithms include', 'quantum algorithms', 'algorithms include', 'include an exact', 'exact understanding']
['states', 'qubits']

 t3_1tjhom
['camels back', 'rapid increase', 'increase of antibioticresistant', 'antibioticresistant strains', 'straw that broke']
['Nobel Prize speech', 'Alexander Fleming', 'Nobel Prize', 'Prize speech', 'quote thee inventor']

 t3_6so9sk
['arbitrary linguistic category', 'linguistic category', 'arbitrary linguistic', 'category', 'pronouns']
['languages dont', 'dont specify gender', 'gender in pronouns', 'languages', 'dont']
['neuter pronoun', 'pronoun', 'neuter']
['speaker chooses based', 'speaker chooses', 'chooses based', 'situation', 'pronouns']
['strange about choosing', 'choosing a pronoun', 'pronoun to project', 'image', 'strange']

 t3_6so9sk
['pronoun debate', '

  0%|          | 0/100 [00:00<?, ?it/s]


 t3_1udlql
['world personal contexts', 'Brazil and Argentina', 'hotels in Brazil', 'Argentina The problem', 'live this logic']
['open wound', 'house hold', 'shower the drain', 'grow bacteria', 'increase risk']
['communal showers', 'university its horrible', 'horrible people', 'sorts of bad', 'bad things']
['shower peers', 'safe using bare', 'bare feet', 'peers', 'shower']

 t3_1udlql
['early age', 'disgusting to pee', 'age', 'dont', 'disgusting']
['shower then logically', 'toilet', 'pee', 'shower', 'logically']
['Meaning', 'shower', 'clean']
['childs poop', 'poop', 'childs']

 t3_48choe
['support Sanders', 'trump claiming', 'true progressives', 'progressive label', 'switch from Sanders']

 t3_48choe
['actual true progressive', 'progressive like Sanders', 'reason to vote', 'true progressivism', 'actual true']
['United States', 'Barack Obama', 'Obama in made', 'blue America', 'red America']
['world isnt blue', 'blue and red', 'hijack that statement', 'world isnt', 'isnt blue']
['Morals 

In [110]:
# mined_args
print(len(mined_counters))

import random
_ = random.randint(0, len(SAMPLE))
example = mined_counters[_]
example

1000


{'id': 't3_4eqq6u',
 'counter': [{'sentence': 'Could you please name a specific policy position which both the republican and democratic establishment agree upon which you and a majority of americans do not agree with?You dont like the establishment but why not?',
   'kp': ['specific policy position',
    'democratic establishment agree',
    'specific policy',
    'policy position',
    'republican and democratic'],
   'stance': 'PRO',
   'aspect': 'specific policy position'},
  {'sentence': 'What is your problem with the establishment beyond that they are bought and paid for by corporations?',
   'kp': ['bought and paid',
    'corporations',
    'problem',
    'establishment',
    'bought'],
   'stance': 'CON',
   'aspect': 'bought and paid'},
  {'sentence': 'Is there a law that says CEOs get to murder people and not go to jail?',
   'kp': ['murder people', 'jail', 'law', 'CEOs', 'murder'],
   'stance': 'CON',
   'aspect': 'murder people'},
  {'sentence': 'How is this bought and paid

In [30]:
import copy
mined_counters_ = copy.deepcopy(mined_counters)

In [28]:
for i in mined_counters_:
    test = {
        "test": [j for j in i["counter"]]
    }

In [29]:
test

{'test': [{'sentence': 'Its not impossible to be both sad that you lost someone and try to find comfort in the knowledge that there is some kind of plan that makes up for it.If your mom tragically died and God came up to you and said Your mothers death was a vital part of a large plan to cure cancer.',
   'kp': ['mom tragically died',
    'died and God',
    'cure cancer',
    'find comfort',
    'it.If your mom'],
   'stance': 'PRO',
   'aspect': 'mom tragically died'},
  {'sentence': 'You would still feel sad.',
   'kp': ['feel sad', 'sad', 'feel'],
   'stance': 'CON',
   'aspect': 'feel sad'},
  {'sentence': 'It doesnt matter that you could probably argue that her death was meaningful and worth saving millions of others its still sad.',
   'kp': ['worth saving millions',
    'doesnt matter',
    'death was meaningful',
    'meaningful and worth',
    'worth saving'],
   'stance': 'PRO',
   'aspect': 'worth saving millions'},
  {'sentence': 'And you would still have every right to wa

In [31]:
file_name = "cmv_argument_extraction"
fout = open(f"../data/{file_name}.jsonl", "w")

# Deep_copies
mined_args_ = copy.deepcopy(mined_args)
mined_counters_ = copy.deepcopy(mined_counters)

# Working Loop
# for i in mined_counters_:
#     test = {
#         "test": [j for j in i["counter"]]
#     }

with tqdm(total=(len(mined_args))) as pbar:
    with fout:
        for original_post, mined_arg, mined_counter in zip(args, mined_args_, mined_counters_):
            # Extended pre-formatted mined object
            mined_arg["original_post"] = original_post["argument"]
            mined_arg["tgt_counter"] = [_ for _ in mined_counter["counter"]]

            fout.write(json.dumps(mined_arg))

            fout.write("\n")
            pbar.update()

logger.info(f"[{len(args)} Data Stored as {file_name}.jsonl]")

  0%|          | 0/1000 [00:00<?, ?it/s]

INFO:ARGUMENT-EXTRACTOR:[10303 Data Stored as cmv_argument_extraction.jsonl]


In [32]:
### EVALUATE OUTPUT ###
train = [json.loads(ln) for ln in open(f"../data/{file_name}.jsonl", "r")]

In [34]:
len(train)

1000

In [33]:
_ = random.randint(0, len(train))
print(train[_]["argument"], "\n")
print(train[_]["tgt_counter"], "\n")

[{'sentence': 'As a german especially in the city I live theres honestly too many turks and muslims here.', 'kp': ['city I live', 'live theres honestly', 'turks and muslims', 'german', 'city'], 'stance': 'NEUTRAL', 'aspect': 'city I live', 'topic': 'turkish citizens', 'concept': 'city I live'}, {'sentence': 'My old school was turksmuslimsguys from eastern europe which seriously lead to germans being made fun of for being german in their own damn countryBut thats not the only example when I go out I see as much if not more foreigners on the streets.', 'kp': ['school was turksmuslimsguys', 'turksmuslimsguys from eastern', 'eastern europe', 'made fun', 'damn countryBut'], 'stance': 'PRO', 'aspect': 'school was turksmuslimsguys', 'topic': 'turkish citizens', 'concept': 'school was turksmuslimsguys'}, {'sentence': 'And by foreigners I dont mean anything like scandinavians or people from developed countries for that matter I mean all the muslimsturksromanians etc.A little immigration doesnt 

In [None]:
# for i, j in zip(retrieved_ranked, sample):
#     # Add counter to the dictionary (implicitly, i)
#     i["counter"] = j["counter"]
#     fout.write(json.dumps(i))
#     fout.write("\n")