In [1]:
# TODOs: Operate DB Class
# TODOs: Use SQLite
# TODOs: Implement BM25

import sys
sys.path.append('../')

from utils.elastic_db import ElasticDB

# INIT DB OBJECT
PORT = "http://localhost:9200"
INDEX_NAME = "news_cc"

# news_db = ElasticDB(elastic_port=PORT, elastic_index=INDEX_NAME)
# wiki_db = ElasticDB(elastic_port=PORT, elastic_index="knowledge")

db = ElasticDB(elastic_port=PORT)

INFO:utils.elastic_db:Connecting to http://localhost:9200 
INFO:utils.elastic_db:Connected to <Elasticsearch(['http://localhost:9200'])> 


In [2]:
### LOAD DATASETS ###
import json

args = [json.loads(ln) for ln in open("../../data/train_cmv_cleaned.jsonl")]
topics = [json.loads(ln) for ln in open("../../data/claim_topics.jsonl")]

ex_retrieval = [json.loads(ln) for ln in open("../../data/wiki_doc_retrieved_from_op_train.jsonlist")]
ex_ranked = [json.loads(ln) for ln in open("../../data/selected_evidence.jsonl")]

conan = [json.loads(ln) for ln in open("../../data/CONAN.json")]

In [3]:
### SAMPLE OUTPUT ###
import random

# TODOs: Replicate Output, Passage Evidence Retrieval
# TODOs: Replicate Output, Passage Ranking

_ = random.randint(0, 1000)
ranked = ex_ranked[_]
retireval = ex_retrieval[_]

retireval

{'tid': 't3_1hcmq0',
 'retrieval_results': [{'retrieved_document_titles': ['Personhood',
    'Abortion debate',
    'Abortion in the United States',
    'Beginning of human personhood',
    'Philosophical aspects of the abortion debate'],
   'query': 'abortion that personhood the topic Irrelevant',
   'sentence': 'I Believe That Personhood is Irrelevant to the Topic of Abortion . '},
  {'retrieved_document_titles': ['Abortion debate',
    'Judith Jarvis Thomson',
    'Philosophical aspects of the abortion debate',
    'Abortion in the United States',
    'Personhood'],
   'query': 'the topic abortion abortion personhood judith thomson a person the focal point irrelevant',
   'sentence': "Like Judith Thomson , I believe that accepting whether or not a fetus is a person is irrelevant to the topic of abortion because personhood is n't the focal point of which the morality of abortion depends . "},
  {'retrieved_document_titles': ['Margiana',
    'Muslim conquest of Transoxiana',
    'Sogd

In [4]:
# TODOs: News Data
# TODOs: Ranking
# TODOs: Ranking, Cosine
# TODOs: Research Evidence Retireval: Context Aware, Neural Retrieval
# TODOs: Stance
# TODOs: Target ADUs: Premises, Claims, discard non-ADUs, thus reducing noise over retreival 
# TODOs: Paralellise

# TERMS => Input Argument : args : arg
# TERMS => Argument Discourse Units : adu : adu_prem : adu_claim
# TERMS => Evidence : ev
# TERMS => Counter Evidence : counta_ev

In [5]:
### SUBJECT ARG ###
import random
import re

# Note: 340; Gender Equality
# Note: 991; Abortion

_ = random.randint(0, 1000)
print(_)

claim = args[_]["titles"]
arg = args[_]["arguments"]
claim, arg

834


('Intelligent Design and the Big Bang are not very different at their core.',
 'Fact It is widely accepted that the Universe must have a beginning.Claim Intelligent Design and the Big Bang theory explain this phenomenon with the same basic foundation.ID says There is some intelligent being who exists outside of our understanding of time. He has no beginning and no end. He created the Universe.BB says At some moment all matter in the universe was contained in one single point. The Big Bang took place causing this matter to expand and marked the beginning of the Universe.This matter must have always existed because based on the laws of science matter cannot be created. In other words all matter in the universe must have no beginning and no end. Either you believe in an intelligent being who has no beginning and no end or you believe in matter that has no beginning and no end.')

In [6]:
### NLP FUNCTIONS ###

def sentences_segment(doc):
    return [i for i in re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', doc)]

def tokeniser(doc):
    return re.findall(r"\w+(?:'\w+)?|[^\w\s]", doc)

# Test Statements
tokeniser("hello, my name is Josh!")
sentences_segment("hello, my name is Josh! How are you doing today? I'm curious ... will this line seperate? I'm not so sure Dr. Evil")

['hello, my name is Josh! How are you doing today?',
 "I'm curious ... will this line seperate?",
 "I'm not so sure Dr. Evil"]

In [7]:
from utils.keyphrase_extraction import extract_keyphrase
import re

# TODOs: Implement Keyphrase Extraction and Tokenisation as a pre-processing step; Be mindful of interactive pipeline mode
# TODOs: Implement Semantic Search: https://www.elastic.co/blog/text-similarity-search-with-vectors-in-elasticsearch 
# TODOs: Fix Sentencizer
# TODOs: Tokeniser Function
# TODOs: Domain Restrict - polarising social and political debate (Class labelling); Note: currently, open-domain. Score Highly Polarised Discussions.
# TODOs: Bag of Topics, Concepts for Topic Labelling
# TODOs: News, Political, Sociology and 'Good', 'Positive' counter-evidence Knowledge Base.
# TODOs: Consider parsing and normalising knowledge; extracting core ADU arguments, premises, evidence and claims.
# TODOs: Sentence Segment Function
# TODOs: Semantic Retrieval ** 
# TODOs: Fix Sentence parsing
# TODOs: Parameterise Index-DB in use
# TODOs: Implement as a Class
# TODOs: Implement Logging

### RETRIEVER ###

# TODOs: One Argument Loop (1 x # ADUs)
# TODOs: Filter ADUs; Strong and disputable arguments (Premise rakning, NLI, Argument Similarity (counter stance))
# db = news_db

db = db
queries = []
retrieved_ev = []

# TODOs: Reduce the size of the Argument using extractive summarisation
# TODOs: Reduce the size of the Argument Targeting Premises Only
# TODOs: Experiement with Query
# TODOs: Write to Database

def retrieved_evidence(arg):
    ad_units = sentences_segment(arg)

    results = []

    for adu in ad_units:
        toks = re.findall(r"\w+(?:'\w+)?|[^\w\s]", adu)

        if len(toks) <= 8:
            continue
    
        kp = extract_keyphrase(adu, n_kp=5)

        if kp:
            query = ", ".join(i for i in kp)

            titles = [i["_source"]["document"]["title"] for i in db.search(query_=query, k=10)]
            evidence = [i["_source"]["document"]["text"] for i in db.search(query_=query, k=10)]

            results.append({
                "argument_discourse_unit": adu, 
                "retrieved_documents_titles": titles,
                "adu_keyphrases": [i for i in kp],
                "retrieved_evidence": evidence,
                "merged_evidence": ", ".join(ln for ln in evidence)
                })

        return results

retrieved_ev = []

for arg in args[0:100]:
    retrieved_ev.append(retrieved_evidence(arg["arguments"]))

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cpu


No Keywords
['heathrow airport', 'environmental impact', 'aviation']
None


INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.039s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.022s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.030s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.012s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.057s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.036s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.006s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.006s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.004s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.003s]
INFO:elast

No Keywords


INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.011s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.006s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.006s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.007s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.025s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.058s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.049s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.025s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.026s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.025s]
INFO:elast

In [8]:
from multiprocessing.pool import ThreadPool as Pool

### PASSAGE RANKING; KEYWORD OVERLAP ###

# TODOs: For each ADU, Rank Merged Evidence using Keyword Overlap and Filter for Contrasting Stance
# TODOs: Handel Multiple Keywords

def overlap_score(evidence_kp, adu_kp):
    score = 0
    
    # TODOs: Robust 'None' Handelling
    if adu_kp == None:
        return score

    # Split Keyphrase into components, scoring partial units as overlap
    else:
        for i in evidence_kp:
            for j in i.split():
                # Ensure string value, to enact .find
                if ", ".join([i for i in adu_kp]).find(j) != -1: score += 1
                
                else: continue
    
    return score

def calculate_overlap(merged_ev, adu_kp):

    for ev_unit in sentences_segment(merged_ev):
        toks = tokeniser(ev_unit)
        kp_overlap = 0
        
        if len(toks) <= 8: continue
        
        ev_unit_kp = extract_keyphrase(ev_unit)

        if ev_unit_kp:
            kp_overlap = overlap_score(evidence_kp=ev_unit_kp, adu_kp=adu_kp)
        
        else: ev_unit_kp = None
        yield ev_unit, ev_unit_kp, kp_overlap

from tqdm import tqdm

pool = Pool(8)
def rank_passages(ev_):   
    adu = ev_[0]["argument_discourse_unit"]
    merged_ev = ev_[0]["merged_evidence"] 
    adu_kp = ev_[0]["adu_keyphrases"]

    ### CALCULATE OVERLAP ###
    for ev_unit, ev_unit_kp, kp_overlap in calculate_overlap(merged_ev, adu_kp):
        
        yield {
            "adu": adu,
            "adu_kp": adu_kp,
            "evidence_unit": ev_unit,
            "evidence_kps": ev_unit_kp,
            "overlap": kp_overlap
        }

# TODOs: Implement Sort
def sort_rank_evidence(retrieved_evidence):
    for ev_ in retrieved_ev:
        yield [i for i in rank_passages(ev_)]

for i in sort_rank_evidence(retrieved_ev):
    i.sort(key=lambda y: y["overlap"], reverse=True)
    print(i)

# sorted_ranked_evidence = [i for i in sort_rank_evidence(retrieved_evidence)]

### RANKED EVIDENCE ###
# overlapping_evidence.sort(key=lambda y: y["overlap"], reverse=True)
# overlapping_evidence

[{'adu': 'I cant remember the topic that spurred this discussion but a friend and I were debating whether manmade things were natural.', 'adu_kp': ['manmade things', 'discussion', 'topic', 'friend'], 'evidence_unit': 'On-topic discussion of a novel, rather than meta-discussion, would include such things as the consideration of a particular character, examination of incidents in the plot, or exploration of the general themes of the book.', 'evidence_kps': ['topic discussion', 'discussion', 'novel'], 'overlap': 3}, {'adu': 'I cant remember the topic that spurred this discussion but a friend and I were debating whether manmade things were natural.', 'adu_kp': ['manmade things', 'discussion', 'topic', 'friend'], 'evidence_unit': 'On-topic discussion of a novel, rather than meta-discussion, would include such things as the consideration of a particular character, examination of incidents in the plot, or exploration of the general themes of the book.', 'evidence_kps': ['topic discussion', 'd

In [17]:
sorted_ranked_evidence

[<generator object rank_passages at 0x31fad90e0>,
 <generator object rank_passages at 0x31fad9310>,
 <generator object rank_passages at 0x31fad8890>,
 <generator object rank_passages at 0x31fad8cf0>,
 <generator object rank_passages at 0x31fad8c80>,
 <generator object rank_passages at 0x31fad8dd0>,
 <generator object rank_passages at 0x31fad8d60>,
 <generator object rank_passages at 0x31fad8e40>,
 <generator object rank_passages at 0x31fad8eb0>,
 <generator object rank_passages at 0x31fad9770>,
 <generator object rank_passages at 0x31fad9620>,
 <generator object rank_passages at 0x31fad97e0>,
 <generator object rank_passages at 0x31fad9850>,
 <generator object rank_passages at 0x31fad8430>,
 <generator object rank_passages at 0x31fad8120>,
 <generator object rank_passages at 0x31fad93f0>,
 <generator object rank_passages at 0x31fad84a0>,
 <generator object rank_passages at 0x31fad8a50>,
 <generator object rank_passages at 0x31fad8b30>,
 <generator object rank_passages at 0x31fad8660>,


In [25]:
# TODOs: Speed-up, Parrelleise, Yield
def overlap_score(evidence_kp, adu_kp):
    score = 0
    
    # Split Keyphrase into components, scoring partial units as overlap
    for i in evidence_kp:
        for j in i.split():
            # Ensure string value, to enact .find
            if " ".join(adu_kp).find(j) != -1: score += 1
            
            else: continue
    
    return score

ev_units = evidence
adu_kp = extract_keyphrase(adu)

adu_ev_overlap = []

kp_1 = ['sex', 'relationship', 'opportunity'] 
kp_2 = ['better sex']

overlap_score(kp_2, kp_1)

for ev_unit in evidence:
    #print(ev_unit)
    toks = tokeniser(ev_unit)

    # Exprimental Value
    if len(toks) <= 8:
        continue
    
    ev_unit_kp = extract_keyphrase(ev_unit)
    kp_overlap = overlap_score(evidence_kp=ev_unit_kp, adu_kp=adu_kp)
    
    adu_ev_overlap.append({
        "adu": adu, 
        "adu_kp": adu_kp,
        "ev_unit": ev_unit,
        "ev_unit_kp": ev_unit_kp, 
        "kp_overlap": kp_overlap
        
        })
        
adu_ev_overlap

[{'adu': 'The main factors that have led me to this conclusion are The Democratic System in the US Military and Foreign Policy and the Healthcare System.',
  'adu_kp': ['main factors', 'democratic system', 'foreign policy'],
  'ev_unit': 'SEOUL, April 28 A top foreign policy adviser to South Korean presidential front runner Moon Jae-in said on Friday that U.S.',
  'ev_unit_kp': ['south korean presidential front runner moon jae',
   'top foreign policy adviser',
   'seoul'],
  'kp_overlap': 2},
 {'adu': 'The main factors that have led me to this conclusion are The Democratic System in the US Military and Foreign Policy and the Healthcare System.',
  'adu_kp': ['main factors', 'democratic system', 'foreign policy'],
  'ev_unit': "President Donald Trump's suggestion Seoul pay for the THAAD advanced U.S. missile defense system would be an 'impossible option'.",
  'ev_unit_kp': ['suggestion seoul pay', 'thaad', 'impossible option'],
  'kp_overlap': 0},
 {'adu': 'The main factors that have l

In [21]:
### OVERLAP RANKED EVIDENCE ###

adu_ev_overlap.sort(key=lambda y: y["kp_overlap"], reverse=True)
adu_ev_overlap

### FILTER IRRELEVANT EVIDENCE ###
overlapping = [i for i in adu_ev_overlap if i["kp_overlap"] !=0]

len(adu_ev_overlap), len(overlapping)
overlapping


[{'adu': 'The main factors that have led me to this conclusion are The Democratic System in the US Military and Foreign Policy and the Healthcare System.',
  'adu_kp': ['main factors', 'democratic system', 'foreign policy'],
  'ev_unit': 'The vote was marred by allegations that unstamped ballots were counted, despite the opposition’s protestations. “Clearly, [he’s] sensitive on the issue of foreign acceptance,” Bulent Aliriza, director of the Center for Strategic and International Studies’ Turkey Project, told Foreign Policy., He called on foreign parties and countries to respect the results.',
  'ev_unit_kp': ['foreign acceptance', 'foreign policy', 'foreign parties'],
  'kp_overlap': 4},
 {'adu': 'The main factors that have led me to this conclusion are The Democratic System in the US Military and Foreign Policy and the Healthcare System.',
  'adu_kp': ['main factors', 'democratic system', 'foreign policy'],
  'ev_unit': 'The vote was marred by allegations that unstamped ballots were

In [22]:
### ASSERT SAME STANCE ###
from detection.stance_classifier import sentence_stance, compare_stance

# TODOs: Ensure KPs Extracts are constrained to 1 unit
opposing_stance = []
for i in overlapping:
    adu = i["adu"]
    target = " ".join(i for i in i["adu_kp"])
    ev_unit = i["ev_unit"]

    ev_stance = compare_stance(ev_unit, ev_unit, target)
    adu_stance = sentence_stance(adu, target)
    
    if ev_stance != adu_stance:
        opposing_stance.append((ev_unit, ev_stance, adu_stance))

    else: continue

opposing_stance

[('The vote was marred by allegations that unstamped ballots were counted, despite the opposition’s protestations. “Clearly, [he’s] sensitive on the issue of foreign acceptance,” Bulent Aliriza, director of the Center for Strategic and International Studies’ Turkey Project, told Foreign Policy., Despite U.S.',
  'CON',
  'PRO'),
 ("Iran's president broadly manages domestic affairs, above all the economy, and can influence foreign policy decisions.",
  'CON',
  'PRO'),
 ('The biggest issue of Pakistan’s foreign policy was that we don’t have any foreign policy and we, mostly, react to the events rather proactively pursuing diplomacy.',
  'CON',
  'PRO'),
 ('It has been widely seen in the West as a power grab and rollback of democratic hopes many had for the country in the early days of Erdogan.',
  'CON',
  'PRO'),
 ('It has been widely seen in the West as a power grab and rollback of democratic hopes many had for the country in the early days of Erdogan.',
  'CON',
  'PRO'),
 ("Trump to

In [24]:
k = 1
top_k = opposing_stance[0:k]

adu, top_k

('The main factors that have led me to this conclusion are The Democratic System in the US Military and Foreign Policy and the Healthcare System.',
 [('The vote was marred by allegations that unstamped ballots were counted, despite the opposition’s protestations. “Clearly, [he’s] sensitive on the issue of foreign acceptance,” Bulent Aliriza, director of the Center for Strategic and International Studies’ Turkey Project, told Foreign Policy., Despite U.S.',
   'CON',
   'PRO')])

In [None]:
### RANKING ###

# TODOs: Speed-up, Parrelleise, Yield
# ev_units = evidence
# adu_kp = extract_keyphrase(adu)

# adu_ev_overlap = []

# kp_1 = ['sex', 'relationship', 'opportunity'] 
# kp_2 = ['better sex']

# overlap_score(kp_2, kp_1)

# for ev_unit in evidence:
#     #print(ev_unit)
#     toks = tokeniser(ev_unit)

#     # Exprimental Value
#     if len(toks) <= 8:
#         continue
    
#     ev_unit_kp = extract_keyphrase(ev_unit)
#     kp_overlap = overlap_score(evidence_kp=ev_unit_kp, adu_kp=adu_kp)
    
#     adu_ev_overlap.append({
#         "adu": adu, 
#         "adu_kp": adu_kp,
#         "ev_unit": ev_unit,
#         "ev_unit_kp": ev_unit_kp, 
#         "kp_overlap": kp_overlap
        
#         })
        
# adu_ev_overlap


#rank_passages(retrieved_ev)

In [613]:
# import spacy
# from spacy.matcher import PhraseMatcher
# from fuzzywuzzy import fuzz, process

# # TODOs: Package as a Module
# # TODOs: Handle Negation (Polarity shifters)
# # TODOs: Review Unsuperived Approach; Consider adveanced patterns and common-sence knowledge

# nlp = spacy.load("en_core_web_sm")

# sentence = "I hate abortion rights. Abortions should be banned."
# sentence_2 = "I like abortion rights. I belive we should keep them."
# sentence_3 = "I hate tennis. People should play tennis more often"

# ### STANCE SCORING ###

# # TODOs: https://www.cs.uic.edu/~liub/FBS/opinion-mining-final-WSDM.pdf 
# # TODOs: Pattern based Negation
# # TODOs: Semantic Orientation of an opinion (Claim)
# # TODOs:Group synonyms of 'features', 'targets'

# phrase_matcher = PhraseMatcher(nlp.vocab)

# ### SENTIMENT LEXICONS ###
# pos = [w.replace("\n", "") for w in open("../../data/lexicon/positive_lex.txt")]
# neg = [w.replace("\n", "") for w in open("../../data/lexicon/negative_lex.txt")]
# polarity_shifters = [w.replace("\n", "") for w in open("../../data/lexicon/shifter_lexicon.txt")]

# ### STANCE: ASPECT-SEMANTIC ORIENTATION ###
# def extract_aspect(sentence, n_gram):
#     aspects = extract_keyphrase(str(sentence))[0]

#     return nlp(aspects)

# def index_aspect(phrase, aspect, sentence):    
#     patterns = [nlp(aspect)]
#     phrase_matcher.add(phrase, None, *patterns)

#     start = 0
#     stop = 0

#     matched_phrases = phrase_matcher(sentence)
#     for i in matched_phrases:
#         _, start, stop = i
        
#     return start, stop

# # TODOs: Implement Polarity Shifters, Simple
# # TODOs: Implement Polarity Shifters, Complex, Verb Patterns
# def stance_score(start, stop, sentence):
#     pos_score = 0.0
#     neg_score = 0.0

#     score = 0
#     for idx, tok in enumerate(sentence):
#         if idx == start or idx == stop:
#             continue

#         # TODOs: Implement Polarity Shift
#         # TODOs: Experiement with descriptive term + keyphrase aspects
#         # TODOs: ABSA https://www.kaggle.com/code/phiitm/aspect-based-sentiment-analysis
#         # Use external libaray: Textblob
        
#         k = 8
#         # Negation Rules
#         shifted_tok = None
#         shifted_toks = []

#         if (tok.dep_ == "neg") or (tok.dep_ in polarity_shifters):
#             #Shift to Negative
#             if idx <= k:
#                 if idx < start: neg_score += 1/(start - idx)
#                 else: neg_score += 1/(idx - stop)**0.5

#             if shifted_tok != None and shifted_tok in neg:
#                 print(shifted_tok.text)
#                 # Shift to Positive
#                 if idx < start: pos_score += 1/(start - idx)
#                 elif idx > start: pos_score += 1/(idx - stop)**0.5
#                 else: continue

#         # Aspect Sentement Orientation
#         if tok.text in pos:
#             if tok in shifted_toks:
#                 continue
            
#             if idx < start: pos_score += 1/(start - idx)
#             else: pos_score += 1/(idx - stop)**0.5

#         if tok.text in neg:
#             if tok in shifted_toks:
#                 continue

#             if idx <= start: neg_score += 1/(start - idx)
#             else: neg_score += 1/(idx - stop)**0.5
    
#     score = pos_score - neg_score /(pos_score + neg_score + 1)

#     return score

# def overlap_score(evidence_kp, adu_kp):
#     score = 0
    
#     # Split Keyphrase into components, scoring partial units as overlap
#     for i in evidence_kp:
#         for j in i.split():
#             # Ensure string value, to enact .find
#             if " ".join(adu_kp).find(j) != -1: 
#                 score += 1
#                 token = j
            
#             else: continue
    
#     return score

# def get_overlapping_token(evidence_kp, adu_kp):
#     for i in evidence_kp:
#         overlap_tokens = []
#         for j in i.split():
#             if " ".join(adu_kp).find(j) != -1: 
#                 overlap_tokens.append(j) 
            
#         return " ".join(i for i in overlap_tokens)

# def sentence_stance(sentence, aspect):
#     sentence = nlp(sentence)

#     start, stop = index_aspect("aspects", aspect, sentence)
#     score = stance_score(start, stop, sentence)

#     # Add Neutral
#     #stance = {"claim": sentence, "stance": "PRO", "aspect": aspect} if score > 0 else {"claim": sentence, "stance": "CON", "aspect": aspect}
    
#     return "PRO" if score > 0 else "CON"

# def fuzzy_match(target, evidence_unit):

#     overlapping_aspect = process.extractOne(target, ev.split())[0]
#     score = overlapping_aspect[1]

#     overlapping_aspect = nlp(re.sub(r'[^\w]', ' ', overlapping_aspect))

#     return overlapping_aspect, score

# def compare_stance(ev_unit, evidence_aspect, adu_target):
#     # Note: Already identified mathcing or partially matching Aspects. 

#     # Get the overlapping evidence aspect-target.
#     overlapping_target, score = fuzzy_match(target=adu_aspect, evidence_unit=ev)
    
#     # Get position of the overlapping_target
#     start, stop = index_aspect("OVERLAP", nlp(overlapping_target), nlp(ev_unit))

#     # Assert Stance towards evidence aspect
#     score = stance_score(start, stop, nlp(ev_unit))
    
#     return "PRO" if score > 0 else "CON"

# ev = "These simple ideas and techniques could help both you and your lover enjoy sex. 1 / 10 Getty Images/Caiaimage Think beyond the thrust."
# ev_aspect = "sex", "relationship", "opportunity"

# adu = 'Hello! Let me preface by saying I dont believe there is a better sex.'
# adu_aspect = "better sex"

# print(sentence_stance("The mutual trust and understanding you share with your partner will lead to better sex, but that's not the only reason sex can be better when you're not in a relationship.", adu_aspect))
# print(compare_stance(ev, ev_aspect, adu_aspect))


PRO
PRO


In [560]:
opposing_stance

[('None, Jake: This Better Not Bite Me in the Ass, name of your sex tape.',
  'CON',
  'PRO'),
 ('In fact, a study published by National Commission on Aging found women, in particular, said sex in their 70s was at least as satisfying or more satisfying physically than it was in their 40s., The Australian sex worker: “I was disgusted that he would equate sex with his wife with ‘taking one for the team’. “I was further disgusted by the way he gaslit his wife when she was firmly in the right.',
  'CON',
  'PRO'),
 ('Jake: This Better Not Bite Me in the Ass, name of your sex tape.',
  'CON',
  'PRO')]

In [505]:
# from spacy.matcher import DependencyMatcher, Matcher
# matcher = Matcher(vocab=nlp.vocab)
# matcher

# # Matching Rule: Pronouns with Verbs that follow them
# aspect = "better sex"
# patterns = [
#     [{"DEP": "neg"}, {"LOWER": aspect}],
#     [{"DEP": "neg"}, {"POS": "ADJ"}, {"LOWER": aspect}],
#     [{"POS": "VERB"}, {"POS": "ADJ"}, {"LOWER": aspect}],
#     [{"LOWER": aspect.lower()}]
# ]

# test = nlp("Hello! Let me preface by saying I dont believe there is a not better sex.")
# test_2 = nlp("These simple ideas and techniques could help both you and your lover enjoy better sex.")

# matcher.add("test", patterns=patterns)
# result = matcher(test_2, as_spans=True)

# result

# # for tok in test:
# #     print(tok.i, tok, tok.pos_, tok.dep_, tok.head.i, sep="\t")

[]

In [None]:
### TARGETED RETRIEVAL: ATTACKING PEMISES ###

# from BERT_adu_classifier import predict

# premises = []
# for sent in sentences:
#     prediction = predict(sent)
    
#     if prediction == "premise":
#         premises.append(sent)