In [1]:
from src.utils_.elastic_db import ElasticDB

# INIT DB OBJECT
PORT = "http://localhost:9200"
db = ElasticDB(elastic_port=PORT)

INFO:src.utils_.elastic_db:Connecting to http://localhost:9200 
INFO:src.utils_.elastic_db:Connected to <Elasticsearch(['http://localhost:9200'])> 


In [2]:
### LOAD DATASETS ###
import json
import random

args = [json.loads(ln) for ln in open("../data/cmv_processed.jsonl")]
topics = [json.loads(ln) for ln in open("../data/argument_topic_concept.jsonl")]

In [3]:
len(topics), len(args)

(5990, 10303)

In [4]:
### SUBJECT ARG ###
import random
_ = random.randint(0, 99)

claim = args[_]["claim"]
arg = args[_]["argument"]["argument"]

print(_)
print(claim, "\n")
print(arg)

52
Transgenders only enforce gender stereotypes. 

I try not to be bigoted and Id really like to open my mind to this especially as my cousin begins his HRT. Im a strong liberal but I believe the concept of transgender individuals is tied strongly to the concept of gender roles. Why bother changing your gender or identifying as a different one if they are equal? Dysphoria is a real issue but thats purely psychological. If someone truly believed man woman then why would they feel the need to be one or the other. Men can wear make up nail polish dresses suits high heels and be nurturing.


In [5]:
### TODOs ###

# TODOs: News Data
# TODOs: Add Concepts
# TODOs: Commonsense Query and Concept Expansion: Topics, Concepts, Synonyms
# TODOs: Cosine Semantic Search
# TODOs: Research: Evidence Retrieval, Infor Retrieval, Context Aware, Neural Retrieval
# TODOs: Targeted Retrieval with NLI over ADUs, Premises, Claims; discard non-ADUs.
# TODOs: Parallel process
# TODOs: Prior tokenization and sentence segmentation to speed processing
# TODOs: Domain Restrict. Polarising social and political debate (Class labelling) only for higher-quality argument-knowledge set.
# TODOs: News, Political, Sociology and 'Good', 'Positive' counter-evidence Knowledge Base.
# TODOs: Bag of Topics Modelling

# TODOs: Implement as a Class
# TODOs: Implement Logging

# TODOs: Implement Semantic Search: https://www.elastic.co/blog/text-similarity-search-with-vectors-in-elasticsearch
# TODOs: Implement Semantic Ranking

In [6]:
### NLP FUNCTIONS ###
import re
from nltk.tokenize import sent_tokenize, word_tokenize

def tokeniser(doc):
    return word_tokenize(doc)

def sentences_segment(doc):
    return sent_tokenize(doc)

# Test Statements
print(tokeniser("hello, my name is Josh!"))
print(sentences_segment("hello, my name is Josh! How are you doing today? I'm curious ... will this line seperate? I'm not so sure Dr. Evil"))

['hello', ',', 'my', 'name', 'is', 'Josh', '!']
['hello, my name is Josh!', 'How are you doing today?', "I'm curious ... will this line seperate?", "I'm not so sure Dr.", 'Evil']


In [7]:
from keybert import KeyBERT
from keyphrase_vectorizers import KeyphraseCountVectorizer

kb = KeyBERT()
def extract_keyphrase(doc, n_gram=3, n_kp=3, use_mmr="False", use_maxsum="False"):
    #kp = kb.extract_keywords(doc, vectorizer=KeyphraseCountVectorizer(), stop_words="english", diversity=0.2,)

    kp = kb.extract_keywords(doc, keyphrase_ngram_range=(0, 4), stop_words="english", diversity=0.2,)

    return [i[0] for i in kp[0:n_kp]] if kp else None

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cpu


In [91]:
from src.detection.stance_classifier import sentence_stance, compare_stance
from src.utils_.word_net_expansion import expand_query
from src.detection.stance_classifier import sentence_stance

### RETRIEVER ###
db = db
queries = []
retrieved_ev = []

topic_ids = [json.loads(ln)["id"] for ln in open("../data/argument_topic_concept.jsonl")]
def get_topic(arg_id):
    topic_id = topic_ids.index(arg_id)
    topic = topics[topic_id]["topic_label"]
    return str(topic) if topic else None

# TODOs: Add News
# TODOs: Include Topic Label
# TODOs: Include Concept Label
# TODOs: Query Expansion
# TODOs: Multi-Field Search
def retrieved_evidence(arg, query_expansion=True, retrieve_len=5):
    """ Retrieves Evidence from Knowledge base, returning a well-formed Retrieved Evidence Object
    given an input Argument"""

    id_ = arg["id"]
    topic = get_topic(id_)

    counters_sents = sentences_segment(arg["counter"]["counter"])
    adu_sents = sentences_segment(arg["argument"]["argument"])

    # Retrieve per ADU
    # results = []
    retrieved = []
    adus = []
    for _ in adu_sents:
        if len(tokeniser(_)) <= 8:
            continue

        kp = extract_keyphrase(_)
        adu = {"sentence": _, "kp": [i for i in kp], "stance": sentence_stance(_, kp[0])}

        kp.append(topic) if topic else kp
        query = ", ".join(i for i in kp)
        search = [(i["_source"]["document"]["title"], i["_source"]["document"]["text"])for i in db.search(query_=query, k=retrieve_len)]

        evidence = [i[1] for i in search]
        retrieved.append({"evidence": evidence, "kp": [i for i in extract_keyphrase(evidence)]})
        adus.append(adu)

    return ({
        "id": arg["id"],
        "argument": [i for i in adus],
        "retrieved": [i for i in retrieved]})

retrieved_ev = []
for arg in args[0:10]:
    retrieved_ev.append(retrieved_evidence(arg))

INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.064s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.066s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.056s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.020s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.040s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.038s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.036s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.029s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.047s]
INFO:elastic_transport.transport:POST http://localhost:9200/*/_search [status:200 duration:0.040s]
INFO:elast

In [92]:
len(retrieved_ev)

10

In [93]:
retrieved_ev

[{'id': 't3_30oi71',
  'argument': [{'sentence': 'Section I Why is Basic Income Increasingly Popular?',
    'kp': ['basic income increasingly popular',
     'basic income increasingly',
     'section basic income increasingly'],
    'stance': 'NEUTRAL'},
   {'sentence': 'Basic income is a policy that has broad support from both the progressive left and libertarian right.',
    'kp': ['basic income', 'basic income policy', 'basic income policy broad'],
    'stance': 'PRO'},
   {'sentence': 'Centerleft economists including Paul Krugman have endorsed the scheme for various reasons.',
    'kp': ['paul krugman endorsed scheme',
     'economists including paul krugman',
     'krugman endorsed scheme various'],
    'stance': 'PRO'},
   {'sentence': 'BI also reduces inequality by redistributing income from capital to labor.',
    'kp': ['bi reduces inequality redistributing',
     'reduces inequality redistributing income',
     'inequality redistributing income capital'],
    'stance': 'CON'}

In [94]:
# args x adus x retrieved (k=5)
for i in retrieved_ev[2]["retrieved"]:
    print(i, "\n")

{'evidence': ['Brazil. Minimum income has been increasingly accepted by the Brazilian government. In 2004, President\xa0Lula da Silva signed into law a bill to establish a universal basic income.', "Committee member Lady Rhys-Williams argued that the incomes for adults should be more like a basic income. She was also the first to develop the negative income tax model. Her son Brandon Rhys Williams proposed a basic income to a parliamentary committee in 1982, and soon after that in 1984, the Basic Income Research Group, now the Citizen's Basic Income Trust, began to conduct and disseminate research on basic income.", 'Many technology experts and technology entrepreneurs have begun endorsing basic income in the 2000s and 2010s. These include Marshal Brain, Sam Altman, James Hughes, Facebook co-founder Chris Hughes, Elon Musk, and Mark Zuckerberg (in his 2017 Harvard commencement speech), and Jeremy Rifkin. The overriding theme among technologists who favor basic income is the belief that

In [96]:
from sentence_transformers import SentenceTransformer, util
import torch
from tqdm.notebook import tqdm
import time

model = SentenceTransformer('all-MiniLM-L6-v2')

### SCORE COSINE SIMILARITY ###
def cosine_similarity(sent_1, sent_2):
    sentences = [sent_1, sent_2]
    embeddings = model.encode(sentences, convert_to_tensor=True, show_progress_bar=False)

    cos = torch.nn.CosineSimilarity(dim=0)
    score = cos(embeddings[0], embeddings[1])

    return score.numpy().item()

### SCORE TF-KEYWORD OVERLAP ###
def overlap_score(evidence_kp, adu_kp):
    score = 0
    # Split Keyphrase into components, scoring partial units as overlap
    for i in evidence_kp:
        for j in i.split():
            # Ensure string value, to enact .find
            if ", ".join([i for i in adu_kp]).find(j) != -1: score += 1

            else: continue
    return score

### RANK PASSAGES ###
def score_passages(ev):
    for _ in range(0, len(ev["argument"])):
        print(_)

def rank_passages(ev, k=3):
    """ Handles a Retrieved Evidence Object, yielding the top-k passages
    for each ADU, for a single Argument """

    # Index into Retrieved Evidence Object
    adus = [i for i in ev["argument"]]
    retrieved = [i for i in ev["retrieved"]]

    # Rank k-returned passages for each ADU
    r_retrieved = []
    for adu, passage in zip(adus, retrieved):
        scored = []
        ranked_ev = []
        for _, kp in zip(passage["evidence"], passage["kp"]):
            scored.append((_, kp, cosine_similarity(str(adu), str(_))))

        scored = sorted(scored, key=lambda x: x[2], reverse=True)[0:3]
        for i, j, k in scored:
            ranked_ev.append({"evidence": i, "kp": j, "similarity": k})

        r_retrieved.append(ranked_ev)

    return r_retrieved

# TODOs: Ranking order

### UPDATE RETRIEVED OBJECT ###
retrieved_ranked = retrieved_ev.copy()
for i in range(0, len(retrieved_ev)):
    retrieved_ranked[i]["retrieved"] = [i for i in rank_passages(retrieved_ev[i])]

retrieved_ranked

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cpu


[{'id': 't3_30oi71',
  'argument': [{'sentence': 'Section I Why is Basic Income Increasingly Popular?',
    'kp': ['basic income increasingly popular',
     'basic income increasingly',
     'section basic income increasingly'],
    'stance': 'NEUTRAL'},
   {'sentence': 'Basic income is a policy that has broad support from both the progressive left and libertarian right.',
    'kp': ['basic income', 'basic income policy', 'basic income policy broad'],
    'stance': 'PRO'},
   {'sentence': 'Centerleft economists including Paul Krugman have endorsed the scheme for various reasons.',
    'kp': ['paul krugman endorsed scheme',
     'economists including paul krugman',
     'krugman endorsed scheme various'],
    'stance': 'PRO'},
   {'sentence': 'BI also reduces inequality by redistributing income from capital to labor.',
    'kp': ['bi reduces inequality redistributing',
     'reduces inequality redistributing income',
     'inequality redistributing income capital'],
    'stance': 'CON'}

In [80]:
len(passages[2][0])

3

In [272]:
# NOTE: Zipping retrieved evidence, args
print(len(ranked_sorted_evidence), len(args[0:100]))

ranked_sorted_evidence[0]

100 100


[{'adu': 'Section I Why is Basic Income Increasingly Popular?',
  'adu_kp': ['basic income increasingly popular',
   'basic income increasingly',
   'section basic income increasingly'],
  'evidence_unit': "Brazil. Minimum income has been increasingly accepted by the Brazilian government. In 2004, President\xa0Lula da Silva signed into law a bill to establish a universal basic income. Committee member Lady Rhys-Williams argued that the incomes for adults should be more like a basic income. She was also the first to develop the negative income tax model. Her son Brandon Rhys Williams proposed a basic income to a parliamentary committee in 1982, and soon after that in 1984, the Basic Income Research Group, now the Citizen's Basic Income Trust, began to conduct and disseminate research on basic income. Many technology experts and technology entrepreneurs have begun endorsing basic income in the 2000s and 2010s. These include Marshal Brain, Sam Altman, James Hughes, Facebook co-founder Chr

In [120]:
fout = open("../data/cmv_rr.jsonl", "w")

args = [json.loads(ln) for ln in open("../data/cmv_processed.jsonl")]
sample = args[0:100]

for arg, retrieved in zip(sample, ranked_sorted_evidence):
    adu_ = []
    for adu in retrieved:
        adu_.append({
            "evidence": adu["evidence_unit"],
            "evidence_kp": adu["evidence_kps"],
        })

    fout.write(json.dumps({
        "id": arg["id"],
        "argument": arg["argument"],
        "argument_kp": arg["argument"]["arg_keyphrases"],
        "counter": arg["counter"]["counter"],
        "counter_kp": arg["counter"]["counter_keyphrases"],
        "retrieved": adu_
    }))

    fout.write("\n")

In [121]:
### EVALUATE OUTPUT ###
train = [json.loads(ln) for ln in open("../data/cmv_rr.jsonl", "r")]

In [127]:
_ = random.randint(0, len(train))
print(train[_]["argument"], "\n")
print(train[_]["adu"], "\n")
print(train[_]["retrieved"])

{'argument': 'Just a note I do not believe in Nazism. Now I know this view is controversial and I know its generally frowned upon but my lifelong belief is that eugenics isnt that bad. This started before I can even remember. I felt that people who live off of welfare and do nothing all day but drugs and get fat should lose their reproductive rights. At no time I believed people should die.', 'arg_keyphrases': ['isnt that bad', 'note', 'view is controversial', 'generally frowned'], 'arg_stance': ['PRO', 'Nazism']} 



KeyError: 'adu'

In [62]:
# Handle duplicates
# def rank_passages(ev, k=3):
#     """ Handles a Retrieved Evidence Object, returning the top-k passages for each ADU """
#     # Per Argument
#     # Index into Retrieved Evidence Object
#     adus = [i for i in ev[0]["argument"]]
#     retrieved = [i for i in ev[0]["retrieved"]]
#
#     #print(len(retrieved), len(adus))
#
#     # Rank k-returned passages for each ADU
#     r_retrieved = []
#     for adu, passage in zip(adus, retrieved):
#         scored = []
#         ranked_ev = []
#         for _, kp in zip(passage["evidence"], passage["kp"]):
#             scored.append((_, kp, cosine_similarity(str(adu), str(_))))
#
#         scored = sorted(scored, key=lambda x: x[2], reverse=True)[0:3]
#         for i, j, k in scored:
#             ranked_ev.append({"evidence": i, "kp": j, "similarity": k})
#
#         r_retrieved.append(ranked_ev)
#
#     return r_retrieved

# 1 Argument x 4 ADUs x 5 Retrieved Passages
ranked = [i for i in rank_passages(retrieved_ev[3])]
print(len(ranked))
print(ranked)

4 4
4
[[{'evidence': 'Many technology experts and technology entrepreneurs have begun endorsing basic income in the 2000s and 2010s. These include Marshal Brain, Sam Altman, James Hughes, Facebook co-founder Chris Hughes, Elon Musk, and Mark Zuckerberg (in his 2017 Harvard commencement speech), and Jeremy Rifkin. The overriding theme among technologists who favor basic income is the belief that automation is creating an increasingly unstable labor market.', 'kp': ('technologists favor basic income', 0.6414), 'similarity': 0.3174712657928467}, {'evidence': "Committee member Lady Rhys-Williams argued that the incomes for adults should be more like a basic income. She was also the first to develop the negative income tax model. Her son Brandon Rhys Williams proposed a basic income to a parliamentary committee in 1982, and soon after that in 1984, the Basic Income Research Group, now the Citizen's Basic Income Trust, began to conduct and disseminate research on basic income.", 'kp': ('will

In [458]:
# def fuck_you():
#     print("fuck you")
#
# fuck_you()
#
# def rank_(ev):
#     # Index into Retrieved Evidence Object
#     ev = ev[0]
#     adus = [i for i in ev[0]["argument"]]
#     retrieved = [i for i in ev[0]["retrieved"]]
#     k = 3
#     print("hello")
#     # # Rank k-returned passages for each ADU
#     # count = 0
#     # r_retrieved = []
#     # for adu, passage in zip(adus, retrieved):
#     #     count += 1
#     #     ranked_passages = []
#     #     for _ in passage["evidence"]:
#     #         print(_)
#     #         ranked_passages.append((_, cosine_similarity(adu, _)))
#     #         r_retrieved.append({"evidence": i, "similarity": k} for i, k in sorted(ranked_passages, key=lambda x: x[1], reverse=True)[0:k])
#     #         print(r_retrieved)
#
#     # return {
#     #     "r_retrieved": r_retrieved
#     # }
#
# #print(rank_(retrieved_ev[0:1]))

In [116]:
# from multiprocessing.pool import ThreadPool as Pool
# from yake import KeywordExtractor
# import tqdm.notebook as tqdm
# import time
# from summa import keywords
# from tqdm import tqdm
#
# ### PASSAGE RANKING; KEYWORD OVERLAP ###
# kw_extractor = KeywordExtractor(lan="en", n=3, top=5)
#
# # TODOs: For each ADU, Rank Merged Evidence using Keyword Overlap and Filter for Contrasting Stance
# # TODOs: Handel Multiple Keywords
#
# def overlap_score(evidence_kp, adu_kp):
#     score = 0
#     # TODOs: Robust 'None' handeling
#     if adu_kp == None:
#         return score
#     # Split Keyphrase into components, scoring partial units as overlap
#     else:
#         for i in evidence_kp:
#             for j in i.split():
#                 # Ensure string value, to enact .find
#                 if ", ".join([i for i in adu_kp]).find(j) != -1: score += 1
#
#                 else: continue
#
#     return score
#
# def calculate_overlap(merged_ev, adu_kp):
#
#     for ev_unit in sentences_segment(merged_ev):
#         toks = tokeniser(ev_unit)
#         kp_overlap = 0
#
#         if len(toks) <= 8: continue
#
#         #ev_unit_kp = [i for i in keywords.keywords(ev_unit).split("\n")]
#         ev_unit_kp = [i[0] for i in kw_extractor.extract_keywords(ev_unit)]
#
#         if ev_unit_kp:
#             kp_overlap = overlap_score(evidence_kp=ev_unit_kp, adu_kp=adu_kp)
#
#         else: ev_unit_kp = None
#         yield ev_unit, ev_unit_kp, kp_overlap
#
# # pool = Pool(8)
# ### RANK PASSAGES ###
# def score_passages(ev_):
#     adu = ev_[0]["argument_discourse_unit"]
#     adu_stance = ev_[0]["adu_stance"]
#     merged_ev = ev_[0]["merged_evidence"]
#     adu_kp = ev_[0]["adu_keyphrases"]
#
#     ### CALCULATE OVERLAP ###
#     for ev_unit, ev_unit_kp, kp_overlap in calculate_overlap(merged_ev, adu_kp):
#         target = adu_kp[0]
#
#         compared_stace = compare_stance(ev_unit, target)
#         if compared_stace != adu_stance:
#             yield {
#                 "adu": adu,
#                 "adu_kp": adu_kp,
#                 "evidence_unit": ev_unit,
#                 "evidence_kps": ev_unit_kp,
#                 "overlap": kp_overlap,
#                 "evidence_stance": compare_stance(ev_unit, target),
#                 "adu_stance": adu_stance
#             }
#
#         else: continue
#
# ### SCORED EVIDENCE ###
# def score_evidence(retrieved_evidence):
#     for ev_ in retrieved_ev:
#         yield [i for i in score_passages(ev_)]
#
# ### RANKED EVIDENCE ###
# def rank_filter_counter_evidence(retireved_evidence, k=3):
#     with tqdm(total=(len(retrieved_ev))) as pbar:
#         for i in score_evidence(retrieved_ev):
#             yield sorted(i, key=lambda y: y["overlap"], reverse=True)[0:k]
#
#             pbar.update()
#
#
# ### SELECT TOP-K COUNTER-EVIDENCE ###
# tic = time.time()
# ranked_sorted_evidence = [i for i in rank_filter_counter_evidence(retrieved_ev)]
# ranked_sorted_evidence
# toc = time.time()
#
# print(toc - tic)
# # TIME 1:20M

100%|██████████| 100/100 [00:30<00:00,  3.23it/s]

30.97145128250122





In [250]:
# idx = 2
# for ln in retrieved_ev:
#     r = ln[0]
#     for _ in range(0, len(r["argument"])):
#         print(r["argument"][_]["sentence"])
#         print(r["argument"][_]["kp"])
#         print("")
#         print(r["retrieved"][_]["evidence"])
#         print(r["retrieved"][_]["kp"])

#"counter": {"counter": arg["counter"]["counter"], "counter_kp": arg["counter"]["counter_keyphrases"]}
# "argument_discourse_unit": adu,
# "query": query,
# "adu_keyphrases": [i for i in kp],
# "adu_stance": sentence_stance(adu, kp),
# "merged_evidence": ", ".join(ln for ln in evidence)
# "retrieved_documents_titles": titles,
# "retrieved_evidence": evidence,

Section I Why is Basic Income Increasingly Popular?
['basic income increasingly popular', 'basic income increasingly', 'section basic income increasingly']

Brazil. Minimum income has been increasingly accepted by the Brazilian government. In 2004, President Lula da Silva signed into law a bill to establish a universal basic income. Committee member Lady Rhys-Williams argued that the incomes for adults should be more like a basic income. She was also the first to develop the negative income tax model. Her son Brandon Rhys Williams proposed a basic income to a parliamentary committee in 1982, and soon after that in 1984, the Basic Income Research Group, now the Citizen's Basic Income Trust, began to conduct and disseminate research on basic income. Many technology experts and technology entrepreneurs have begun endorsing basic income in the 2000s and 2010s. These include Marshal Brain, Sam Altman, James Hughes, Facebook co-founder Chris Hughes, Elon Musk, and Mark Zuckerberg (in his 201

In [None]:
# # TODOs: Speed-up, Parrelleise, Yield
# def overlap_score(evidence_kp, adu_kp):
#     score = 0

#     # Split Keyphrase into components, scoring partial units as overlap
#     for i in evidence_kp:
#         for j in i.split():
#             # Ensure string value, to enact .find
#             if " ".join(adu_kp).find(j) != -1: score += 1

#             else: continue

#     return score

# ev_units = evidence
# adu_kp = extract_keyphrase(adu)

# adu_ev_overlap = []

# kp_1 = ['sex', 'relationship', 'opportunity']
# kp_2 = ['better sex']

# overlap_score(kp_2, kp_1)

# for ev_unit in evidence:
#     #print(ev_unit)
#     toks = tokeniser(ev_unit)

#     # Exprimental Value
#     if len(toks) <= 8:
#         continue

#     ev_unit_kp = extract_keyphrase(ev_unit)
#     kp_overlap = overlap_score(evidence_kp=ev_unit_kp, adu_kp=adu_kp)

#     adu_ev_overlap.append({
#         "adu": adu,
#         "adu_kp": adu_kp,
#         "ev_unit": ev_unit,
#         "ev_unit_kp": ev_unit_kp,
#         "kp_overlap": kp_overlap

#         })

# adu_ev_overlap

In [None]:
# ### OVERLAP RANKED EVIDENCE ###

# adu_ev_overlap.sort(key=lambda y: y["kp_overlap"], reverse=True)
# adu_ev_overlap

# ### FILTER IRRELEVANT EVIDENCE ###
# overlapping = [i for i in adu_ev_overlap if i["kp_overlap"] !=0]

# len(adu_ev_overlap), len(overlapping)
# overlapping


In [15]:
# Stance Test
# adu = 'I cant remember the topic that spurred this discussion but a friend and I were debating whether manmade things were natural.'
# ev_unit = 'In this essay, Mill argues the idea that the morality of an action can be judged by whether it is natural or unnatural.'
# target = 'natural things'
#
# stance = compare_stance(ev_unit, target)
# stance

'PRO'

In [None]:
# ### ASSERT SAME STANCE ###
# from detection.stance_classifier import sentence_stance, compare_stance
#
# # TODOs: Ensure KPs Extracts are constrained to 1 unit
# opposing_stance = []
# for i in overlapping:
#     adu = i["adu"]
#     target = " ".join(i for i in i["adu_kp"])
#     ev_unit = i["ev_unit"]
#
#     ev_stance = compare_stance(ev_unit, ev_unit, target)
#     adu_stance = sentence_stance(adu, target)
#
#     if ev_stance != adu_stance:
#         opposing_stance.append((ev_unit, ev_stance, adu_stance))
#
#     else: continue
#
# opposing_stance

In [None]:
### RANKING ###

# TODOs: Speed-up, Parrelleise, Yield
# ev_units = evidence
# adu_kp = extract_keyphrase(adu)

# adu_ev_overlap = []

# kp_1 = ['sex', 'relationship', 'opportunity'] 
# kp_2 = ['better sex']

# overlap_score(kp_2, kp_1)

# for ev_unit in evidence:
#     #print(ev_unit)
#     toks = tokeniser(ev_unit)

#     # Exprimental Value
#     if len(toks) <= 8:
#         continue
    
#     ev_unit_kp = extract_keyphrase(ev_unit)
#     kp_overlap = overlap_score(evidence_kp=ev_unit_kp, adu_kp=adu_kp)
    
#     adu_ev_overlap.append({
#         "adu": adu, 
#         "adu_kp": adu_kp,
#         "ev_unit": ev_unit,
#         "ev_unit_kp": ev_unit_kp, 
#         "kp_overlap": kp_overlap
        
#         })
        
# adu_ev_overlap


#rank_passages(retrieved_ev)

In [None]:
# import spacy
# from spacy.matcher import PhraseMatcher
# from fuzzywuzzy import fuzz, process

# # TODOs: Package as a Module
# # TODOs: Handle Negation (Polarity shifters)
# # TODOs: Review Unsuperived Approach; Consider adveanced patterns and common-sence knowledge

# nlp = spacy.load("en_core_web_sm")

# sentence = "I hate abortion rights. Abortions should be banned."
# sentence_2 = "I like abortion rights. I belive we should keep them."
# sentence_3 = "I hate tennis. People should play tennis more often"

# ### STANCE SCORING ###

# # TODOs: https://www.cs.uic.edu/~liub/FBS/opinion-mining-final-WSDM.pdf 
# # TODOs: Pattern based Negation
# # TODOs: Semantic Orientation of an opinion (Claim)
# # TODOs:Group synonyms of 'features', 'targets'

# phrase_matcher = PhraseMatcher(nlp.vocab)

# ### SENTIMENT LEXICONS ###
# pos = [w.replace("\n", "") for w in open("../../data/lexicon/positive_lex.txt")]
# neg = [w.replace("\n", "") for w in open("../../data/lexicon/negative_lex.txt")]
# polarity_shifters = [w.replace("\n", "") for w in open("../../data/lexicon/shifter_lexicon.txt")]

# ### STANCE: ASPECT-SEMANTIC ORIENTATION ###
# def extract_aspect(sentence, n_gram):
#     aspects = extract_keyphrase(str(sentence))[0]

#     return nlp(aspects)

# def index_aspect(phrase, aspect, sentence):    
#     patterns = [nlp(aspect)]
#     phrase_matcher.add(phrase, None, *patterns)

#     start = 0
#     stop = 0

#     matched_phrases = phrase_matcher(sentence)
#     for i in matched_phrases:
#         _, start, stop = i
        
#     return start, stop

# # TODOs: Implement Polarity Shifters, Simple
# # TODOs: Implement Polarity Shifters, Complex, Verb Patterns
# def stance_score(start, stop, sentence):
#     pos_score = 0.0
#     neg_score = 0.0

#     score = 0
#     for idx, tok in enumerate(sentence):
#         if idx == start or idx == stop:
#             continue

#         # TODOs: Implement Polarity Shift
#         # TODOs: Experiement with descriptive term + keyphrase aspects
#         # TODOs: ABSA https://www.kaggle.com/code/phiitm/aspect-based-sentiment-analysis
#         # Use external libaray: Textblob
        
#         k = 8
#         # Negation Rules
#         shifted_tok = None
#         shifted_toks = []

#         if (tok.dep_ == "neg") or (tok.dep_ in polarity_shifters):
#             #Shift to Negative
#             if idx <= k:
#                 if idx < start: neg_score += 1/(start - idx)
#                 else: neg_score += 1/(idx - stop)**0.5

#             if shifted_tok != None and shifted_tok in neg:
#                 print(shifted_tok.text)
#                 # Shift to Positive
#                 if idx < start: pos_score += 1/(start - idx)
#                 elif idx > start: pos_score += 1/(idx - stop)**0.5
#                 else: continue

#         # Aspect Sentement Orientation
#         if tok.text in pos:
#             if tok in shifted_toks:
#                 continue
            
#             if idx < start: pos_score += 1/(start - idx)
#             else: pos_score += 1/(idx - stop)**0.5

#         if tok.text in neg:
#             if tok in shifted_toks:
#                 continue

#             if idx <= start: neg_score += 1/(start - idx)
#             else: neg_score += 1/(idx - stop)**0.5
    
#     score = pos_score - neg_score /(pos_score + neg_score + 1)

#     return score

# def overlap_score(evidence_kp, adu_kp):
#     score = 0
    
#     # Split Keyphrase into components, scoring partial units as overlap
#     for i in evidence_kp:
#         for j in i.split():
#             # Ensure string value, to enact .find
#             if " ".join(adu_kp).find(j) != -1: 
#                 score += 1
#                 token = j
            
#             else: continue
    
#     return score

# def get_overlapping_token(evidence_kp, adu_kp):
#     for i in evidence_kp:
#         overlap_tokens = []
#         for j in i.split():
#             if " ".join(adu_kp).find(j) != -1: 
#                 overlap_tokens.append(j) 
            
#         return " ".join(i for i in overlap_tokens)

# def sentence_stance(sentence, aspect):
#     sentence = nlp(sentence)

#     start, stop = index_aspect("aspects", aspect, sentence)
#     score = stance_score(start, stop, sentence)

#     # Add Neutral
#     #stance = {"claim": sentence, "stance": "PRO", "aspect": aspect} if score > 0 else {"claim": sentence, "stance": "CON", "aspect": aspect}
    
#     return "PRO" if score > 0 else "CON"

# def fuzzy_match(target, evidence_unit):

#     overlapping_aspect = process.extractOne(target, ev.split())[0]
#     score = overlapping_aspect[1]

#     overlapping_aspect = nlp(re.sub(r'[^\w]', ' ', overlapping_aspect))

#     return overlapping_aspect, score

# def compare_stance(ev_unit, evidence_aspect, adu_target):
#     # Note: Already identified mathcing or partially matching Aspects. 

#     # Get the overlapping evidence aspect-target.
#     overlapping_target, score = fuzzy_match(target=adu_aspect, evidence_unit=ev)
    
#     # Get position of the overlapping_target
#     start, stop = index_aspect("OVERLAP", nlp(overlapping_target), nlp(ev_unit))

#     # Assert Stance towards evidence aspect
#     score = stance_score(start, stop, nlp(ev_unit))
    
#     return "PRO" if score > 0 else "CON"

# ev = "These simple ideas and techniques could help both you and your lover enjoy sex. 1 / 10 Getty Images/Caiaimage Think beyond the thrust."
# ev_aspect = "sex", "relationship", "opportunity"

# adu = 'Hello! Let me preface by saying I dont believe there is a better sex.'
# adu_aspect = "better sex"

# print(sentence_stance("The mutual trust and understanding you share with your partner will lead to better sex, but that's not the only reason sex can be better when you're not in a relationship.", adu_aspect))
# print(compare_stance(ev, ev_aspect, adu_aspect))


In [None]:
# from spacy.matcher import DependencyMatcher, Matcher
# matcher = Matcher(vocab=nlp.vocab)
# matcher

# # Matching Rule: Pronouns with Verbs that follow them
# aspect = "better sex"
# patterns = [
#     [{"DEP": "neg"}, {"LOWER": aspect}],
#     [{"DEP": "neg"}, {"POS": "ADJ"}, {"LOWER": aspect}],
#     [{"POS": "VERB"}, {"POS": "ADJ"}, {"LOWER": aspect}],
#     [{"LOWER": aspect.lower()}]
# ]

# test = nlp("Hello! Let me preface by saying I dont believe there is a not better sex.")
# test_2 = nlp("These simple ideas and techniques could help both you and your lover enjoy better sex.")

# matcher.add("test", patterns=patterns)
# result = matcher(test_2, as_spans=True)

# result

# # for tok in test:
# #     print(tok.i, tok, tok.pos_, tok.dep_, tok.head.i, sep="\t")

In [None]:
### TARGETED RETRIEVAL: ATTACKING PEMISES ###

# from BERT_adu_classifier import predict

# premises = []
# for sent in sentences:
#     prediction = predict(sent)
    
#     if prediction == "premise":
#         premises.append(sent)