#### Setting up

In [25]:
from stancebenchmark_functions import import_data, make_dict, split_into_datasets_and_df

In [26]:
#from sentence_transformers import SentenceTransformer, util
import time
import numpy as np

In [27]:
import pandas as pd
import json
from collections import defaultdict
import os
import matplotlib.pyplot as plt
import glob

In [28]:
from transformers import pipeline
import tensorflow

In [29]:
pd.set_option('display.max_colwidth', 200)

#### Import and preprocess stance dataset: extract all individual datasets

In [30]:
data_path = "/Users/myrthereuver/Documents/Models_Data_Repo/Datasets/English/Stance/mdl-stance/data/mt_dnn/"

In [31]:
data_dict, dataset_list = import_data(data_path, "train")

In [32]:
topic_list_scd = {"Obama": ['Obamacare', 'medicare', 'President Obama', 'voting for Obama', 'vote for Obama', 'vote for president Obama', 'Obama', 'Barack Obama'], "Marijuana": ["marijuana", "legal", "weed", "drug", "drugs", "legalization"], "Abortion": ['abortion', 'baby', 'abort', 'aborted','fetus', 'baby', 'babies', 'mothers', 'mother', "pro-life", "prolife", "pro-choice"], "Gay": ['homosexual', 'homosexuals', 'heterosexual', 'heterosexuals', 'marry', 'gay marriage', 'gay', 'gays']}

In [33]:
all_topic_dictionary = make_dict(data_dict, topic_list_scd)

In [34]:
dataframe_dictionary = split_into_datasets_and_df(all_topic_dictionary, dataset_list)

df_arc_train_data = dataframe_dictionary["arc_train"]
df_ibmcs_train_data = dataframe_dictionary["ibmcs_train"]
df_perspectrum_train_data = dataframe_dictionary["perspectrum_train"]
df_fnc1_train_data = dataframe_dictionary["fnc1_train"]
df_iac1_train_data = dataframe_dictionary["iac1_train"]
df_semeval2016t6_train_data = dataframe_dictionary["semeval2016t6_train"]
df_semeval2019t7_train_data = dataframe_dictionary["semeval2019t7_train"]
df_snopes_train_data = dataframe_dictionary["snopes_train"]
df_argmin_train_data = dataframe_dictionary["argmin_train"]
df_scd_train_data = dataframe_dictionary["scd_train"]

#### saving data

In [35]:
import pickle 

pickle_out = open('StanceBenchmarkProCon_allTrainingDatasets_nopreprocessing.pickle', 'wb')
pickle.dump(dataframe_dictionary, pickle_out)
pickle_out.close()


In [36]:
print(len(df_scd_train_data["text"]))

3251


#### making new data representation: empty dict

In [37]:
from collections import defaultdict

SameSideStance_alltrainingdata_dict = defaultdict()

### make DISSIMILAR arguments (label = False) & SIMILAR (label = True)

In [38]:
def make_dissimilar_pair_dataset(df):
    topics = set(df['topic'])
    topic_df_list = []
    pairs_per_topic = []

    for topic in topics:
        topic_df = df.where(df['topic'] == topic).dropna()
        topic_df_list.append(topic_df)
        
    for df in topic_df_list:
        pro_in_topic = df.where(df['label'] == 1).dropna()
        con_in_topic = df.where(df['label'] == 0).dropna()
        pro_con_pair = list(zip(pro_in_topic["topic"], pro_in_topic["text"], con_in_topic["text"]))
        pairs_per_topic.append(pro_con_pair)
        
    pairs_per_topic = [pair for pair in pairs_per_topic if pair != []]
    all_pairs = [item for sublist in pairs_per_topic for item in sublist]
    
    df_notSameSide = pd.DataFrame(all_pairs, columns=["topic", "text1", "text2"])
    df_notSameSide["label"] = False
    
    return df_notSameSide

In [39]:
def make_similar_pair_dataset(df):
    topics = set(df['topic'])
    topic_df_list = []
    pairs_per_topic_con = []
    pairs_per_topic_pro = []

    for topic in topics:
        topic_df = df.where(df['topic'] == topic).dropna()
        topic_df_list.append(topic_df)

    for df in topic_df_list:
        pro_in_topic = df.where(df['label'] == 1).dropna()
        half1_pro, half2_pro = np.array_split(pro_in_topic, 2)
    
        con_in_topic = df.where(df['label'] == 0).dropna()
        half1_con, half2_con = np.array_split(con_in_topic, 2)
    
        make_pairs_con = list(zip(half1_con["topic"], half1_con["text"], half2_con["text"]))
        make_pairs_pro = list(zip(half1_pro["topic"], half1_pro["text"], half2_pro["text"]))

        pairs_per_topic_con.append(make_pairs_con)
        pairs_per_topic_pro.append(make_pairs_pro)
        
    pairs_per_topic_con = [pair for pair in pairs_per_topic_con if pair != []]
    pairs_per_topic_pro = [pair for pair in pairs_per_topic_pro if pair != []]

    all_pairs_con = [item for sublist in pairs_per_topic_con for item in sublist]
    all_pairs_pro = [item for sublist in pairs_per_topic_pro for item in sublist]
    
    all_pairs = all_pairs_con + all_pairs_pro
    
    df_SameSide = pd.DataFrame(all_pairs, columns=["topic", "text1", "text2"])
    df_SameSide["label"] = True
    
    return df_SameSide

## apply to different datasets

### IBMCS

### WORKS WELL

In [40]:
topics_ibmcs = set(df_ibmcs_train_data['topic'])
print(len(topics_ibmcs))

25


In [41]:
ibmcs_dissimilarPairs = make_dissimilar_pair_dataset(df_ibmcs_train_data)
ibmcs_dissimilarPairs[:3]

Unnamed: 0,topic,text1,text2,label
0,This house believes all nations have a right to nuclear weapons,nuclear weapons contribute to stability at a strategic level,Nuclear proliferation increases the chances of nuclear material falling into the hands of non-state groups,False
1,This house believes all nations have a right to nuclear weapons,"Nuclear weapons may lessen a state's reliance on allies for security, thus preventing allies from dragging each other into wars",nuclear holocaust could result in an end to human life,False
2,This house believes all nations have a right to nuclear weapons,"when two countries each have nuclear weapons, the probability of a direct war between them greatly decreases",nuclear weapon technology would soon reach the point where it could end human life on Earth,False


In [42]:
ibmcs_similarPairs = make_similar_pair_dataset(df_ibmcs_train_data)
ibmcs_similarPairs[:3]

Unnamed: 0,topic,text1,text2,label
0,This house believes all nations have a right to nuclear weapons,Nuclear proliferation increases the chances of nuclear material falling into the hands of non-state groups,public opinion is overwhelmingly opposed to nuclearization,True
1,This house believes all nations have a right to nuclear weapons,nuclear holocaust could result in an end to human life,a nuclear nation might be hijacked by a despot or other person or persons who might use nuclear weapons without sane regard for the consequences,True
2,This house believes all nations have a right to nuclear weapons,nuclear weapon technology would soon reach the point where it could end human life on Earth,Nuclear proliferation increases the chance of inter-state nuclear conflict,True


In [43]:
sameside_ibmcs = ibmcs_similarPairs.append(ibmcs_dissimilarPairs)
sameside_ibmcs[:3]

  sameside_ibmcs = ibmcs_similarPairs.append(ibmcs_dissimilarPairs)


Unnamed: 0,topic,text1,text2,label
0,This house believes all nations have a right to nuclear weapons,Nuclear proliferation increases the chances of nuclear material falling into the hands of non-state groups,public opinion is overwhelmingly opposed to nuclearization,True
1,This house believes all nations have a right to nuclear weapons,nuclear holocaust could result in an end to human life,a nuclear nation might be hijacked by a despot or other person or persons who might use nuclear weapons without sane regard for the consequences,True
2,This house believes all nations have a right to nuclear weapons,nuclear weapon technology would soon reach the point where it could end human life on Earth,Nuclear proliferation increases the chance of inter-state nuclear conflict,True


In [44]:
sameside_ibmcs

Unnamed: 0,topic,text1,text2,label
0,This house believes all nations have a right to nuclear weapons,Nuclear proliferation increases the chances of nuclear material falling into the hands of non-state groups,public opinion is overwhelmingly opposed to nuclearization,True
1,This house believes all nations have a right to nuclear weapons,nuclear holocaust could result in an end to human life,a nuclear nation might be hijacked by a despot or other person or persons who might use nuclear weapons without sane regard for the consequences,True
2,This house believes all nations have a right to nuclear weapons,nuclear weapon technology would soon reach the point where it could end human life on Earth,Nuclear proliferation increases the chance of inter-state nuclear conflict,True
3,This house believes all nations have a right to nuclear weapons,Extreme danger is intrinsic to nuclear war and the possession of nuclear weapons,a full-scale nuclear war could potentially bring about the extinction of the human race,True
4,This house believes all nations have a right to nuclear weapons,Nuclear weapons give nations the potential to not only destroy their enemies but humanity itself,"The world is now in a new and dangerous nuclear era, and the likelihood that non-state terrorists will get their hands on nuclear weaponry is increasing",True
...,...,...,...,...
347,This house would build high rises for housing,"In contrast with low-rise and single-family houses, apartment blocks accommodate more inhabitants per unit of area of land they occupy",High-rise structures also pose serious challenges to firefighters during emergencies,False
348,This house would build high rises for housing,more and more people around the world move into multi-story apartment blocks,Many tower blocks experience structural decay,False
349,This house would build high rises for housing,Apartment blocks have technical and economic advantages in areas with high population density,High-rise blocks became notorious for crime and poverty,False
350,This house would build high rises for housing,Single-family houses are part of a much more energy and carbon-intensive lifestyle,"human beings, while social animals, need significant amounts of social space or they become agitated and aggressive",False


### ARC

### topics & texts don't line up in condition "0" because it's the "unrelated" condition (not pro/con)

### therefore, only get pro_con classes (2 and 3), and set them to 2 == 1 (pro) and 3 == 0 (con)

In [45]:
df_arc_train_data_procon = df_arc_train_data[(df_arc_train_data.label == 2) | (df_arc_train_data.label == 3)]

In [46]:
df_arc_train_data_procon.loc[df_arc_train_data_procon.label == 3, 'label'] = 0

In [47]:
df_arc_train_data_procon.loc[df_arc_train_data_procon.label == 2, 'label'] = 1

In [48]:
df_arc_train_data_procon[:2]

Unnamed: 0,topic,text,label,bert_tokens,type_ids,uid,dataset
3,"No, the laws should not be toughened","And what will police do with this information? Try to make sure the patient has no weapons? (No access to local gun shows or other private sellers?) The patient has committed no crime so he (yes, ...",1,"[101, 1998, 2054, 2097, 2610, 2079, 2007, 2023, 2592, 1029, 3046, 2000, 2191, 2469, 1996, 5776, 2038, 2053, 4255, 1029, 1006, 2053, 3229, 2000, 2334, 3282, 3065, 2030, 2060, 2797, 19041, 1029, 100...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",3,arc_train
8,School librarians are expendable,"Librarians might find some protection if they were to get back in the classroom and teach a course for credit on research methods. Most school librarians are already certified teachers, so this wo...",0,"[101, 13850, 2015, 2453, 2424, 2070, 3860, 2065, 2027, 2020, 2000, 2131, 2067, 1999, 1996, 9823, 1998, 6570, 1037, 2607, 2005, 4923, 2006, 2470, 4725, 1012, 2087, 2082, 13850, 2015, 2024, 2525, 73...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",8,arc_train


In [49]:
topics_arc = set(df_arc_train_data_procon['topic'])
print(len(topics_arc))

183


In [50]:
arc_dissimilarPairs = make_dissimilar_pair_dataset(df_arc_train_data_procon)
arc_dissimilarPairs[:3]

Unnamed: 0,topic,text1,text2,label
0,Wealthy colleges deserve tax breaks,If we were to eliminate tax breaks to private colleges and universities there is no guarantee those funds would be directed to public schools. The whole argument is a red herring. The bottom line:...,Here's one for you... Try living in a city (new Haven) that no longer has manufacturing base and the bulk of land is owned by a university that pays no taxes. It creates a city that is completely ...,False
1,Wealthy colleges deserve tax breaks,Columbia College allowed me and countless other lower income students to study at one of the great universities of the world for almost no tuition. I graduated from Columbia with no student loans ...,"The country is better off having two strong systems of higher education, publicly supported and private. Weakening one (under the unrealistic assumption that the monies fried up will be given to s...",False
2,Tracking kids does not invade privacy,I thoroughly agree with Betsy Landers that 'Parents who are monitoring their children's activities via technology are not crossing the line into invasion of privacy; they are cyber-savvy and invol...,This was in the late 80's. Got divorced and my ex put a recording device on her phone to record my teenage daughter's phone conversations. I told her that was not right but she kept doing it. My d...,False


In [51]:
arc_similarPairs = make_similar_pair_dataset(df_arc_train_data_procon)
arc_similarPairs[:3]

Unnamed: 0,topic,text1,text2,label
0,Wealthy colleges deserve tax breaks,Here's one for you... Try living in a city (new Haven) that no longer has manufacturing base and the bulk of land is owned by a university that pays no taxes. It creates a city that is completely ...,One of the reason why the United States is so successful and will continue to be in the foreseeable future is because of the quality of its university system which attracts the best and the bright...,True
1,Wealthy colleges deserve tax breaks,"The country is better off having two strong systems of higher education, publicly supported and private. Weakening one (under the unrealistic assumption that the monies fried up will be given to s...",Unless and until we decide to acknowledge the symbiotic false pretenses underlying our effective subsidy of for-profit institutions of higher learning and redirect those resources back to public e...,True
2,Tracking kids does not invade privacy,This was in the late 80's. Got divorced and my ex put a recording device on her phone to record my teenage daughter's phone conversations. I told her that was not right but she kept doing it. My d...,As the 61 year old parent of two late 20 year olds I think all the helicopter parents here who think they can monitor their kids to the nth degree and not destroy the relationship are out of their...,True


In [52]:
sameside_arc = arc_similarPairs.append(arc_dissimilarPairs)
sameside_arc

  sameside_arc = arc_similarPairs.append(arc_dissimilarPairs)


Unnamed: 0,topic,text1,text2,label
0,Wealthy colleges deserve tax breaks,Here's one for you... Try living in a city (new Haven) that no longer has manufacturing base and the bulk of land is owned by a university that pays no taxes. It creates a city that is completely ...,One of the reason why the United States is so successful and will continue to be in the foreseeable future is because of the quality of its university system which attracts the best and the bright...,True
1,Wealthy colleges deserve tax breaks,"The country is better off having two strong systems of higher education, publicly supported and private. Weakening one (under the unrealistic assumption that the monies fried up will be given to s...",Unless and until we decide to acknowledge the symbiotic false pretenses underlying our effective subsidy of for-profit institutions of higher learning and redirect those resources back to public e...,True
2,Tracking kids does not invade privacy,This was in the late 80's. Got divorced and my ex put a recording device on her phone to record my teenage daughter's phone conversations. I told her that was not right but she kept doing it. My d...,As the 61 year old parent of two late 20 year olds I think all the helicopter parents here who think they can monitor their kids to the nth degree and not destroy the relationship are out of their...,True
3,Medical websites are dangerous,"No, legitimate health sites do not recommend self treatment for anything other than self-limiting conditions - conditions that will go away without professional care. Common colds are examples, an...","While I understand what the article is saying, at the same time it does a disservice to being able to stay informed about our personal health care. We are all not able to distinguish true medical ...",True
4,Medical websites are dangerous,"I think if doctors spent more than 5-10 minutes in a room with a patient, the patient would feel more comfortable that the doctor understand their history and symptoms adequately. Doctors must rem...","I sincerely believe that I would have died many years ago had I not done my own homework, researched my doctors' counsel, and found their advice wanting. Just remember: In most cases, a doctor's c...",True
...,...,...,...,...
537,Veganism is good for everyone,"I have been vegan for 29 years, and i am fitter and healthier than any body that i know, most of all the meat eaters. the article seems flawed at most completely inaccurate, to say that a meat eat...","As a hypogycemic, eating all carbs, all the time (even the good kinds) is just not going to work for me -- I'd be hungry all the time an inevitiably overeat and gain weight. Lots of people are sen...",False
538,Veganism is good for everyone,Ms Nina Planck ...stated Nature created humans as omnivores....which seem to be she has a direct connection to god but eventually its never true .. then let me tell why we called Human being the m...,"A LOT depends on where one lives. Someone in frozen areas like Alaska,(a head of lettuce is $8) Finland, Siberia could never survive being vegan. Desert people with camels could never be vegan. So...",False
539,Veganism is good for everyone,"Veganism is for everyone. The age or body type of a person does not change the fact that all people need vitamins,proteins, and carbohydrates, which can all be obtained from plant sources. Nowaday...","Given that even species that are 100% herbivores feed their young milk (cows, for example), doesn't it stand to reason that human infants should not be vegan during their development? I am a veget...",False
540,Veganism is good for everyone,"I was disappointed to read the 'Room for Debate' essays published April 17 addressing 'Is Veganism Good for Everyone.' Of the six essayists, only two appear to be vegans, and not one is a physicia...","Humans don't resemble vegetarian mammals (cows, sheep) any more than we resemble bears. We are primates, and our closest genetic relatives (chimps) are omnivores. What's more revelant is that our ...",False


### PERSPECTRUM

### WORKS WELL

In [53]:
topics_perspectrum = set(df_perspectrum_train_data['topic'])
print(len(topics_perspectrum))

541


In [54]:
perspectrum_dissimilarPairs = make_dissimilar_pair_dataset(df_perspectrum_train_data)
perspectrum_dissimilarPairs[:3]

Unnamed: 0,topic,text1,text2,label
0,The United States’ individual health insurance mandate is unconstitutional,The mandate is not constitutional under the commerce clause,The mandate is constitutional under the commerce clause,False
1,The UK would be better off economically outside the European Union,There will be £350 million more to spend a week,Economic growth comes with closer integration with your neighbours,False
2,The UK would be better off economically outside the European Union,There will be millions more to spend this week,Leaving will cause a shock to the British economy,False


In [55]:
perspectrum_similarPairs = make_similar_pair_dataset(df_perspectrum_train_data)
perspectrum_similarPairs[perspectrum_similarPairs.topic == "Abolish nuclear weapons"]

Unnamed: 0,topic,text1,text2,label
1157,Abolish nuclear weapons,Nuclear weapons are required for deterrence,"Disarmament would actually cause increased insecurity among nations, it is near impossible to guarantee that a nation has disarmed.",True
1158,Abolish nuclear weapons,Public acknowledgement of the right to nuclear deterrence will benefit the public regulation of nuclear weapons generally,"Small countries would no longer need the protection of larger ones, so could become more politically independent.",True
1159,Abolish nuclear weapons,"Disarmament is impossible, rendering efforts to disarm pointless and simply wasteful","Disarmament would actually cause increased insecurity among nations, you can never really know for sure whether they have disarmed.",True
1160,Abolish nuclear weapons,Abolishment is an unrealistic goal,"Disarmament would actually cause increased insecurity among nations, as there can never be any guarantee that a nation has disarmed",True
1161,Abolish nuclear weapons,Nuclear weapons give states valuable agenda-setting power on the international stage,Thus the super power model will be broken and small countries will be finally given the chance to have political independence without the need of protection from a ‘big brother’.,True
1162,Abolish nuclear weapons,Countries with nuclear weapons have the ability to set their own agenda.,"The abolition of nuclear weapons would actually incentivize the development and use of even more dangerous weapons, such as chemical and biological weapons",True
1163,Abolish nuclear weapons,States with nuclear weapons are afforded more authority to set agendas at international levels.,All the people worldwide have the same rights and are equal to one another.,True
1164,Abolish nuclear weapons,"All countries have a right to defend themselves with nuclear weapons, even when they lack the capacity in conventional weapons",The pursuit of nuclear defence (respectively the possession of nuclear weapons) by more countries is a guarantee for peace.,True
1165,Abolish nuclear weapons,All states have a right to nuclear self-defense.,any conflict will rapidly de-escalate,True
1166,Abolish nuclear weapons,To use nuclear weapons in defense of the nation is lawful for all countries.,Nuclear weapons will rapidly de-escalate conflicts.,True


In [56]:
sameside_perspectrum = perspectrum_similarPairs.append(perspectrum_dissimilarPairs)
sameside_perspectrum[perspectrum_similarPairs.topic == "Abolish nuclear weapons"]

  sameside_perspectrum = perspectrum_similarPairs.append(perspectrum_dissimilarPairs)
  sameside_perspectrum[perspectrum_similarPairs.topic == "Abolish nuclear weapons"]


Unnamed: 0,topic,text1,text2,label
1157,Abolish nuclear weapons,Nuclear weapons are required for deterrence,"Disarmament would actually cause increased insecurity among nations, it is near impossible to guarantee that a nation has disarmed.",True
1158,Abolish nuclear weapons,Public acknowledgement of the right to nuclear deterrence will benefit the public regulation of nuclear weapons generally,"Small countries would no longer need the protection of larger ones, so could become more politically independent.",True
1159,Abolish nuclear weapons,"Disarmament is impossible, rendering efforts to disarm pointless and simply wasteful","Disarmament would actually cause increased insecurity among nations, you can never really know for sure whether they have disarmed.",True
1160,Abolish nuclear weapons,Abolishment is an unrealistic goal,"Disarmament would actually cause increased insecurity among nations, as there can never be any guarantee that a nation has disarmed",True
1161,Abolish nuclear weapons,Nuclear weapons give states valuable agenda-setting power on the international stage,Thus the super power model will be broken and small countries will be finally given the chance to have political independence without the need of protection from a ‘big brother’.,True
1162,Abolish nuclear weapons,Countries with nuclear weapons have the ability to set their own agenda.,"The abolition of nuclear weapons would actually incentivize the development and use of even more dangerous weapons, such as chemical and biological weapons",True
1163,Abolish nuclear weapons,States with nuclear weapons are afforded more authority to set agendas at international levels.,All the people worldwide have the same rights and are equal to one another.,True
1164,Abolish nuclear weapons,"All countries have a right to defend themselves with nuclear weapons, even when they lack the capacity in conventional weapons",The pursuit of nuclear defence (respectively the possession of nuclear weapons) by more countries is a guarantee for peace.,True
1165,Abolish nuclear weapons,All states have a right to nuclear self-defense.,any conflict will rapidly de-escalate,True
1166,Abolish nuclear weapons,To use nuclear weapons in defense of the nation is lawful for all countries.,Nuclear weapons will rapidly de-escalate conflicts.,True


### FNC 

### not working - no multiple stances to one topic (bc topic = headline)

In [57]:
#df_fnc1_train_data[df_fnc1_train_data.label == 1][:1]

In [58]:
#print(len(df_fnc1_train_data.topic))

In [59]:
#fnc1_dissimilarPairs = make_dissimilar_pair_dataset(df_fnc1_train_data)
#fnc1_dissimilarPairs[:1]

In [60]:
#fnc1_similarPairs = make_similar_pair_dataset(df_fnc1_train_data)
#fnc1_similarPairs[:1]

In [61]:
#sameside_fnc1 = fnc1_similarPairs.append(fnc1_dissimilarPairs)
#sameside_fnc1[:1]

### IAC

### Works well

In [62]:
iac1_dissimilarPairs = make_dissimilar_pair_dataset(df_iac1_train_data)
iac1_dissimilarPairs[:2]

Unnamed: 0,topic,text1,text2,label
0,gay marriage,"And just how many of the Ten Commandments are part of U.S. law? By the same argument I could say the legislature was following the Buddhist precepts, or the philosophy of Aristotle.\n Or the Code ...","You really need to stop with the personal insults. I have never claimed to be a hypocrite, a bigot yes but not a hypocrite.\n\n Maybe not directly, but by declaring yourself a bigot while accusing...",False
1,gay marriage,"i dont care about seperation of church and state, the U.S. was based on the Bible and the Christian religion. If you want to flee from America so yo can get married then go ahead, if it makes you ...","Matt, this so-called ""separation of church and state"" never existed in the US Constitution. It was present in the former Soviet Union!! Are you seriuosly advocating a theocracy? Do you really thin...",False


In [63]:
iac1_similarPairs = make_similar_pair_dataset(df_iac1_train_data)
iac1_similarPairs[:2]

Unnamed: 0,topic,text1,text2,label
0,gay marriage,"You really need to stop with the personal insults. I have never claimed to be a hypocrite, a bigot yes but not a hypocrite.\n\n Maybe not directly, but by declaring yourself a bigot while accusing...","When the similarities in arguments of polygamy and gay marriage are pointed out here, as well as the way they analogize one another, the response by those who would prefer gay marriage being a top...",True
1,gay marriage,"Matt, this so-called ""separation of church and state"" never existed in the US Constitution. It was present in the former Soviet Union!! Are you seriuosly advocating a theocracy? Do you really thin...",Most americans support a Federal Marriage Amendment. Defining Marriage as a union between a man and a woman.\n \n Federal Marriage Amendment\n \n \n This is the text of the Amend:\n \n Marriage in...,True


In [64]:
sameside_iac1 = iac1_similarPairs.append(iac1_dissimilarPairs)
sameside_iac1

  sameside_iac1 = iac1_similarPairs.append(iac1_dissimilarPairs)


Unnamed: 0,topic,text1,text2,label
0,gay marriage,"You really need to stop with the personal insults. I have never claimed to be a hypocrite, a bigot yes but not a hypocrite.\n\n Maybe not directly, but by declaring yourself a bigot while accusing...","When the similarities in arguments of polygamy and gay marriage are pointed out here, as well as the way they analogize one another, the response by those who would prefer gay marriage being a top...",True
1,gay marriage,"Matt, this so-called ""separation of church and state"" never existed in the US Constitution. It was present in the former Soviet Union!! Are you seriuosly advocating a theocracy? Do you really thin...",Most americans support a Federal Marriage Amendment. Defining Marriage as a union between a man and a woman.\n \n Federal Marriage Amendment\n \n \n This is the text of the Amend:\n \n Marriage in...,True
2,gay marriage,"i dont care about seperation of church and state, the U.S. was based on the Bible and the Christian religion. If you want to flee from America so yo can get married then go ahead, if it makes you ...",Most americans support a Federal Marriage Amendment. Defining Marriage as a union between a man and a woman.\n \n Federal Marriage Amendment\n \n \n This is the text of the Amend:\n \n Marriage in...,True
3,gay marriage,Actually a LITERAL interpretation is that the government can't make any laws that RESPECT the ESTABLISHMENT of religion. \n\n \n You forgot the all-important 2nd part which I believe is what Bassm...,"OK here are my views on gay people.\n \n If it's OK to be gay, why is it that God created Adam and Eve? Man and women. We have the ability to make our own decisions ""yes"" but if everyone would hav...",True
4,gay marriage,"gay marriage was never legal in the US until Massachusetts did it. I am not seeing how we are all of a sudden not the land of the free. They don't, however, they run their mouths, they introduce ...","OK. Marriage. Originally it was always between man and woman. Why is it all of a sudden changing? Sure you can have a relationship, that's not the main thing that bothers me. Marriage also consist...",True
...,...,...,...,...
1180,death penalty,"Was Jesus for the death penalty? I say jesus was for the deaht penalty anddddddd he was guilty.\n\n \n From this site\n \n Death Penalty in Judaism\n \n Hebrew Words are all G-d ordained, supporte...","I say jesus was for the death penalty anddddddd he was guilty.\n\n What??\n \n And what is your reasoning that Jesus was for the death penalty? For example, the only time, in my memory, that Jesus...",False
1181,death penalty,"Was Jesus for the death penalty? I say jesus was for the deaht penalty anddddddd he was guilty. If you are going to argue for or against the death penalty, at least try to be a credit to the argum...","of course jesus was for the death penalty. long before any legal system was in place, death was the penatly for sin. the concept of crime didnt even exist at the time. in fact, the mere fact that ...",False
1182,death penalty,"If I remember correctly, Jesus brought with him new teachings that overrode the Exodus rules. Also, like Obvious child said, not all sins require death.\n \n I guess I should add this.\n \n When J...","Christ regarded capital punishment as a just penalty for murder (Matthew 26:52). \n\n Er...\n \n ""Mat 26:51 And, behold, one of them which were with Jesus stretched out [his] hand, and drew his sw...",False
1183,death penalty,"Was Jesus for the death penalty? I say jesus was for the deaht penalty anddddddd he was guilty.\n\n of course jesus was for the death penalty. long before any legal system was in place, death was ...",What does it matter anyway what the Bible has to say on capital punishment?\n \n Criminal justice has come a long way from then and I don't see why we should be looking to such a socially crude ti...,False


### SemEval 2016

### works well, but data quite noisy (RTs with unclear original tweet etc.)

In [65]:
semeval2016t6_dissimilarPairs = make_dissimilar_pair_dataset(df_semeval2016t6_train_data)
semeval2016t6_dissimilarPairs[:2]

Unnamed: 0,topic,text1,text2,label
0,Feminist Movement,"@MrRepzion This lady on yt is right, Radical feminist have the loudest voices while normal,logical feminist are being outspoken. #SemST","@DFoxtrot15 @FeminismIsLies so labeling womne? OMG SEXISM! labeling men? well, most men are like that, so... #SemST",False
1,Feminist Movement,Suns out.... Dresses out... #StreetHarassment out... This shouldn't be daily life #YesAllWomen @EverydaySexism #SemST,".@cooImemegirl Feminists believe mothers should have a say over the father's body, his job, and his money. #SemST",False


In [66]:
semeval2016t6_similarPairs = make_similar_pair_dataset(df_semeval2016t6_train_data)
semeval2016t6_similarPairs[:2]

Unnamed: 0,topic,text1,text2,label
0,Feminist Movement,"@DFoxtrot15 @FeminismIsLies so labeling womne? OMG SEXISM! labeling men? well, most men are like that, so... #SemST",@equalforwomen why r u on twitter get back in the kitchen #feminist #feminazi #powertowomen #Equality #SemST,True
1,Feminist Movement,".@cooImemegirl Feminists believe mothers should have a say over the father's body, his job, and his money. #SemST","I'm honestly surprised that feminist extremists like @lenadunham aren't screaming ""sexist!!"" at gay men. #SemST",True


In [67]:
sameside_semeval2016t6 = semeval2016t6_similarPairs.append(semeval2016t6_dissimilarPairs)
sameside_semeval2016t6

  sameside_semeval2016t6 = semeval2016t6_similarPairs.append(semeval2016t6_dissimilarPairs)


Unnamed: 0,topic,text1,text2,label
0,Feminist Movement,"@DFoxtrot15 @FeminismIsLies so labeling womne? OMG SEXISM! labeling men? well, most men are like that, so... #SemST",@equalforwomen why r u on twitter get back in the kitchen #feminist #feminazi #powertowomen #Equality #SemST,True
1,Feminist Movement,".@cooImemegirl Feminists believe mothers should have a say over the father's body, his job, and his money. #SemST","I'm honestly surprised that feminist extremists like @lenadunham aren't screaming ""sexist!!"" at gay men. #SemST",True
2,Feminist Movement,@_toekey and the offensive joke is left out of the context... #SpankAFeminist #SemST,"If masculinity is toxic, then I must be a nuclear waste facility. #gamergate #dumbfeminist #SemST",True
3,Feminist Movement,Why do girls/women spend so much time to be pretty and attractive and then complain if they're reduced to a sex symbol? #SemST,Anyone else starting to notice the general demographic of feminists is like fucking high-schoolers. #SemST,True
4,Feminist Movement,So you want equality in the work place between men and woman but you won't take the trash out..? Makes sense #SemST,"watching #BaltimoreRiots on Fox news, obvious feminist and her goony beardman following the reporter around for attention #SemST",True
...,...,...,...,...
471,Hillary Clinton,#LoveWins on the day I get my Hillary gear! #Yaaas #Pride #SemST,@docdebags @awelab1956 @BuzzFeedAndrew @BuzzFeedNews It will. Otherwise we're guaranteed GOP in control of all 3 branches. #SemST,False
472,Hillary Clinton,Thank you @HillaryClinton for your support of the LGBT community. On this historic day I thank you for giving us hope. #SemST,@GOP Of course @HillaryClinton is silent. That insures she doesn't have 2 stand 4 ANYTHING & less chance of offending voters #SemST,False
473,Hillary Clinton,If you're not watching @HillaryClinton's speech right now you're missing her drop tons of wisdom. #SemST,"#BernieSanders is running for president, don't the #democrats already have one really old guy running?? #SemST",False
474,Hillary Clinton,@TheDemocrats @DNCWomen I'm down to help for 2016 election. #SemST,@HillaryClinton I notice that you didn't condemn the lawless actions of the rioters. #SemST,False


### SemEval 2019

### data quite noisy (RTs with unclear original tweet, deleted/removed tweets, etc), "implicit topics" also on recent events (Brexit, Charlie Hebdo)

### Should work if we get implicit topics explicit, for instance with clustering (now every line is its own topic)

In [68]:
#print(len(df_semeval2019t7_train_data))

#### Basic clustering of topics

In [69]:
#dataframe_dictionary = split_into_datasets_and_df(all_topic_dictionary, dataset_list)

#df_semeval2019t7_train_data = dataframe_dictionary["semeval2019t7_train"]

In [70]:
#from sentence_transformers import SentenceTransformer, util
#import time

In [71]:
#model = SentenceTransformer('all-MiniLM-L6-v2')

In [72]:
#corpus_embeddings = model.encode(df_semeval2019t7_train_data["topic"], batch_size=64, show_progress_bar=True, convert_to_tensor=True)


In [73]:
#print("Start clustering")
#start_time = time.time()

#Two parameters to tune:
#min_cluster_size: Only consider cluster that have at least 10 elements
#threshold: Consider sentence pairs with a cosine-similarity larger than threshold as similar
#clusters = util.community_detection(corpus_embeddings, min_community_size=10, threshold=0.55)

#print("Clustering done after {:.2f} sec".format(time.time() - start_time))

In [74]:
# #Print for all clusters the top 3 and bottom 3 elements
# cluster_dict = {}

# for i, cluster in enumerate(clusters):
#     cluster_dict[i+1] = list()
#     for sentence_id in cluster:
#         cluster_dict[i+1].append(df_semeval2019t7_train_data["topic"][sentence_id])
#     print("\nCluster {}, #{} Elements ".format(i+1, len(cluster)))
#     for sentence_id in cluster[0:3]:
#         print("\t", df_semeval2019t7_train_data["topic"][sentence_id])
#     # df_semeval2019t7_train_data["topic"] = 
#     print("\t", "...")
#     for sentence_id in cluster[-3:]:
#         print("\t", df_semeval2019t7_train_data["topic"][sentence_id])

In [75]:
# from collections import Counter

# df_semeval2019t7_train_data["new_topic"] = df_semeval2019t7_train_data["topic"]

# for i, cluster in enumerate(clusters):
#     data_set = cluster_dict[i+1]
  
#     stopwords = ["i", "in", "@rt_com", "it", "the", "a", "them", "they", "in", "an", "you're", "was", "and", "this", "we", "can", "you", "your", "are", "at", "that", "on", "@", "the", "of", "for", "not", "is", "a", "have", "all", "by", "with", "to", "they", "has", "be", "so", "i", "him", "he", "she"]
#     count = Counter(word.lower() for sentence in data_set for word in str(sentence).strip('(@\w+.*?)').split() if word not in stopwords)
    
#     topic = [pair[0] for pair in count.most_common(5)]
#     print(topic)
#     for sentence_id in cluster:
#         df_semeval2019t7_train_data["new_topic"][sentence_id] = " ".join(topic)

In [76]:
# print(df_semeval2019t7_train_data["new_topic"].value_counts()[:30])

In [77]:
#print(len(df_semeval2019t7_train_data))

In [78]:
#df_semeval2019t7_train_data["topic"].nunique()

In [79]:
#semeval2019t7_dissimilarPairs = make_dissimilar_pair_dataset(df_semeval2019t7_train_data)
#semeval2019t7_dissimilarPairs[:2]

In [80]:
#semeval2019t7_similarPairs = make_similar_pair_dataset(df_semeval2019t7_train_data)
#semeval2019t7_similarPairs[:2]

In [81]:
#sameside_semeval2019t7 = semeval2019t7_similarPairs.append(semeval2019t7_dissimilarPairs)
#sameside_semeval2019t7[:2]

### Snopes

In [82]:
#snopes_dissimilarPairs = make_dissimilar_pair_dataset(df_snopes_train_data)
#snopes_dissimilarPairs[:5]

In [83]:
#snopes_similarPairs = make_similar_pair_dataset(df_snopes_train_data)
#snopes_similarPairs[:2]

In [84]:
#sameside_snopes = snopes_similarPairs.append(snopes_dissimilarPairs)
#sameside_snopes[:3]

### ArgMin

In [85]:
argmin_dissimilarPairs = make_dissimilar_pair_dataset(df_argmin_train_data)
argmin_dissimilarPairs[:2]

Unnamed: 0,topic,text1,text2,label
0,marijuana legalization,"Eight people or groups turned in arguments for the "" pro "" side 's six pages in the publication , urging voters to approve the Regulation and Taxation of Marijuana Act ( RTMA ) .",She likens it to making alcohol or cigarettes ten times stronger .,False
1,marijuana legalization,Research in some of the 25 states where medical marijuana is legal has found a possible protective effect against opioid overdose deaths .,"Although some studies have been disputed , marijuana abuse has been tied to brain damage , cancer , lung damage , depression , amotivational syndrome , and even death .",False


In [86]:
argmin_similarPairs = make_similar_pair_dataset(df_argmin_train_data)
argmin_similarPairs

Unnamed: 0,topic,text1,text2,label
0,marijuana legalization,She likens it to making alcohol or cigarettes ten times stronger .,"There is no doubt that if marijuana were legalized , more people , including juveniles , would consume it .",True
1,marijuana legalization,"Although some studies have been disputed , marijuana abuse has been tied to brain damage , cancer , lung damage , depression , amotivational syndrome , and even death .",It 's even more toxic than cigarette smoke .,True
2,marijuana legalization,Would you want to live in a neighborhood filled with people who regularly smoke marijuana ?,"It is reaffirmed by several studies that have linked marijuana use to criminal behavior , unemployment , lower incomes , greater welfare dependence , and lower life satisfaction .",True
3,marijuana legalization,"The people pushing it — the business people who want to sell it , and the political hacks who want to tax it — are deliberately misleading you .","Accordingly , federal law , representing the considered judgment of medical science and the nation ’s two political branches of government , takes the unequivocal position that marijuana is danger...",True
4,marijuana legalization,The brain damage has been shown to cause memory loss and difficulty in problem solving .,Public places like bars would expose innocent patrons .,True
...,...,...,...,...
3416,cloning,Potentially hastens recovery from traumatic injury .,RESOLVE ’s position on stem cell research and cloning does not mandate the destruction of embryos .,True
3417,cloning,"A cure for baldness - From one of our readers : "" But how about the possibility of using cloning technology to get more hair on a balding scalp .","Alan and Kristine Wolf paid thousands of dollars to have their deceased cat , Spot , cloned from skin cells they had preserved .",True
3418,cloning,"In my view , which I ’ve defended in Humanity Enhanced and other publications , human cloning would not be a seriously worrying action if we could carry it out safely .","Because their happiness will depend largely on how we receive them , we need only adjust our attitude to be more receptive .",True
3419,cloning,George Wright takes this idea to an extreme length by suggesting that reproductive cloning would actually promote human dignity by proving the inaccuracy of genetic determinism .,Is it not infinitely better for the human race to be perfectly adapted to its environment than to struggle along hoping the next random sexual union will produce a wonder child ?,True


In [87]:
sameside_argmin = argmin_similarPairs.append(argmin_dissimilarPairs)
sameside_argmin

  sameside_argmin = argmin_similarPairs.append(argmin_dissimilarPairs)


Unnamed: 0,topic,text1,text2,label
0,marijuana legalization,She likens it to making alcohol or cigarettes ten times stronger .,"There is no doubt that if marijuana were legalized , more people , including juveniles , would consume it .",True
1,marijuana legalization,"Although some studies have been disputed , marijuana abuse has been tied to brain damage , cancer , lung damage , depression , amotivational syndrome , and even death .",It 's even more toxic than cigarette smoke .,True
2,marijuana legalization,Would you want to live in a neighborhood filled with people who regularly smoke marijuana ?,"It is reaffirmed by several studies that have linked marijuana use to criminal behavior , unemployment , lower incomes , greater welfare dependence , and lower life satisfaction .",True
3,marijuana legalization,"The people pushing it — the business people who want to sell it , and the political hacks who want to tax it — are deliberately misleading you .","Accordingly , federal law , representing the considered judgment of medical science and the nation ’s two political branches of government , takes the unequivocal position that marijuana is danger...",True
4,marijuana legalization,The brain damage has been shown to cause memory loss and difficulty in problem solving .,Public places like bars would expose innocent patrons .,True
...,...,...,...,...
3125,cloning,RESOLVE ’s position on stem cell research and cloning does not mandate the destruction of embryos .,"Besides the religious arguments , however , there are a number of ethic objections .",False
3126,cloning,"Alan and Kristine Wolf paid thousands of dollars to have their deceased cat , Spot , cloned from skin cells they had preserved .",The basis for this justification is that reproductive human cloning necessarily infringes notions of human dignity .,False
3127,cloning,"Because their happiness will depend largely on how we receive them , we need only adjust our attitude to be more receptive .",( b ) There is no medical need for cloning .,False
3128,cloning,Is it not infinitely better for the human race to be perfectly adapted to its environment than to struggle along hoping the next random sexual union will produce a wonder child ?,QUIT CLONING TO SAVE LIVES .,False


### SCD 

In [88]:
scd_dissimilarPairs = make_dissimilar_pair_dataset(df_scd_train_data)
scd_dissimilarPairs

Unnamed: 0,topic,text1,text2,label
0,Marijuana,"It is more likely that if something is illegal, because it is unregulated, standards may slip. Yes, it is possible that a blackmarket clinic may be actually safer than some legal clinics, but beca...",Weeds and other plants have no soul o who cares if they die as long as there is enough so that we can survive i think it is wrong to kill and animal for the fun of it. A weed is not an animal be...,False
1,Marijuana,"//It is a human embryo, which are living cells, just like you, except smaller, because you are made up of tiny cells.// I lol'd when I read this. Haha...""except smaller"". Believe me, an embryo is...","I think it should be illegal because when you extend your reasoning, condomns are then also responsible for preventing fertilisation and thus the possibility of life. But more to the point, just b...",False
2,Marijuana,actually prostitution isn't illegal at the federal level (with some small exceptions ref ) and there aren't any federal or state laws left that ban suicide ref and many libertarians and liberal...,I can see what you're saying and i am in partial agreement but i still think it shouldn't be legal at all. our Nation should not support such a decision made by a citizen. if it becomes legal then...,False
3,Marijuana,Then why are you even debating a topic that you fail to research on? I did research it. Legal rulings has nothing to do with politics. If that was the case then why is political science differ...,"Really? Then if it's about a woman's right to control her own body, then why is it that the laws forbid her from controlling her own body when it comes to prostitution or the use of drugs? How com...",False
4,Marijuana,"Well, once it is a child, it already is illegal to kill it. Before the first tri-mester it's a cluster of cells with no more self-awareness than a kitchen table. So that wouldn't be ""killing a c...","They don't use, the only word I can think to describe them, tongs. They use a vacuum now. A vacuum? oh that's alright then. Much less barbaric ;) it's a burden If having a child would be bur...",False
...,...,...,...,...
691,Gay,"It seems that the only real argument against gay marriage is because the bible says it is wrong. That is your personal belief and your personal opinion, don't force it on to others. The bible says...","Don't get me wrong, I'm not a huge fan of homosexuality but that doesn't really matter. If they wan't to have a life together that's they're business. But I don't think marriage should be re-def...",False
692,Gay,"Well thanks for the advice straight boy but quite frankly, you don't know your ass from your elbow when it comes to gay rights, what we want or what we deserve as human beings. It's not the word t...",there is no such thing as gay rights There most certainly is. everyone has the same rights no one has different rights than others. So why on Earth are you arguing against gay marriage (you'...,False
693,Gay,"I have already explained to you that is it unsafe sex practices, not homosexuality itself, that causes this, and I used factual evidence from Africa to back it up. Why are you still using this arg...",Well the fact that it's wrong to be gay in the first place... ...why would you want there to be marriage? The fact that marriage is a religious ceremony.. ...why would you bring something so im...,False
694,Gay,"You seem to be mistaking ""tradition"" for ""monopoly"". (I assume) You don't mind Muslims marrying and they certainly aren't Christian. And if you can be bothered to check the bible, there is nothin...","Before I begin, the following reminder is in order : In biology, evolution is change in the inherited traits of a population of organisms from one generation to the next. These changes are caused...",False


In [89]:
scd_similarPairs = make_similar_pair_dataset(df_scd_train_data)
scd_similarPairs[:1]

Unnamed: 0,topic,text1,text2,label
0,Marijuana,Weeds and other plants have no soul o who cares if they die as long as there is enough so that we can survive i think it is wrong to kill and animal for the fun of it. A weed is not an animal be...,It's arbitrary because all I have to do is change the law to suite my preference. Which by the way is something that is actively being done with the age of consent. And this is exactly what is bei...,True


In [90]:
sameside_scd = scd_similarPairs.append(scd_dissimilarPairs)
sameside_scd[:3]

  sameside_scd = scd_similarPairs.append(scd_dissimilarPairs)


Unnamed: 0,topic,text1,text2,label
0,Marijuana,Weeds and other plants have no soul o who cares if they die as long as there is enough so that we can survive i think it is wrong to kill and animal for the fun of it. A weed is not an animal be...,It's arbitrary because all I have to do is change the law to suite my preference. Which by the way is something that is actively being done with the age of consent. And this is exactly what is bei...,True
1,Marijuana,"I think it should be illegal because when you extend your reasoning, condomns are then also responsible for preventing fertilisation and thus the possibility of life. But more to the point, just b...","This has nothing to do about their actions. Listen, they will be confronted and dealt with in the end, not my problem. I do not want to control them from being together, I honestly don't care. How...",True
2,Marijuana,I can see what you're saying and i am in partial agreement but i still think it shouldn't be legal at all. our Nation should not support such a decision made by a citizen. if it becomes legal then...,"Yes, it's no big deal as in the world won't end or society won't collapse like you said, but that's the same as a waiter bringing me an orange juice (OJ) when I asked him for a lemonade and explai...",True


In [91]:
sameside_scd[sameside_scd.topic == "Gay"][:5]

Unnamed: 0,topic,text1,text2,label
229,Gay,"During the Holocaust, approximately 11 million people were killed between 1933 till 1945. These people include Jews, homosexuals, Slavics (Russians, Poles, and others), Gypsies, mentally and physi...","I felt free to down vote that response. I hate getting opposed responses for things I can hardly remember anyways, I would appreciated if you would not use such bad language when talking to me. ...",True
230,Gay,Sex is a deviation if it's not for the purpose of child-making? I never said it's sole purpose was procreation. The core question is one of anthropology; does man have a nature and is that natu...,Well IF science 'says' that your fate is decided at birth then I'm going to say it how it is. Bullshit. Science only points to an increase likelyhood of being gay. Never 100% definately going to...,True
231,Gay,"No, you are way out in left field. Stop the red herrings and keep it to the topic. What does Hitler have to do with abortions? As I said earlier, one of two commandments to adhere to as a Christia...","Prove that homosexuality is natural. There is no logical reason to pain pictures - in a way, there is. It shows future generations what the world looks (or rather, looked) like, primarily before ...",True
232,Gay,"Well we've covered a lot of ground. Let me see if I can tie it all together for you; knowing that there is a difference between proof and persuasion. What do abortion, premarital sex, homosexuali...","Are we fighting for the rights of gays to be able to marry? yes Or are we fighting for the rights for gays to use the word marriage? why What is more important , a word , or the rights of gays ....",True
233,Gay,"âFor You created my inmost being;You knit me together in my mother's womb. I praise You because I am fearfully and wonderfully made; Your works are wonderful, I know that full well. My frame was...",Your first argument compares two things that are not related; this is a fallacy of faulty analogy. The ability to fabricate an aircraft does not evidence an ability to raise and love a child. The ...,True


In [92]:
sameside_scd[sameside_scd.topic == "Abortion"][:5]

Unnamed: 0,topic,text1,text2,label
8,Abortion,I donât care to debate nor discuss your convoluted justifications for destroying a human fetus or embryo. Don't abstract a fragment of a statement for your propaganda. Pick and choose what y...,"As a woman who does not hold a religious stance on this but rather a personal experience stance, I think my argument holds a lot of water. I had an abortion. I regret it deeply. I am older now a...",True
9,Abortion,yes abortion should definetly be banned because if you look at it from court/police type of view then you could definetly call it murder if you abort something beause the actual definition of murd...,That scenario of the endangered mother is rare. The vast majority of abortions are performed on healthy women and children. You don't make laws based on exceptions to the norm. The right to life ...,True
10,Abortion,"I don't think that is an age where you necessarily fully understand consequences, vies change when people get older. It's that time when everyone wants to have fun. As for adoption, the baby is g...","Pregnancy is a pain in the ass I would be willing to go through tremendous pangs to save a life, as would many others. Why else do soldiers get injured/killed trying to save people from minefiel...",True
11,Abortion,"By the end of the 1st trimester a baby . . . has started to develop blood vessels, brain, spinal cord, nervous system, eyes, ears, arms, legs, mouth, nose, thyroid gland, tongue, color of retina...","I'm against abortion! An abortion can result in medical complications later in life. In the future, woman who did abortion may never be a mother, even if she really want it. Another reason is that...",True
12,Abortion,So are you are saying that because she wants an abortion she should have it? Then why are you even against abortion? We are talking about a life here not just a fetus. I believe that everyone has ...,"Im not a Dr. but I do believe a fetus is a human. Aborting it is playing gods role. With that said, look back at your past. Many of us have been in situations where a baby would be unbearable, fac...",True


In [93]:
print(len(sameside_scd[sameside_scd.topic == "Gay"]))

588


In [94]:
print(len(sameside_scd[sameside_scd.topic == "Abortion"]))

988


### Output new benchmarkdata as csv

In [95]:
SameSideStance_alltrainingdata_dict["arc_train"] = sameside_arc 
SameSideStance_alltrainingdata_dict["ibmcs_train"] = sameside_ibmcs
SameSideStance_alltrainingdata_dict["perspectrum_train"] = sameside_perspectrum
#SameSideStance_alltrainingdata_dict["fnc1_train"] = sameside_fnc1
SameSideStance_alltrainingdata_dict["iac1_train"] = sameside_iac1
SameSideStance_alltrainingdata_dict["semeval2016t6_train"] = sameside_semeval2016t6
#SameSideStance_alltrainingdata_dict["semeval2019t7_train"] = sameside_semeval2019t7
#SameSideStance_alltrainingdata_dict["snopes_train"] = sameside_snopes
SameSideStance_alltrainingdata_dict["argmin_train"] = sameside_argmin
SameSideStance_alltrainingdata_dict["scd_train"] = sameside_scd

In [99]:
print(len(SameSideStance_alltrainingdata_dict["iac1_train"]["topic"])) 

3110


In [73]:
pickle_out = open('StanceBenchmarkSameSide_allTrainingDatasets_nopreprocessing.pickle', 'wb')
pickle.dump(SameSideStance_alltrainingdata_dict, pickle_out)
pickle_out.close()
