In [1]:
import os

import numpy as np
import pandas as pd
import sklearn
from sklearn import metrics
from sklearn import svm
from sklearn.base import clone
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.pipeline import Pipeline

In [2]:
# Render floats with five decimals and allow text columns to show up to 1000 characters.
pd.set_option('display.float_format', '{:.5f}'.format)
pd.set_option('display.max_colwidth', 1000)

In [3]:
# Training split of Task A. Set the ARGMINING_DATA_DIR environment variable to point at
# the data directory instead of editing this machine-specific default.
file_train = os.path.join(
    os.environ.get('ARGMINING_DATA_DIR', '/Users/myrthereuver/Documents/GitHub/argmining2022'),
    'TaskA_train.csv',
)

In [4]:
# Development split of Task A. Set the ARGMINING_DATA_DIR environment variable to point at
# the data directory instead of editing this machine-specific default.
file_dev = os.path.join(
    os.environ.get('ARGMINING_DATA_DIR', '/Users/myrthereuver/Documents/GitHub/argmining2022'),
    'TaskA_dev.csv',
)

In [5]:
# Test split of Task A. Set the ARGMINING_DATA_DIR environment variable to point at
# the data directory instead of editing this machine-specific default.
file_test = os.path.join(
    os.environ.get('ARGMINING_DATA_DIR', '/Users/myrthereuver/Documents/GitHub/argmining2022'),
    'TaskA_test.csv',
)

### Importing data

In [6]:
def create_df(input_file):
    """Load one Task A CSV and derive the text columns used by the classifiers.

    Parameters
    ----------
    input_file : path or file-like accepted by ``pd.read_csv``
        CSV with at least the columns ``topic``, ``Premise``, ``Conclusion``,
        ``Validity`` and ``Novelty``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with label value 0 replaced by 1 in ``Validity`` and
        ``Novelty``, plus four added columns, in this order: ``examples``
        (premise and conclusion joined by " AND "), ``examples_topic`` (the same,
        prefixed by the topic), ``stemmed`` and ``stemmed_topic`` (the English
        Snowball-stemmed versions of those two texts).
    """
    from nltk.stem.snowball import SnowballStemmer

    english_stemmer = SnowballStemmer(language='english')

    def stem_text(text):
        # Tokens are split on single spaces only and re-joined with single spaces.
        return " ".join([english_stemmer.stem(token) for token in text.split(" ")])

    frame = pd.read_csv(input_file)

    # The neutral label (0) is folded into the positive class (1); the original
    # author notes this follows the paper and the data.
    for label_column in ("Validity", "Novelty"):
        frame.loc[frame[label_column] == 0, label_column] = 1

    # Premise and conclusion become one text; the topic-aware variant prepends the topic.
    frame["examples"] = frame["Premise"] + " AND " + frame["Conclusion"]
    frame["examples_topic"] = frame["topic"] + " AND " + frame["Premise"] + " AND " + frame["Conclusion"]

    # Stem both text variants with the same helper.
    for source_column, stemmed_column in (("examples", "stemmed"), ("examples_topic", "stemmed_topic")):
        frame[stemmed_column] = frame[source_column].apply(stem_text)
    return frame

In [7]:
# Load and preprocess the three Task A splits (train / dev / test) with create_df.
data_train = create_df(file_train)
data_dev = create_df(file_dev)
data_test = create_df(file_test)

In [8]:
# Class balance of the Novelty labels in the training split (create_df has already mapped 0 to 1).
data_train["Novelty"].value_counts()

-1    595
 1    155
Name: Novelty, dtype: int64

In [9]:
# Number of examples in the training split.
print(len(data_train))

750


In [10]:
# Number of training examples per debate topic.
data_train["topic"].value_counts()

Trying 9/11 terror suspects in NYC courts             90
Trying terrorist suspects in civilian courts          85
US health care reform                                 80
US offshore oil drilling                              70
US and NATO intervention in Libya                     45
Torture                                               45
US-Indian nuclear deal                                35
Two-state solution to Israeli-Palestinian conflict    35
United Nations Standing Army                          30
Turkey EU membership                                  30
Unilateral US military strike inside Pakistan         25
UN Security Council veto                              25
Using sanctions to end child labor                    20
US debt ceiling deal                                  20
Twin Towers reconstruction                            20
US electoral college                                  15
Two-party system                                      15
Underground nuclear waste stora

In [11]:
# Number of examples in the development split.
print(len(data_dev))

202


In [12]:
# Number of examples in the test split.
print(len(data_test))

520


### SVM classifier

#### Pipeline

In [13]:
# Bag-of-words counts -> TF-IDF weighting -> linear SVM; this pipeline is used for the Validity models.
# NOTE(review): C=0.09 is presumably a tuned value — the search is not shown in this notebook.
text_clf_hyperparam = Pipeline(steps=[
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', svm.LinearSVC(C=0.09)),
])

In [14]:
def results_model(model, validation, label, version_text):
    """Print a classification report for ``model`` on one split and return its predictions.

    Parameters
    ----------
    model : fitted estimator
        Any object whose ``predict`` accepts the text column selected below.
    validation : pandas.DataFrame
        Split to evaluate on; must contain the ``label`` and ``version_text`` columns.
    label : str
        Name of the gold-label column (e.g. "Validity" or "Novelty").
    version_text : str
        Name of the text column fed to the model (e.g. "stemmed").

    Returns
    -------
    The predictions returned by ``model.predict``, in the row order of ``validation``.
    """
    predicted = model.predict(validation[version_text])

    # The report already includes accuracy, so no separate accuracy value is computed.
    # `metrics` is the module imported at the top of the notebook.
    print(metrics.classification_report(validation[label], predicted, digits=4))
    return predicted

### Validity, Stemmed

In [15]:
# Fit a clone so that `model_val` owns its fitted state. `Pipeline.fit` returns the pipeline
# itself, so fitting `text_clf_hyperparam` directly would make `model_val` an alias that any
# later re-fit of the same pipeline silently overwrites.
model_val = clone(text_clf_hyperparam).fit(data_train.stemmed, data_train.Validity)

In [16]:
# Dev-split report for the Validity model, evaluated on the stemmed premise+conclusion text.
pred_val = results_model(model=model_val, validation=data_dev, label="Validity", version_text="stemmed")

              precision    recall  f1-score   support

          -1     0.6053    0.3108    0.4107        74
           1     0.6890    0.8828    0.7740       128

    accuracy                         0.6733       202
   macro avg     0.6471    0.5968    0.5923       202
weighted avg     0.6583    0.6733    0.6409       202



In [17]:
# Test-split report for the Validity model, evaluated on the stemmed premise+conclusion text.
pred_val_test = results_model(model=model_val, validation=data_test, label="Validity", version_text="stemmed")

              precision    recall  f1-score   support

          -1     0.4211    0.4660    0.4424       206
           1     0.6233    0.5796    0.6007       314

    accuracy                         0.5346       520
   macro avg     0.5222    0.5228    0.5215       520
weighted avg     0.5432    0.5346    0.5380       520



### Validity, Topic+stemmed

In [18]:
# Fit a clone rather than the shared pipeline: `Pipeline.fit` returns the pipeline itself, so
# re-fitting `text_clf_hyperparam` here would also replace the fitted state of every other
# model variable that refers to it.
model_val_top = clone(text_clf_hyperparam).fit(data_train.stemmed_topic, data_train.Validity)

In [19]:
# Dev-split report for the Validity model, evaluated on the stemmed topic+premise+conclusion text.
pred_val_top = results_model(model=model_val_top, validation=data_dev, label="Validity", version_text="stemmed_topic")

              precision    recall  f1-score   support

          -1     0.5263    0.2703    0.3571        74
           1     0.6707    0.8594    0.7534       128

    accuracy                         0.6436       202
   macro avg     0.5985    0.5648    0.5553       202
weighted avg     0.6178    0.6436    0.6083       202



In [20]:
# Test-split report for the Validity model, evaluated on the stemmed topic+premise+conclusion text.
pred_val_topic_test = results_model(model=model_val_top, validation=data_test, label="Validity", version_text="stemmed_topic")

              precision    recall  f1-score   support

          -1     0.4382    0.5340    0.4814       206
           1     0.6431    0.5510    0.5935       314

    accuracy                         0.5442       520
   macro avg     0.5407    0.5425    0.5374       520
weighted avg     0.5620    0.5442    0.5491       520



In [21]:
# Bag-of-words counts -> TF-IDF weighting -> linear SVM; this pipeline is used for the Novelty models.
# NOTE(review): C=4.7 is presumably a tuned value — the search is not shown in this notebook.
text_clf_hyperparam_nov = Pipeline(steps=[
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', svm.LinearSVC(C=4.7)),
])

### Novelty, Stemmed

In [22]:
# Fit a clone so that `model_nov` owns its fitted state. `Pipeline.fit` returns the pipeline
# itself, so fitting `text_clf_hyperparam_nov` directly would make `model_nov` an alias that
# any later re-fit of the same pipeline silently overwrites.
model_nov = clone(text_clf_hyperparam_nov).fit(data_train.stemmed, data_train.Novelty)

In [23]:
# Dev-split report for the Novelty model, evaluated on the stemmed premise+conclusion text.
pred_nov = results_model(model=model_nov, validation=data_dev, label="Novelty", version_text="stemmed")

              precision    recall  f1-score   support

          -1     0.5787    0.9661    0.7238       118
           1     0.2000    0.0119    0.0225        84

    accuracy                         0.5693       202
   macro avg     0.3893    0.4890    0.3731       202
weighted avg     0.4212    0.5693    0.4322       202



In [24]:
# Test-split report for the Novelty model, evaluated on the stemmed premise+conclusion text.
pred_nov_test = results_model(model=model_nov, validation=data_test, label="Novelty", version_text="stemmed")

              precision    recall  f1-score   support

          -1     0.5714    0.9796    0.7218       294
           1     0.6250    0.0442    0.0826       226

    accuracy                         0.5731       520
   macro avg     0.5982    0.5119    0.4022       520
weighted avg     0.5947    0.5731    0.4440       520



### Novelty, Topic+stemmed

In [25]:
# Fit a clone rather than the shared pipeline: `Pipeline.fit` returns the pipeline itself, so
# re-fitting `text_clf_hyperparam_nov` here would also replace the fitted state of every other
# model variable that refers to it.
model_nov_top = clone(text_clf_hyperparam_nov).fit(data_train.stemmed_topic, data_train.Novelty)

In [26]:
# Dev-split report for the Novelty model, evaluated on the stemmed topic+premise+conclusion text.
pred_nov_top = results_model(model=model_nov_top, validation=data_dev, label="Novelty", version_text="stemmed_topic")

              precision    recall  f1-score   support

          -1     0.5821    0.9915    0.7335       118
           1     0.0000    0.0000    0.0000        84

    accuracy                         0.5792       202
   macro avg     0.2910    0.4958    0.3668       202
weighted avg     0.3400    0.5792    0.4285       202



In [27]:
# Test-split report for the Novelty model, evaluated on the stemmed topic+premise+conclusion text.
pred_nov_top_test = results_model(model=model_nov_top, validation=data_test, label="Novelty", version_text="stemmed_topic")

              precision    recall  f1-score   support

          -1     0.5673    0.9898    0.7212       294
           1     0.5714    0.0177    0.0343       226

    accuracy                         0.5673       520
   macro avg     0.5693    0.5037    0.3778       520
weighted avg     0.5691    0.5673    0.4227       520



### Evaluation with the shared task organizers' metrics

In [28]:
from typing import Dict

import numpy

def _class_f1(predicted_mask, gold_mask):
    """Return the F1 score of one class from boolean masks of its predicted and gold members.

    Precision and recall divide by ``max(1, count)`` and F1 divides by
    ``max(1e-4, precision + recall)``, so a class with no predicted or no gold
    members scores 0 instead of raising a division error.
    """
    true_positive = numpy.sum(predicted_mask & gold_mask)
    precision = true_positive / max(1, numpy.sum(predicted_mask))
    recall = true_positive / max(1, numpy.sum(gold_mask))
    return 2 * precision * recall / max(1e-4, precision + recall)


def val_nov_metric(is_validity: numpy.ndarray, should_validity: numpy.ndarray, is_novelty: numpy.ndarray, should_novelty: numpy.ndarray) -> Dict[str, float]:
    """Compute per-class and macro F1 scores for validity, novelty and their combination.

    A value counts as positive when it is ``>= .5`` and as negative when it is ``< .5``
    (so labels -1/1 as well as scores in [0, 1] work; NaN falls in neither class).

    Parameters
    ----------
    is_validity, is_novelty : array-like
        Predicted validity / novelty values. Lists and pandas Series are accepted
        in addition to numpy arrays.
    should_validity, should_novelty : array-like
        Gold validity / novelty values, aligned element-wise with the predictions.

    Returns
    -------
    Dict[str, float]
        ``f1_validity``, ``f1_novelty``, ``f1_val_neg``, ``f1_nov_neg`` (single-task F1 of
        the positive and negative classes), ``f1_valid_novel``, ``f1_valid_nonnovel``,
        ``f1_nonvalid_novel``, ``f1_nonvalid_nonnovel`` (F1 of the four joint classes),
        then ``f1_val_macro``, ``f1_nov_macro`` and ``f1_macro`` (their unweighted means).

    Raises
    ------
    ValueError
        If the four inputs do not all have the same shape.
    """
    is_validity, should_validity, is_novelty, should_novelty = (
        numpy.asarray(values)
        for values in (is_validity, should_validity, is_novelty, should_novelty)
    )
    shapes = {is_validity.shape, should_validity.shape, is_novelty.shape, should_novelty.shape}
    if len(shapes) != 1:
        raise ValueError("all inputs must have the same shape, got {}".format(sorted(shapes)))

    # Negative masks use an explicit `< .5` (not the inverse of `>= .5`) so that NaN
    # values are counted in neither class.
    pred_valid, pred_nonvalid = is_validity >= .5, is_validity < .5
    gold_valid, gold_nonvalid = should_validity >= .5, should_validity < .5
    pred_novel, pred_nonnovel = is_novelty >= .5, is_novelty < .5
    gold_novel, gold_nonnovel = should_novelty >= .5, should_novelty < .5

    ret = {
        "f1_validity": _class_f1(pred_valid, gold_valid),
        "f1_novelty": _class_f1(pred_novel, gold_novel),
        "f1_val_neg": _class_f1(pred_nonvalid, gold_nonvalid),
        "f1_nov_neg": _class_f1(pred_nonnovel, gold_nonnovel),
        "f1_valid_novel": _class_f1(pred_valid & pred_novel, gold_valid & gold_novel),
        "f1_valid_nonnovel": _class_f1(pred_valid & pred_nonnovel, gold_valid & gold_nonnovel),
        "f1_nonvalid_novel": _class_f1(pred_nonvalid & pred_novel, gold_nonvalid & gold_novel),
        "f1_nonvalid_nonnovel": _class_f1(pred_nonvalid & pred_nonnovel, gold_nonvalid & gold_nonnovel),
    }

    ret.update({
        "f1_val_macro": (ret["f1_validity"] + ret["f1_val_neg"])/2,
        "f1_nov_macro": (ret["f1_novelty"] + ret["f1_nov_neg"])/2,
        "f1_macro": (ret["f1_valid_novel"]+ret["f1_valid_nonnovel"]+ret["f1_nonvalid_novel"]+ret["f1_nonvalid_nonnovel"])/4
    })

    return ret

### Dev

In [29]:
# Organizer metrics on the dev split, combining the Validity and Novelty predictions
# of the models evaluated on the stemmed premise+conclusion text.
val_nov_metric(
    is_validity=pred_val,
    should_validity=data_dev["Validity"].to_numpy(),
    is_novelty=pred_nov,
    should_novelty=data_dev["Novelty"].to_numpy(),
)

{'f1_validity': 0.773972602739726,
 'f1_novelty': 0.02247191011235955,
 'f1_val_neg': 0.4107142857142857,
 'f1_nov_neg': 0.7238095238095238,
 'f1_valid_novel': 0.0,
 'f1_valid_nonnovel': 0.6048387096774194,
 'f1_nonvalid_novel': 0.0,
 'f1_nonvalid_nonnovel': 0.417910447761194,
 'f1_val_macro': 0.5923434442270059,
 'f1_nov_macro': 0.37314071696094164,
 'f1_macro': 0.25568728935965335}

### Test

In [30]:
# Organizer metrics on the test split, combining the Validity and Novelty predictions
# of the models evaluated on the stemmed premise+conclusion text.
val_nov_metric(
    is_validity=pred_val_test,
    should_validity=data_test["Validity"].to_numpy(),
    is_novelty=pred_nov_test,
    should_novelty=data_test["Novelty"].to_numpy(),
)

{'f1_validity': 0.6006600660066006,
 'f1_novelty': 0.08264462809917357,
 'f1_val_neg': 0.4423963133640553,
 'f1_nov_neg': 0.7218045112781954,
 'f1_valid_novel': 0.06896551724137931,
 'f1_valid_nonnovel': 0.4598698481561822,
 'f1_nonvalid_novel': 0.020618556701030924,
 'f1_nonvalid_nonnovel': 0.27299703264094954,
 'f1_val_macro': 0.521528189685328,
 'f1_nov_macro': 0.40222456968868453,
 'f1_macro': 0.2056127386848855}

### Topic stats

In [31]:
# Distinct debate topics in the training split (printed as a set, so the order is arbitrary).
print(set(data_train["topic"]))

{'Two-party system', 'Trying terrorist suspects in civilian courts', 'UN Security Council veto', 'US health care reform', 'Torture', 'US-Indian nuclear deal', 'Underground nuclear waste storage', 'US electoral college', 'US debt ceiling deal', 'Twin Towers reconstruction', 'TV viewing is harmful to children', 'U.S. Withdrawal From the United Nations', 'Unilateral US military strike inside Pakistan', 'Two-state solution to Israeli-Palestinian conflict', 'Using sanctions to end child labor', 'Turkey EU membership', 'Trying 9/11 terror suspects in NYC courts', 'US and NATO intervention in Libya', 'United Nations Standing Army', 'Trade vs aid', 'US offshore oil drilling', 'United Nations No Growth Budgets'}


In [32]:
# Distinct debate topics in the development split (printed as a set, so the order is arbitrary).
print(set(data_dev["topic"]))

{'War on Drugs', 'Video surveillance', 'Zoos', 'Vegetarianism', 'Wind energy', 'Wave power', 'Yucca Mountain nuclear waste repository', 'Warrantless wiretapping in the United States'}


In [33]:
# Distinct debate topics in the test split (printed as a set, so the order is arbitrary).
print(set(data_test["topic"]))

{'Withdrawing from Iraq', 'War on Drugs', 'Zoos', 'Warrantless wiretapping in the United States', 'Year-round school', 'Zero tolerance law', 'Was the War in Iraq worth it?', 'Veal', 'Vegetarianism', 'Wind energy', 'Water privatization', 'Video games', 'Wave power', 'Yucca Mountain nuclear waste repository', 'Video surveillance'}


In [34]:
# Topics that occur in exactly one of the dev and test splits.
print(set(data_dev["topic"]).symmetric_difference(set(data_test["topic"])))

{'Withdrawing from Iraq', 'Year-round school', 'Zero tolerance law', 'Was the War in Iraq worth it?', 'Veal', 'Water privatization', 'Video games'}


In [35]:
# Dev rows whose topic is "Two-party system", one of the training topics.
# NOTE(review): presumably a spot check that training topics do not reappear in dev — confirm.
data_dev.loc[data_dev['topic'] == "Two-party system"]

Unnamed: 0,topic,Premise,Conclusion,Validity,Validity-Confidence,Novelty,Novelty-Confidence,examples,examples_topic,stemmed,stemmed_topic


In [36]:
# Number of training examples per debate topic (the same table is displayed earlier in the notebook).
data_train["topic"].value_counts()

Trying 9/11 terror suspects in NYC courts             90
Trying terrorist suspects in civilian courts          85
US health care reform                                 80
US offshore oil drilling                              70
US and NATO intervention in Libya                     45
Torture                                               45
US-Indian nuclear deal                                35
Two-state solution to Israeli-Palestinian conflict    35
United Nations Standing Army                          30
Turkey EU membership                                  30
Unilateral US military strike inside Pakistan         25
UN Security Council veto                              25
Using sanctions to end child labor                    20
US debt ceiling deal                                  20
Twin Towers reconstruction                            20
US electoral college                                  15
Two-party system                                      15
Underground nuclear waste stora