In [1]:
import os

import numpy as np
import pandas as pd
import sklearn
from sklearn import metrics
from sklearn import svm
from sklearn.base import clone
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.pipeline import Pipeline

In [2]:
# Render floats with five decimals in DataFrame/Series output.
pd.set_option('display.float_format', lambda x: '%.5f' % x)
# Use the full option key: abbreviated keys are resolved by pattern matching and
# break as soon as another option name matches the same pattern.
pd.set_option('display.max_colwidth', 1000)

In [3]:
# Training CSV. The directory defaults to the original checkout location and can be
# redirected with the ARGMINING_DATA_DIR environment variable, so the notebook is
# not tied to a single machine.
file_train = os.path.join(
    os.environ.get('ARGMINING_DATA_DIR', '/Users/myrthereuver/Documents/GitHub/argmining2022'),
    'TaskA_train.csv',
)

In [4]:
# Dev CSV. The directory defaults to the original checkout location and can be
# redirected with the ARGMINING_DATA_DIR environment variable, so the notebook is
# not tied to a single machine.
file_dev = os.path.join(
    os.environ.get('ARGMINING_DATA_DIR', '/Users/myrthereuver/Documents/GitHub/argmining2022'),
    'TaskA_dev.csv',
)

In [5]:
# Test CSV. The directory defaults to the original checkout location and can be
# redirected with the ARGMINING_DATA_DIR environment variable, so the notebook is
# not tied to a single machine.
file_test = os.path.join(
    os.environ.get('ARGMINING_DATA_DIR', '/Users/myrthereuver/Documents/GitHub/argmining2022'),
    'TaskA_test.csv',
)

### Importing data

In [6]:
def create_df(input_file):
    """Read one CSV split and add the text columns used by the classifiers.

    Args:
        input_file: Path or buffer passed straight to ``pandas.read_csv``. The
            data must contain the columns ``Validity``, ``Novelty``, ``topic``,
            ``Premise`` and ``Conclusion``.

    Returns:
        The loaded DataFrame, modified as follows:
          * ``Validity`` / ``Novelty`` values equal to 0 are overwritten with 1;
          * ``examples``: premise and conclusion joined by ``" AND "``;
          * ``examples_topic``: topic, premise and conclusion joined the same way;
          * ``stemmed`` / ``stemmed_topic``: the two columns above with every
            space-separated token run through an English Snowball stemmer.
    """
    from nltk.stem.snowball import SnowballStemmer

    english_stemmer = SnowballStemmer(language='english')

    def stem_tokens(text):
        # Tokens are delimited by single spaces only (not arbitrary whitespace),
        # so consecutive spaces yield empty tokens that survive the re-join.
        return " ".join(english_stemmer.stem(token) for token in text.split(" "))

    frame = pd.read_csv(input_file)

    # Neutral annotations (0) are folded into the positive class (1), as
    # specified in the paper and data.
    for label_column in ("Validity", "Novelty"):
        frame.loc[frame[label_column] == 0, label_column] = 1

    # Concatenate premise and conclusion (optionally prefixed by the topic)
    # into a single text per row.
    joiner = " AND "
    frame["examples"] = frame["Premise"] + joiner + frame["Conclusion"]
    frame["examples_topic"] = frame["topic"] + joiner + frame["Premise"] + joiner + frame["Conclusion"]

    # Stemmed variants of both concatenated texts.
    frame["stemmed"] = frame["examples"].apply(stem_tokens)
    frame["stemmed_topic"] = frame["examples_topic"].apply(stem_tokens)
    return frame

In [7]:
# Load the train, dev and test CSVs through the same preprocessing function.
data_train = create_df(file_train)
data_dev = create_df(file_dev)
data_test = create_df(file_test)

In [8]:
# Distribution of the Novelty labels in the training split.
data_train["Novelty"].value_counts()

-1    595
 1    155
Name: Novelty, dtype: int64

In [9]:
# Number of rows in the training split.
print(len(data_train))

750


In [10]:
# Number of rows in the dev split.
print(len(data_dev))

202


In [11]:
# Number of rows in the test split.
print(len(data_test))

520


### SVM classifier

#### Pipeline

In [12]:
# Text classifier used for every fit below:
# token counts -> TF-IDF re-weighting -> linear SVM with C=0.09.
text_clf_hyperparam = Pipeline(
    steps=[
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', svm.LinearSVC(C=0.09)),
    ]
)

In [13]:
def results_model(model, validation, label, version_text):
    """Print a classification report for ``model`` on one split and return its predictions.

    Args:
        model: Fitted estimator exposing ``predict``.
        validation: DataFrame (or other mapping) holding the evaluation split.
        label: Key of the gold-label column in ``validation``.
        version_text: Key of the text column passed to ``model.predict``.

    Returns:
        The object returned by ``model.predict(validation[version_text])``.
    """
    predicted = model.predict(validation[version_text])

    # The report already contains accuracy, so no separate value is computed.
    print(metrics.classification_report(validation[label], predicted, digits=4))
    return predicted

### Validity, Stemmed

In [14]:
# Validity model on the stemmed premise+conclusion text.
# Fit a clone: Pipeline.fit returns the pipeline itself, so fitting the shared
# `text_clf_hyperparam` directly would make this name an alias of one object
# that every later fit overwrites.
model_val = clone(text_clf_hyperparam).fit(data_train.stemmed, data_train.Validity)

In [15]:
# Dev-split report and predictions for the Validity model (stemmed text).
pred_val = results_model(model_val, data_dev, "Validity", "stemmed")

              precision    recall  f1-score   support

          -1     0.6053    0.3108    0.4107        74
           1     0.6890    0.8828    0.7740       128

    accuracy                         0.6733       202
   macro avg     0.6471    0.5968    0.5923       202
weighted avg     0.6583    0.6733    0.6409       202



In [16]:
# Test-split report and predictions for the Validity model (stemmed text).
pred_val_test = results_model(model_val, data_test, "Validity", "stemmed")

              precision    recall  f1-score   support

          -1     0.4211    0.4660    0.4424       206
           1     0.6233    0.5796    0.6007       314

    accuracy                         0.5346       520
   macro avg     0.5222    0.5228    0.5215       520
weighted avg     0.5432    0.5346    0.5380       520



### Validity, Topic+stemmed

In [17]:
# Validity model on the stemmed topic+premise+conclusion text.
# Fit a clone: Pipeline.fit returns the pipeline itself, so fitting the shared
# `text_clf_hyperparam` directly would make this name an alias of one object
# that every other fit overwrites.
model_val_top = clone(text_clf_hyperparam).fit(data_train.stemmed_topic, data_train.Validity)

In [18]:
# Dev-split report and predictions for the Validity model (topic + stemmed text).
pred_val_top = results_model(model_val_top, data_dev, "Validity", "stemmed_topic")

              precision    recall  f1-score   support

          -1     0.5263    0.2703    0.3571        74
           1     0.6707    0.8594    0.7534       128

    accuracy                         0.6436       202
   macro avg     0.5985    0.5648    0.5553       202
weighted avg     0.6178    0.6436    0.6083       202



In [19]:
# Test-split report and predictions for the Validity model (topic + stemmed text).
pred_val_topic_test = results_model(model_val_top, data_test, "Validity", "stemmed_topic")

              precision    recall  f1-score   support

          -1     0.4382    0.5340    0.4814       206
           1     0.6431    0.5510    0.5935       314

    accuracy                         0.5442       520
   macro avg     0.5407    0.5425    0.5374       520
weighted avg     0.5620    0.5442    0.5491       520



In [20]:
# Same three-stage pipeline as `text_clf_hyperparam`, but with C=4.7.
# NOTE(review): the fits in the following cells all use `text_clf_hyperparam`;
# this pipeline is not referenced by them.
text_clf_hyperparam2 = Pipeline(
    steps=[
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', svm.LinearSVC(C=4.7)),
    ]
)

#### A higher C works better for Novelty, but we disregard that because we want a single C value for both models.

### Novelty, Stemmed

In [21]:
# Novelty model on the stemmed premise+conclusion text.
# Fit a clone: Pipeline.fit returns the pipeline itself, so fitting the shared
# `text_clf_hyperparam` directly would make this name an alias of one object
# that every other fit overwrites.
model_nov = clone(text_clf_hyperparam).fit(data_train.stemmed, data_train.Novelty)

In [22]:
# Dev-split report and predictions for the Novelty model (stemmed text).
pred_nov = results_model(model_nov, data_dev, "Novelty", "stemmed")

              precision    recall  f1-score   support

          -1     0.5842    1.0000    0.7375       118
           1     0.0000    0.0000    0.0000        84

    accuracy                         0.5842       202
   macro avg     0.2921    0.5000    0.3687       202
weighted avg     0.3412    0.5842    0.4308       202



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
# Test-split report and predictions for the Novelty model (stemmed text).
pred_nov_test = results_model(model_nov, data_test, "Novelty", "stemmed")

              precision    recall  f1-score   support

          -1     0.5654    1.0000    0.7224       294
           1     0.0000    0.0000    0.0000       226

    accuracy                         0.5654       520
   macro avg     0.2827    0.5000    0.3612       520
weighted avg     0.3197    0.5654    0.4084       520



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Novelty, Topic+stemmed

In [24]:
# Novelty model on the stemmed topic+premise+conclusion text.
# Fit a clone: Pipeline.fit returns the pipeline itself, so fitting the shared
# `text_clf_hyperparam` directly would make this name an alias of one object
# that every other fit overwrites.
model_nov_top = clone(text_clf_hyperparam).fit(data_train.stemmed_topic, data_train.Novelty)

In [25]:
# Dev-split report and predictions for the Novelty model (topic + stemmed text).
pred_nov_top = results_model(model_nov_top, data_dev, "Novelty", "stemmed_topic")

              precision    recall  f1-score   support

          -1     0.5842    1.0000    0.7375       118
           1     0.0000    0.0000    0.0000        84

    accuracy                         0.5842       202
   macro avg     0.2921    0.5000    0.3687       202
weighted avg     0.3412    0.5842    0.4308       202



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [26]:
# Test-split report and predictions for the Novelty model (topic + stemmed text).
pred_nov_top_test = results_model(model_nov_top, data_test, "Novelty", "stemmed_topic")

              precision    recall  f1-score   support

          -1     0.5654    1.0000    0.7224       294
           1     0.0000    0.0000    0.0000       226

    accuracy                         0.5654       520
   macro avg     0.2827    0.5000    0.3612       520
weighted avg     0.3197    0.5654    0.4084       520



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Evaluation with the Shared Task organizers' metric

In [30]:
from typing import Dict

import numpy

def val_nov_metric(is_validity: numpy.ndarray, should_validity: numpy.ndarray, is_novelty: numpy.ndarray, should_novelty: numpy.ndarray) -> Dict[str, float]:
    """Compute per-class and joint F1 scores for validity and novelty predictions.

    Every input is binarised at 0.5: a value ``>= .5`` is treated as the
    positive class, a value ``< .5`` as the negative class.

    Args:
        is_validity: Predicted validity values.
        should_validity: Gold validity values.
        is_novelty: Predicted novelty values.
        should_novelty: Gold novelty values.

    Returns:
        A dict with, in this order: the positive- and negative-class F1 for
        validity and novelty (``f1_validity``, ``f1_novelty``, ``f1_val_neg``,
        ``f1_nov_neg``), the F1 of each of the four joint validity/novelty
        classes (``f1_valid_novel``, ``f1_valid_nonnovel``,
        ``f1_nonvalid_novel``, ``f1_nonvalid_nonnovel``), the two per-task macro
        averages (``f1_val_macro``, ``f1_nov_macro``) and the macro average over
        the four joint classes (``f1_macro``).
    """

    def tally(mask):
        # Number of positions where ``mask`` is true.
        return numpy.sum(numpy.where(mask, 1, 0))

    def tally_all(*masks):
        # Number of positions where every mask is true at the same time.
        return tally(numpy.all(numpy.stack(list(masks)), axis=0))

    def f1_from_counts(hits, n_predicted, n_gold):
        # Denominators are clamped (to 1 for the counts, to 1e-4 for the
        # precision+recall sum) so that empty classes give 0 instead of a
        # division by zero.
        precision = hits / max(1, n_predicted)
        recall = hits / max(1, n_gold)
        return 2 * precision * recall / max(1e-4, precision + recall)

    def single_f1(predicted_mask, gold_mask):
        # F1 of one class of one task.
        return f1_from_counts(tally_all(predicted_mask, gold_mask),
                              tally(predicted_mask),
                              tally(gold_mask))

    def joint_f1(pred_val_mask, pred_nov_mask, gold_val_mask, gold_nov_mask):
        # F1 of one combined (validity, novelty) class.
        return f1_from_counts(
            tally_all(pred_val_mask, pred_nov_mask, gold_val_mask, gold_nov_mask),
            tally_all(pred_val_mask, pred_nov_mask),
            tally_all(gold_val_mask, gold_nov_mask))

    # Binarised views of the four inputs.
    pred_valid, pred_nonvalid = is_validity >= .5, is_validity < .5
    gold_valid, gold_nonvalid = should_validity >= .5, should_validity < .5
    pred_novel, pred_nonnovel = is_novelty >= .5, is_novelty < .5
    gold_novel, gold_nonnovel = should_novelty >= .5, should_novelty < .5

    ret = {
        "f1_validity": single_f1(pred_valid, gold_valid),
        "f1_novelty": single_f1(pred_novel, gold_novel),
        "f1_val_neg": single_f1(pred_nonvalid, gold_nonvalid),
        "f1_nov_neg": single_f1(pred_nonnovel, gold_nonnovel),
        "f1_valid_novel": joint_f1(pred_valid, pred_novel, gold_valid, gold_novel),
        "f1_valid_nonnovel": joint_f1(pred_valid, pred_nonnovel, gold_valid, gold_nonnovel),
        "f1_nonvalid_novel": joint_f1(pred_nonvalid, pred_novel, gold_nonvalid, gold_novel),
        "f1_nonvalid_nonnovel": joint_f1(pred_nonvalid, pred_nonnovel, gold_nonvalid, gold_nonnovel),
    }

    # Macro averages: per task over its two classes, and over the four joint classes.
    ret["f1_val_macro"] = (ret["f1_validity"] + ret["f1_val_neg"])/2
    ret["f1_nov_macro"] = (ret["f1_novelty"] + ret["f1_nov_neg"])/2
    ret["f1_macro"] = (ret["f1_valid_novel"]+ret["f1_valid_nonnovel"]+ret["f1_nonvalid_novel"]+ret["f1_nonvalid_nonnovel"])/4

    return ret

### Dev

In [31]:
# Joint validity/novelty scores on the dev split, using the predictions of the
# two models trained on the stemmed text without topic.
val_nov_metric(pred_val, np.array(data_dev["Validity"]), pred_nov, np.array(data_dev["Novelty"]))

{'f1_validity': 0.773972602739726,
 'f1_novelty': 0.0,
 'f1_val_neg': 0.4107142857142857,
 'f1_nov_neg': 0.7374999999999999,
 'f1_valid_novel': 0.0,
 'f1_valid_nonnovel': 0.6245059288537549,
 'f1_nonvalid_novel': 0.0,
 'f1_nonvalid_nonnovel': 0.417910447761194,
 'f1_val_macro': 0.5923434442270059,
 'f1_nov_macro': 0.36874999999999997,
 'f1_macro': 0.2606040941537372}

### Test

In [32]:
# Joint validity/novelty scores on the test split, using the predictions of the
# two models trained on the stemmed text without topic.
val_nov_metric(pred_val_test, np.array(data_test["Validity"]), pred_nov_test, np.array(data_test["Novelty"]))

{'f1_validity': 0.6006600660066006,
 'f1_novelty': 0.0,
 'f1_val_neg': 0.4423963133640553,
 'f1_nov_neg': 0.7223587223587223,
 'f1_valid_novel': 0.0,
 'f1_valid_nonnovel': 0.453781512605042,
 'f1_nonvalid_novel': 0.0,
 'f1_nonvalid_nonnovel': 0.272189349112426,
 'f1_val_macro': 0.521528189685328,
 'f1_nov_macro': 0.36117936117936117,
 'f1_macro': 0.181492715429367}