### **ENVIRONMENT SETUP**

In [1]:
# ! rm -r data*
# ! wget http://argumentation.bplaced.net/arguana-data/dagstuhl-15512-argquality-corpus-v2.zip
# ! unzip dagstuhl-15512-argquality-corpus-v2.zip
# ! rm *.zip
# ! rm -r __MACOSX
# ! mv dagstuhl-15512-argquality-corpus-v2 data

### **IMPORT LIBRARIES**

In [2]:
# Set random seed

import random
random.seed(14071)

In [88]:
import re
import pandas as pd
import numpy as np
from tqdm import tqdm

import nltk
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.tokenize import word_tokenize
from nltk.stem import SnowballStemmer

from sklearn.feature_extraction.text import CountVectorizer # Bag of Words
from sklearn.feature_extraction.text import TfidfVectorizer # TF-IDF

from sklearn.preprocessing import LabelEncoder # Label Encoding
from sklearn.preprocessing import OneHotEncoder # One Hot Encoding

from sklearn.model_selection import train_test_split # Train Test Split
from sklearn.model_selection import cross_val_score, cross_val_predict # Cross validation
from sklearn.model_selection import KFold # K Fold

from sklearn.linear_model import LogisticRegression # LR Model
from sklearn.tree import DecisionTreeClassifier # DT Model
from sklearn.ensemble import RandomForestClassifier # RF Model

import tensorflow.keras as keras # Keras bindings
import tensorflow as tf # Tensorflow bindings

from sklearn.metrics import classification_report # Classification Report

from sklearn.model_selection import GridSearchCV # Grid Search

[nltk_data] Downloading package stopwords to /home/sri/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/sri/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /home/sri/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/sri/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


### **IMPORT DATA**

In [5]:
df = pd.read_csv("data/dagstuhl-15512-argquality-corpus-annotated.csv", sep='\t', encoding_errors="ignore")

In [6]:
df

Unnamed: 0,annotator,argumentative,overall quality,local acceptability,appropriateness,arrangement,clarity,cogency,effectiveness,global acceptability,...,global sufficiency,reasonableness,local relevance,credibility,emotional appeal,sufficiency,argument,#id,issue,stance
0,1,y,1 (Low),1 (Low),1 (Low),1 (Low),2 (Average),1 (Low),1 (Low),1 (Low),...,1 (Low),1 (Low),1 (Low),1 (Low),1 (Low),1 (Low),"it is true that bottled water is a waste, but ...",arg219250,ban-plastic-water-bottles,no-bad-for-the-economy
1,2,y,1 (Low),3 (High),2 (Average),2 (Average),3 (High),1 (Low),1 (Low),3 (High),...,1 (Low),2 (Average),2 (Average),2 (Average),2 (Average),1 (Low),"it is true that bottled water is a waste, but ...",arg219250,ban-plastic-water-bottles,no-bad-for-the-economy
2,3,y,2 (Average),2 (Average),2 (Average),2 (Average),2 (Average),2 (Average),2 (Average),2 (Average),...,2 (Average),2 (Average),3 (High),2 (Average),1 (Low),2 (Average),"it is true that bottled water is a waste, but ...",arg219250,ban-plastic-water-bottles,no-bad-for-the-economy
3,1,y,2 (Average),3 (High),2 (Average),2 (Average),2 (Average),2 (Average),2 (Average),2 (Average),...,2 (Average),2 (Average),3 (High),3 (High),2 (Average),2 (Average),Most Americans on average recycle 86-88% of th...,arg219293,ban-plastic-water-bottles,no-bad-for-the-economy
4,2,y,1 (Low),2 (Average),1 (Low),2 (Average),2 (Average),1 (Low),1 (Low),2 (Average),...,1 (Low),1 (Low),2 (Average),2 (Average),2 (Average),1 (Low),Most Americans on average recycle 86-88% of th...,arg219293,ban-plastic-water-bottles,no-bad-for-the-economy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
955,2,y,2 (Average),2 (Average),3 (High),2 (Average),2 (Average),1 (Low),1 (Low),3 (High),...,1 (Low),2 (Average),2 (Average),2 (Average),2 (Average),1 (Low),Raffles neglected Singapore when he went aroun...,arg168822,william-farquhar-ought-to-be-honoured-as-the-r...,yes-of-course
956,3,y,2 (Average),2 (Average),2 (Average),2 (Average),2 (Average),2 (Average),2 (Average),2 (Average),...,2 (Average),2 (Average),3 (High),2 (Average),2 (Average),2 (Average),Raffles neglected Singapore when he went aroun...,arg168822,william-farquhar-ought-to-be-honoured-as-the-r...,yes-of-course
957,1,y,2 (Average),2 (Average),2 (Average),2 (Average),1 (Low),2 (Average),2 (Average),2 (Average),...,2 (Average),2 (Average),3 (High),2 (Average),2 (Average),2 (Average),"Raffles doesn't care about the citizens, doesn...",arg168834,william-farquhar-ought-to-be-honoured-as-the-r...,yes-of-course
958,2,y,2 (Average),2 (Average),3 (High),2 (Average),3 (High),1 (Low),2 (Average),2 (Average),...,1 (Low),2 (Average),2 (Average),2 (Average),3 (High),1 (Low),"Raffles doesn't care about the citizens, doesn...",arg168834,william-farquhar-ought-to-be-honoured-as-the-r...,yes-of-course


In [7]:
print(f"Number of annotations = {len(df['argument'])}")
print(f"Number of unique arguements = {len(np.unique(df['argument']))}") # Each argument was scored by 3 annotators
print(f"Number of unique issue = {len(np.unique(df['issue']))}")  # There are a total of 16 issues
print(f"Number of unique stance = {len(np.unique(df['stance']))}") # Each issue has on an avg 2 stance (positive and negative)

Number of annotations = 960
Number of unique arguements = 320
Number of unique issue = 16
Number of unique stance = 28


### **DATA CLEANING**

#### Remove statements are that are tagged as NOT argumentative

In [8]:
df[df["argumentative"] == "n"] # Statements that are tagged as NOT argumentative

Unnamed: 0,annotator,argumentative,overall quality,local acceptability,appropriateness,arrangement,clarity,cogency,effectiveness,global acceptability,...,global sufficiency,reasonableness,local relevance,credibility,emotional appeal,sufficiency,argument,#id,issue,stance
25,2,n,,,,,,,,,...,,,,,,,We will be able to ban water bottles until we ...,arg219242,ban-plastic-water-bottles,no-bad-for-the-economy
32,3,n,,,,,,,,,...,,,,,,,The high price of bottled water is not the wat...,arg219232,ban-plastic-water-bottles,yes-emergencies-only
37,2,n,,,,,,,,,...,,,,,,,A drop of water is worth more than a sack of g...,arg219210,ban-plastic-water-bottles,yes-emergencies-only
51,1,n,,,,,,,,,...,,,,,,,Yeah I have a bottle of water next to me its n...,arg219292,ban-plastic-water-bottles,yes-emergencies-only
52,2,n,,,,,,,,,...,,,,,,,Yeah I have a bottle of water next to me its n...,arg219292,ban-plastic-water-bottles,yes-emergencies-only
53,3,n,,,,,,,,,...,,,,,,,Yeah I have a bottle of water next to me its n...,arg219292,ban-plastic-water-bottles,yes-emergencies-only
97,2,n,,,,,,,,,...,,,,,,,This is just wrong we should not insult who we...,arg236317,christianity-or-atheism,christianity
104,3,n,,,,,,,,,...,,,,,,,I have a personal relationship with Christ. I ...,arg317490,christianity-or-atheism,christianity
105,1,n,,,,,,,,,...,,,,,,,God helps those who help themselves! So i will...,arg234318,christianity-or-atheism,christianity
106,2,n,,,,,,,,,...,,,,,,,God helps those who help themselves! So i will...,arg234318,christianity-or-atheism,christianity


In [9]:
statements = df[df["argumentative"] == "n"]["argument"].to_numpy() # Extract the statements
statements = np.unique(statements) # Extract the unique statements

for ele in statements: # Remove all occurrences of NOT argumentative statements
    df.drop(df[df['argument'] == ele].index, axis=0, inplace=True)

In [10]:
df[df["argumentative"] == "n"]

Unnamed: 0,annotator,argumentative,overall quality,local acceptability,appropriateness,arrangement,clarity,cogency,effectiveness,global acceptability,...,global sufficiency,reasonableness,local relevance,credibility,emotional appeal,sufficiency,argument,#id,issue,stance


In [11]:
print(f"Number of annotations = {len(df['argument'])}")
print(f"Number of unique arguements = {len(np.unique(df['argument']))}") # Each argument was scored by 3 annotators
print(f"Number of unique issue = {len(np.unique(df['issue']))}")  # There are a total of 16 issues
print(f"Number of unique stance = {len(np.unique(df['stance']))}") # Each issue has on an avg 2 stance (positive and negative)

Number of annotations = 912
Number of unique arguements = 304
Number of unique issue = 16
Number of unique stance = 28


#### Combine all Annotators' scores into a single score

In [12]:
argument = np.unique(df["argument"])

In [13]:
attributes = ["annotator", "overall quality", "cogency", "effectiveness", "reasonableness", "argument", "#id"]

cleaned_df = []

for arg in argument:

    new_df = df[df["argument"] == arg][attributes]
    flag = 0
    new_dict = {
        "#id": new_df["#id"].iloc[0],
        "argument": new_df["argument"].iloc[0],
    }

    for ele in ["overall quality", "cogency", "effectiveness", "reasonableness"]:
        if len(pd.value_counts(new_df[ele])) == 3:
            flag = 1
            break
        new_dict[ele] = pd.value_counts(new_df[ele]).index[0]
        
    if flag == 1:
        continue
    cleaned_df.append(new_dict)

cleaned_df = pd.DataFrame(cleaned_df)

In [14]:
df[df["#id"] == "28068"]

Unnamed: 0,annotator,argumentative,overall quality,local acceptability,appropriateness,arrangement,clarity,cogency,effectiveness,global acceptability,...,global sufficiency,reasonableness,local relevance,credibility,emotional appeal,sufficiency,argument,#id,issue,stance
144,1,y,2 (Average),2 (Average),2 (Average),2 (Average),2 (Average),1 (Low),1 (Low),1 (Low),...,1 (Low),1 (Low),3 (High),1 (Low),2 (Average),1 (Low),"""Debates are based on convincing evidence. The...",28068,evolution-vs-creation,creation
145,2,y,1 (Low),1 (Low),3 (High),2 (Average),2 (Average),1 (Low),1 (Low),1 (Low),...,1 (Low),1 (Low),2 (Average),1 (Low),2 (Average),1 (Low),"""Debates are based on convincing evidence. The...",28068,evolution-vs-creation,creation
146,3,y,2 (Average),2 (Average),2 (Average),2 (Average),3 (High),2 (Average),1 (Low),2 (Average),...,1 (Low),1 (Low),2 (Average),1 (Low),2 (Average),2 (Average),"""Debates are based on convincing evidence. The...",28068,evolution-vs-creation,creation


In [15]:
cleaned_df

Unnamed: 0,#id,argument,overall quality,cogency,effectiveness,reasonableness
0,28068,"""Debates are based on convincing evidence. The...",2 (Average),1 (Low),1 (Low),1 (Low)
1,13270,"""If a women is raped"" is a good argument. Howe...",1 (Low),1 (Low),1 (Low),1 (Low)
2,13275,"""The government has no place to tell a woman w...",1 (Low),1 (Low),1 (Low),1 (Low)
3,12365,(I am writing this through Firefox) Emotions a...,2 (Average),1 (Low),1 (Low),2 (Average)
4,arg561672,1. It makes everyone equal - if children can w...,1 (Low),2 (Average),1 (Low),2 (Average)
...,...,...,...,...,...,...
256,arg334959,"yea, because even though there are many other ...",2 (Average),2 (Average),1 (Low),2 (Average)
257,arg335089,yes because if they fear getting hit than they...,1 (Low),1 (Low),1 (Low),1 (Low)
258,arg203922,"yes, i believe it's nice to have a school unif...",2 (Average),2 (Average),1 (Low),2 (Average)
259,arg596217,"yes,India has potential to lead the world.So, ...",1 (Low),1 (Low),1 (Low),1 (Low)


In [16]:
print(f"Number of arguements = {len(cleaned_df['argument'])}")

Number of arguements = 261


### **DATA PREPROCESSING**

In [17]:
text = cleaned_df["argument"]

In [18]:
stop_words = set(stopwords.words("english"))
english_stopwords = stopwords.words("english")
english_stemmer = SnowballStemmer("english")

In [19]:
def clean_text(text):
    text = text.replace('</br>', '') # Remove </br>
    text = re.sub(r'[^\w]', ' ', text) # Remove symbols
    text = re.sub(r'[ ]{2,}', ' ', text) # Remove extra spaces
    text = re.sub(r'[ \t]+$', '', text) # Remove trailing white spaces
    tokens = []
    for token in text.split():
        if token not in stop_words:
            token = english_stemmer.stem(token)
            tokens.append(token)
    return " ".join(tokens)
    #return token

In [20]:
cleaned_text = [clean_text(text) for text in text]
text = cleaned_text

### **VECTORIZE THE TEXT DATA**

In [21]:
# Using Bag of Words (BoW)

vectorizer = CountVectorizer()
X = vectorizer.fit_transform(text)
X = X.toarray()

print(f"Shape of Vector = {X.shape}")

Shape of Vector = (261, 2001)


### **PREDICTING OVERALL QUALITY FROM TEXT**

In [25]:
y = cleaned_df["overall quality"].to_numpy()
# y = y.reshape(-1, 1)

#### Label Encoding

In [26]:
# Label Encoding

encoder = LabelEncoder()
enc_y = encoder.fit_transform(y)
y = enc_y

print(f"Size of Labels = {y.shape}")

Size of Labels = (261,)


In [28]:
print(f"Shape of Training Data: {X.shape}")
print(f"Shape of Training Labels: {y.shape}")

Shape of Training Data: (261, 2001)
Shape of Training Labels: (261,)


#### Training Logistic Regression Model

In [None]:
# Fine-tuning

parameters = {
    "penalty": ["l1", "l2", "elasticnet", "none"],
    "dual": [True, False],
    "C": [1, 0.1, 0.01],
    "fit_intercept": [True, False],
    "solver": ["newton-cg", "lbfgs", "liblinear", "sag", "saga"],
}

model = LogisticRegression()

grid_search = GridSearchCV(model, parameters, n_jobs=-1, scoring='f1_weighted')
grid_search.fit(X, y)

grid_search.best_params_

In [38]:
# {'C': 0.01,
#  'dual': False,
#  'fit_intercept': True,
#  'penalty': 'none',
#  'solver': 'sag'}

model = LogisticRegression(dual=False, fit_intercept=True, penalty="none", solver="sag", max_iter=5000)
pred = cross_val_predict(model, X, y, cv=5)
print(classification_report(y, pred))

              precision    recall  f1-score   support

           0       0.64      0.81      0.72       149
           1       0.56      0.42      0.48        96
           2       0.00      0.00      0.00        16

    accuracy                           0.62       261
   macro avg       0.40      0.41      0.40       261
weighted avg       0.57      0.62      0.59       261



In [None]:
# # Grid Search

# parameters = {
#     "penalty": ["l1", "l2", "elasticnet", "none"],
#     "dual": [True, False],
#     "C": [1, 0.1, 0.01],
#     "fit_intercept": [True, False],
#     "solver": ["newton-cg", "lbfgs", "liblinear", "sag", "saga"],
# }

# model = LogisticRegression()

# grid_search = GridSearchCV(model, parameters, n_jobs=-1)
# grid_search.fit(X_train, y_train)

# grid_search.best_params_

#### Training Decision Tree Model

In [None]:
# Fine-tuning

parameters = {
    "criterion": ["gini", "entropy", "log_loss"],
    "splitter": ["best", "random"],
    "max_features": ["auto", "sqrt", "log2", None],
}

model = DecisionTreeClassifier()

grid_search = GridSearchCV(model, parameters, n_jobs=-1, scoring='f1_weighted')
grid_search.fit(X, y)

grid_search.best_params_

In [55]:
# {'criterion': 'gini', 'max_features': 'log2', 'splitter': 'best'}

model = DecisionTreeClassifier(criterion="gini", max_features="log2", splitter="best")
pred = cross_val_predict(model, X, y, cv=5)
print(classification_report(y, pred))

              precision    recall  f1-score   support

           0       0.65      0.75      0.70       149
           1       0.57      0.44      0.49        96
           2       0.07      0.06      0.06        16

    accuracy                           0.59       261
   macro avg       0.43      0.42      0.42       261
weighted avg       0.58      0.59      0.58       261



In [None]:
# # Grid Search

# parameters = {
#     "criterion": ["gini", "entropy", "log_loss"],
#     "splitter": ["best", "random"],
#     "max_features": ["auto", "sqrt", "log2", None],
# }

# model = DecisionTreeClassifier()

# grid_search = GridSearchCV(model, parameters, n_jobs=-1)
# grid_search.fit(X_train, y_train)

# grid_search.best_params_

#### Training Random Forest Model

In [None]:
# Fine-tuning

parameters = {
    "n_estimators": [100, 200, 300],
    "criterion": ["gini", "entropy", "log_loss"],
    "max_features": ["auto", "sqrt", "log2", None],
    "bootstrap": [True, False],
    "oob_score": [True, False],
    "warm_start": [True, False],
    "class_weight": ["balanced", "balanced_subsample", None],
}

model = RandomForestClassifier()

grid_search = GridSearchCV(model, parameters, n_jobs=-1, scoring='f1_weighted')
grid_search.fit(X, y)

grid_search.best_params_

In [60]:
# {'bootstrap': True,
#  'class_weight': 'balanced',
#  'criterion': 'gini',
#  'max_features': None,
#  'n_estimators': 300,
#  'oob_score': False,
#  'warm_start': False}

model = RandomForestClassifier(bootstrap=True, class_weight='balanced', 
    criterion='gini', max_features=None, n_estimators=300, oob_score=False, warm_start=False)
pred = cross_val_predict(model, X, y, cv=5)
print(classification_report(y, pred))

              precision    recall  f1-score   support

           0       0.65      0.85      0.74       149
           1       0.52      0.35      0.42        96
           2       0.00      0.00      0.00        16

    accuracy                           0.62       261
   macro avg       0.39      0.40      0.39       261
weighted avg       0.56      0.62      0.58       261



In [None]:
# # Grid Search

# parameters = {
#     "n_estimators": [100, 200, 300],
#     "criterion": ["gini", "entropy", "log_loss"],
#     "max_features": ["auto", "sqrt", "log2", None],
#     "bootstrap": [True, False],
#     "oob_score": [True, False],
#     "warm_start": [True, False],
#     "class_weight": ["balanced", "balanced_subsample", None],
# }

# model = RandomForestClassifier()

# grid_search = GridSearchCV(model, parameters, n_jobs=-1)
# grid_search.fit(X_train, y_train)

# grid_search.best_params_

#### Training Neural Network

In [63]:
# One Hot Encoding

y = cleaned_df["overall quality"].to_numpy()
y = y.reshape(-1, 1)

encoder = OneHotEncoder()
enc_y = encoder.fit_transform(y)
y = enc_y.toarray()

print(f"Size of Labels = {y.shape}")
print(f"Label Sample: {y[0]}")

Size of Labels = (261, 3)
Label Sample: [0. 1. 0.]


In [64]:
print(f"Shape of Training Data: {X.shape}")
print(f"Shape of Training Labels: {y.shape}")

Shape of Training Data: (261, 2001)
Shape of Training Labels: (261, 3)


In [73]:
def init_model():

    # Define Model
    model = keras.models.Sequential([
        keras.layers.Dense(32, input_dim=X.shape[1], activation="relu"),
        keras.layers.Dropout(0.6),
        keras.layers.Dense(128, activation="relu"),
        keras.layers.Dropout(0.6),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(3, activation="softmax"),
    ])

    loss_function = keras.losses.CategoricalCrossentropy() # Define loss function
    # loss_function = keras.losses.SparseCategoricalCrossentropy() # Define loss function
    optimizer = keras.optimizers.SGD(learning_rate=0.005) # Define optimizer

    model.compile(optimizer=optimizer, loss=loss_function, metrics=["accuracy"]) # Compile the model

    return model

In [98]:
# Train the model

kf = KFold(n_splits=5)

pred = []

for train_index, test_index in kf.split(X):

    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model = init_model()

    history = model.fit(X_train, y_train, epochs=75, batch_size=1, 
        validation_data=(X_test, y_test), verbose=0)

    pred_test = [np.argmax(ele) for ele in model.predict(X_test)]
    pred += pred_test



In [99]:
new_y = [np.argmax(ele) for ele in y]
print(classification_report(new_y, pred))

              precision    recall  f1-score   support

           0       0.69      0.69      0.69       149
           1       0.53      0.56      0.55        96
           2       0.09      0.06      0.07        16

    accuracy                           0.61       261
   macro avg       0.44      0.44      0.44       261
weighted avg       0.60      0.61      0.60       261

