# Data Import

In [1]:
import pandas as pd

In [2]:
df = pd.read_parquet("args.parquet").drop("conclusion", axis=1)

IDS = df["id"].tolist()
SAMPLES = df["premise"].tolist()

del(df)

In [3]:
df = pd.read_csv("webis-argquality20-full.csv")

# Vectorization

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(
    lowercase=True, 
    stop_words="english",
    max_features = 2**18
)
SAMPLES = vectorizer.fit_transform(SAMPLES)

# Support Vector Machine for Argument Classification

In [5]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

X = df["Premise"].astype(str).tolist()
Y = (df["Combined Quality"].astype(float) != -4.0).tolist()

X = vectorizer.transform(X)

classificator = SVC()
classificator.fit(X,Y)

scores = cross_val_score(classificator, X, Y, cv=10, scoring='f1')
"F1-score: {}".format(scores.mean().round(4))

'F1-score: 0.88'

# Support Vector Regression for Quality Prediction

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score

X = df.loc[df["Combined Quality"] != -4.0, "Premise"].astype(str).values
Y = df.loc[df["Combined Quality"] != -4.0, "Combined Quality"].astype(float).values

scaler = MinMaxScaler()
Y = scaler.fit_transform(Y.reshape(-1, 1)).ravel()


X = vectorizer.transform(X).todense()

regressor = LinearSVR()
regressor.fit(X,Y)

scores = cross_val_score(regressor, X, Y, cv=10)
"MSE: {}".format(scores.mean().round(4))



'MSE: 0.1413'

# Predicting Scores for the args.me dataset

In [7]:
PREDICTIONS = classificator.predict(SAMPLES)

In [8]:
df = pd.DataFrame(zip(IDS,PREDICTIONS), columns=["id","quality"])
df.to_parquet("quality.parquet")