In [2]:
import os
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import FeatureUnion

from collections import Counter
from sklearn.metrics import classification_report, accuracy_score, f1_score

from statistics import mean

In [3]:
datadir = "/data/panstuffs/"
# - english
pan15train = "pan15-authorship-verification-training-dataset-english-2015-04-19/"
pan15test = "pan15-authorship-verification-test-dataset2-english-2015-04-19/"

pan14traine = "pan14-author-verification-training-corpus-english-essays-2014-04-22/"
pan14teste = "pan14-author-verification-test-corpus2-english-essays-2014-04-22/"

pan14trainn = "pan14-author-verification-training-corpus-english-novels-2014-04-22/"
pan14testn = "pan14-author-verification-test-corpus2-english-novels-2014-04-22/"


# 
pan15traindutch = "pan15-authorship-verification-training-dataset-dutch-2015-04-19/"
pan15testdutch = "pan15-authorship-verification-test-dataset2-dutch-2015-04-19/"

pan15traingreek = "pan15-authorship-verification-training-dataset-greek-2015-04-19/"
pan15testgreek = "pan15-authorship-verification-test-dataset2-greek-2015-04-19/"

pan15trainspanish = "pan15-authorship-verification-training-dataset-spanish-2015-04-19/"
pan15testspanish = "pan15-authorship-verification-test-dataset2-spanish-2015-04-19/"

# - other
pan14traindutchr = "pan14-author-verification-training-corpus-dutch-essays-2014-04-22/"
pan14testdutchr = "pan14-author-verification-test-corpus2-dutch-reviews-2014-04-22/"

pan14traindutche = "pan14-author-verification-training-corpus-dutch-essays-2014-04-22/"
pan14testdutche = "pan14-author-verification-test-corpus2-dutch-essays-2014-04-22/"

pan14traingreek = "pan14-author-verification-training-corpus-greek-articles-2014-04-22/"
pan14testgreek = "pan14-author-verification-test-corpus2-greek-articles-2014-04-22/"

pan14trainspanish = "pan14-author-verification-training-corpus-spanish-articles-2014-04-22/"
pan14testspanish = "pan14-author-verification-test-corpus2-spanish-articles-2014-04-22/"

In [4]:
def read_file(filepath):
    with open(filepath) as f:
        s = f.read()
    return s

def load_pan_data(directory, prefix=None):
    """Load known and unknown texts in the PAN data format"""
    # FIXME: assumes one known file per author, which is fine for English datasets only
    authors = sorted([x for x in os.listdir(directory) if not "." in x])  # ignore hidden stuff and files with extension
    known_texts = []
    unknown_texts = []
    for author in authors:
        authordir = os.path.join(directory, author)
        kfs = [x for x in os.listdir(authordir) if x.startswith("known")]
        all_known = ""
        for fn in kfs:
            kf = os.path.join(authordir, fn)
            all_known += "{}\n".format(read_file(kf))  
        uf = os.path.join(directory, author, "unknown.txt")
        known_texts.append(all_known)
        unknown_texts.append(read_file(uf))
        
    truthfile = os.path.join(directory, "truth.txt")
    with open(truthfile) as f:
        lines = f.read().strip().split("\n")
    y = [1 if line.split()[1] == "Y" else 0 for line in lines]
    y = np.array(y)
    return known_texts, unknown_texts, y

def get_fit_vectorizer(knowns, unknowns):
    char_tf = TfidfVectorizer(analyzer='char', ngram_range=(2,3), min_df=0.01, lowercase=False)
    word_tf = TfidfVectorizer(ngram_range=(1,2), lowercase=False, min_df=0.01)
    vectorizer = FeatureUnion([
        ('char', char_tf),
        ('word', word_tf)
    ])

    vectorizer.fit(knowns + unknowns)
    return vectorizer

def vectorize(knowns, unknowns, vectorizer):
    known_vecs = vectorizer.transform(knowns)
    unknown_vecs = vectorizer.transform(unknowns)
    pairs = np.abs(known_vecs - unknown_vecs)
    return pairs


In [5]:
def run_experiment(train_dir, test_dir):
    print("__RUNNING ON {} {}".format(train_dir, test_dir))
    tr_known, tr_unknown, tr_labels = load_pan_data(datadir + train_dir)
    te_known, te_unknown, te_labels = load_pan_data(datadir + test_dir) 
    
    vectorizer = get_fit_vectorizer(tr_known, tr_unknown)
    
    train_pairs = vectorize(tr_known, tr_unknown, vectorizer)
    test_pairs = vectorize(te_known, te_unknown, vectorizer)
    
    svm = LinearSVC()
    svm.fit(train_pairs, tr_labels)
    
    preds = svm.predict(test_pairs)
    cls_report = classification_report(te_labels, preds)
    acc = accuracy_score(te_labels, preds)
    print(cls_report)
    print(acc)
    return cls_report, acc

In [6]:
report, acc = run_experiment(pan15train, pan15test)

__RUNNING ON pan15-authorship-verification-training-dataset-english-2015-04-19/ pan15-authorship-verification-test-dataset2-english-2015-04-19/
             precision    recall  f1-score   support

          0       0.57      0.78      0.66       250
          1       0.65      0.40      0.50       250

avg / total       0.61      0.59      0.58       500

0.592


In [7]:
datasets = """
pan15train = "pan15-authorship-verification-training-dataset-english-2015-04-19/"
pan15test = "pan15-authorship-verification-test-dataset2-english-2015-04-19/"
pan14traine = "pan14-author-verification-training-corpus-english-essays-2014-04-22/"
pan14teste = "pan14-author-verification-test-corpus2-english-essays-2014-04-22/"
pan14trainn = "pan14-author-verification-training-corpus-english-novels-2014-04-22/"
pan14testn = "pan14-author-verification-test-corpus2-english-novels-2014-04-22/"
pan15traindutch = "pan15-authorship-verification-training-dataset-dutch-2015-04-19/"
pan15testdutch = "pan15-authorship-verification-test-dataset2-dutch-2015-04-19/"
pan15traingreek = "pan15-authorship-verification-training-dataset-greek-2015-04-19/"
pan15testgreek = "pan15-authorship-verification-test-dataset2-greek-2015-04-19/"
pan15trainspanish = "pan15-authorship-verification-training-dataset-spanish-2015-04-19/"
pan15testspanish = "pan15-authorship-verification-test-dataset2-spanish-2015-04-19/"
pan14traindutchr = "pan14-author-verification-training-corpus-dutch-essays-2014-04-22/"
pan14testdutchr = "pan14-author-verification-test-corpus2-dutch-reviews-2014-04-22/"
pan14traindutche = "pan14-author-verification-training-corpus-dutch-essays-2014-04-22/"
pan14testdutche = "pan14-author-verification-test-corpus2-dutch-essays-2014-04-22/"
pan14traingreek = "pan14-author-verification-training-corpus-greek-articles-2014-04-22/"
pan14testgreek = "pan14-author-verification-test-corpus2-greek-articles-2014-04-22/"
pan14trainspanish = "pan14-author-verification-training-corpus-spanish-articles-2014-04-22/"
pan14testspanish = "pan14-author-verification-test-corpus2-spanish-articles-2014-04-22/"
""".split("\n")[1:21]

In [8]:
dataset_pairs = []
for i in range(0, len(datasets), 2):
    train = datasets[i].split(" = ")[1].replace('"',"")
    test = datasets[i+1].split(" = ")[1].replace('"', "")
    dataset_pairs.append((train, test))
    

In [9]:
results = {}
for train, test in dataset_pairs:
    results["train: {}, test: {}".format(train, test)] = run_experiment(train, test)
    results["train: {}, test: {}".format(test, train)] = run_experiment(test, train)


__RUNNING ON pan15-authorship-verification-training-dataset-english-2015-04-19/ pan15-authorship-verification-test-dataset2-english-2015-04-19/
             precision    recall  f1-score   support

          0       0.57      0.78      0.66       250
          1       0.65      0.40      0.50       250

avg / total       0.61      0.59      0.58       500

0.592
__RUNNING ON pan15-authorship-verification-test-dataset2-english-2015-04-19/ pan15-authorship-verification-training-dataset-english-2015-04-19/
             precision    recall  f1-score   support

          0       0.57      0.88      0.69        50
          1       0.74      0.34      0.47        50

avg / total       0.66      0.61      0.58       100

0.61
__RUNNING ON pan14-author-verification-training-corpus-english-essays-2014-04-22/ pan14-author-verification-test-corpus2-english-essays-2014-04-22/
             precision    recall  f1-score   support

          0       0.57      0.63      0.60       100
          1     

  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

          0       0.50      1.00      0.67        50
          1       0.00      0.00      0.00        50

avg / total       0.25      0.50      0.33       100

0.5
__RUNNING ON pan14-author-verification-training-corpus-dutch-essays-2014-04-22/ pan14-author-verification-test-corpus2-dutch-reviews-2014-04-22/
             precision    recall  f1-score   support

          0       1.00      0.04      0.08        50
          1       0.51      1.00      0.68        50

avg / total       0.76      0.52      0.38       100

0.52
__RUNNING ON pan14-author-verification-test-corpus2-dutch-reviews-2014-04-22/ pan14-author-verification-training-corpus-dutch-essays-2014-04-22/
             precision    recall  f1-score   support

          0       0.67      0.50      0.57        48
          1       0.60      0.75      0.67        48

avg / total       0.63      0.62      0.62        96

0.625
__RUNNING ON pan14-author-verification-training-co

In [13]:
for key in results:
    print(key)
    print(results[key][0])
    print(results[key][1])
    print("---------")

train: pan15-authorship-verification-training-dataset-english-2015-04-19/, test: pan15-authorship-verification-test-dataset2-english-2015-04-19/
             precision    recall  f1-score   support

          0       0.57      0.78      0.66       250
          1       0.65      0.40      0.50       250

avg / total       0.61      0.59      0.58       500

0.592
---------
train: pan15-authorship-verification-test-dataset2-english-2015-04-19/, test: pan15-authorship-verification-training-dataset-english-2015-04-19/
             precision    recall  f1-score   support

          0       0.57      0.88      0.69        50
          1       0.74      0.34      0.47        50

avg / total       0.66      0.61      0.58       100

0.61
---------
train: pan14-author-verification-training-corpus-english-essays-2014-04-22/, test: pan14-author-verification-test-corpus2-english-essays-2014-04-22/
             precision    recall  f1-score   support

          0       0.57      0.63      0.60    

In [None]:
%%time
for train, test in dataset_pairs:
    run_experiment_n_times(train, test, 10)

__RUNNING ON pan15-authorship-verification-training-dataset-english-2015-04-19/ pan15-authorship-verification-test-dataset2-english-2015-04-19/
Counter({1: 370, 0: 130})
             precision    recall  f1-score   support

          0       0.91      0.47      0.62       250
          1       0.64      0.95      0.77       250

avg / total       0.78      0.71      0.69       500

Accuracy 0.712
Counter({1: 60, 0: 40})
             precision    recall  f1-score   support

          0       0.85      0.68      0.76        50
          1       0.73      0.88      0.80        50

avg / total       0.79      0.78      0.78       100

Accuracy 0.78
__RUNNING ON pan15-authorship-verification-training-dataset-english-2015-04-19/ pan15-authorship-verification-test-dataset2-english-2015-04-19/
Counter({1: 390, 0: 110})
             precision    recall  f1-score   support

          0       0.87      0.38      0.53       250
          1       0.61      0.94      0.74       250

avg / total     

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 149, 0: 16})
             precision    recall  f1-score   support

          0       0.56      0.11      0.18        82
          1       0.51      0.92      0.66        83

avg / total       0.54      0.52      0.42       165

Accuracy 0.5151515151515151
Counter({1: 78, 0: 22})
             precision    recall  f1-score   support

          0       0.68      0.30      0.42        50
          1       0.55      0.86      0.67        50

avg / total       0.62      0.58      0.54       100

Accuracy 0.58
test: [0.45495697565352144, 0.43988097732701897, 0.46200467988798954, 0.45945399393326591, 0.45046346454797159, 0.49091111854577901, 0.48159497304216281, 0.36485111278262022, 0.49741090466037163, 0.41942294159042925]
test avg: 0.452095114197113
train: [0.37629937629937638, 0.47862356621480717, 0.4873940788785438, 0.42210166293194951, 0.43502824858757067, 0.57108484151061834, 0.48574100046750812, 0.51461546758709953, 0.33333333333333331, 0.54427083333333337]
train avg: 0.4648

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 90, 0: 10})
             precision    recall  f1-score   support

          0       0.70      0.14      0.23        50
          1       0.52      0.94      0.67        50

avg / total       0.61      0.54      0.45       100

Accuracy 0.54
__RUNNING ON pan15-authorship-verification-training-dataset-greek-2015-04-19/ pan15-authorship-verification-test-dataset2-greek-2015-04-19/
Counter({1: 99, 0: 1})
             precision    recall  f1-score   support

          0       1.00      0.02      0.04        50
          1       0.51      1.00      0.67        50

avg / total       0.75      0.51      0.36       100

Accuracy 0.51
Counter({1: 97, 0: 3})
             precision    recall  f1-score   support

          0       0.67      0.04      0.08        50
          1       0.51      0.98      0.67        50

avg / total       0.59      0.51      0.37       100

Accuracy 0.51
__RUNNING ON pan15-authorship-verification-training-dataset-greek-2015-04-19/ pan15-authorship-verifica

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 87, 0: 13})
             precision    recall  f1-score   support

          0       0.38      0.10      0.16        50
          1       0.48      0.84      0.61        50

avg / total       0.43      0.47      0.39       100

Accuracy 0.47
__RUNNING ON pan15-authorship-verification-training-dataset-greek-2015-04-19/ pan15-authorship-verification-test-dataset2-greek-2015-04-19/
Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 73, 0: 27})
             precision    recall  f1-score   support

          0       0.63      0.34      0.44        50
          1       0.55      0.80      0.65        50

avg / total       0.59      0.57      0.55       100

Accuracy 0.57
__RUNNING ON pan15-authorship-verification-training-dataset-greek-2015-04-19/ pan15-authorship-verification-test-dataset2-greek-2015-04-19/
Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 66, 0: 34})
             precision    recall  f1-score   support

          0       0.53      0.36      0.43        50
          1       0.52      0.68      0.59        50

avg / total       0.52      0.52      0.51       100

Accuracy 0.52
__RUNNING ON pan15-authorship-verification-training-dataset-greek-2015-04-19/ pan15-authorship-verification-test-dataset2-greek-2015-04-19/
Counter({1: 99, 0: 1})
             precision    recall  f1-score   support

          0       1.00      0.02      0.04        50
          1       0.51      1.00      0.67        50

avg / total       0.75      0.51      0.36       100

Accuracy 0.51
Counter({1: 78, 0: 22})
             precision    recall  f1-score   support

          0       0.59      0.26      0.36        50
          1       0.53      0.82      0.64        50

avg / total       0.56      0.54      0.50       100

Accuracy 0.54
__RUNNING ON pan15-authorship-verification-training-dataset-greek-2015-04-19/ pan15-authorship-verific

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 97, 0: 3})
             precision    recall  f1-score   support

          0       1.00      0.06      0.11        50
          1       0.52      1.00      0.68        50

avg / total       0.76      0.53      0.40       100

Accuracy 0.53
Counter({1: 96})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        48
          1       0.50      1.00      0.67        48

avg / total       0.25      0.50      0.33        96

Accuracy 0.5
__RUNNING ON pan14-author-verification-training-corpus-dutch-essays-2014-04-22/ pan14-author-verification-test-corpus2-dutch-reviews-2014-04-22/


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 99, 0: 1})
             precision    recall  f1-score   support

          0       1.00      0.02      0.04        50
          1       0.51      1.00      0.67        50

avg / total       0.75      0.51      0.36       100

Accuracy 0.51
Counter({1: 96})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        48
          1       0.50      1.00      0.67        48

avg / total       0.25      0.50      0.33        96

Accuracy 0.5
__RUNNING ON pan14-author-verification-training-corpus-dutch-essays-2014-04-22/ pan14-author-verification-test-corpus2-dutch-reviews-2014-04-22/


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 99, 0: 1})
             precision    recall  f1-score   support

          0       1.00      0.02      0.04        50
          1       0.51      1.00      0.67        50

avg / total       0.75      0.51      0.36       100

Accuracy 0.51
Counter({1: 96})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        48
          1       0.50      1.00      0.67        48

avg / total       0.25      0.50      0.33        96

Accuracy 0.5
__RUNNING ON pan14-author-verification-training-corpus-dutch-essays-2014-04-22/ pan14-author-verification-test-corpus2-dutch-reviews-2014-04-22/


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 99, 0: 1})
             precision    recall  f1-score   support

          0       1.00      0.02      0.04        50
          1       0.51      1.00      0.67        50

avg / total       0.75      0.51      0.36       100

Accuracy 0.51
Counter({1: 95, 0: 1})
             precision    recall  f1-score   support

          0       1.00      0.02      0.04        48
          1       0.51      1.00      0.67        48

avg / total       0.75      0.51      0.36        96

Accuracy 0.5104166666666666
__RUNNING ON pan14-author-verification-training-corpus-dutch-essays-2014-04-22/ pan14-author-verification-test-corpus2-dutch-reviews-2014-04-22/
Counter({1: 99, 0: 1})
             precision    recall  f1-score   support

          0       1.00      0.02      0.04        50
          1       0.51      1.00      0.67        50

avg / total       0.75      0.51      0.36       100

Accuracy 0.51
Counter({1: 96})
             precision    recall  f1-score   support

          0   

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 96})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        48
          1       0.50      1.00      0.67        48

avg / total       0.25      0.50      0.33        96

Accuracy 0.5
__RUNNING ON pan14-author-verification-training-corpus-dutch-essays-2014-04-22/ pan14-author-verification-test-corpus2-dutch-reviews-2014-04-22/
Counter({1: 98, 0: 2})
             precision    recall  f1-score   support

          0       1.00      0.04      0.08        50
          1       0.51      1.00      0.68        50

avg / total       0.76      0.52      0.38       100

Accuracy 0.52
Counter({1: 96})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        48
          1       0.50      1.00      0.67        48

avg / total       0.25      0.50      0.33        96

Accuracy 0.5
__RUNNING ON pan14-author-verification-training-corpus-dutch-essays-2014-04-22/ pan14-author-verification-test-co

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 94, 0: 2})
             precision    recall  f1-score   support

          0       1.00      0.04      0.08        48
          1       0.51      1.00      0.68        48

avg / total       0.76      0.52      0.38        96

Accuracy 0.5208333333333334
test: [0.35517831293591262, 0.37629937629937638, 0.39673982800667434, 0.35517831293591262, 0.35517831293591262, 0.35517831293591262, 0.35517831293591262, 0.33333333333333331, 0.37629937629937638, 0.33333333333333331]
test avg: 0.3591896811951657
train: [0.37802816901408448, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.35607249892964182, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.37802816901408448]
train avg: 0.3445462170291144
__RUNNING ON pan14-author-verification-training-corpus-dutch-essays-2014-04-22/ pan14-author-verification-test-corpus2-dutch-essays-2014-04-22/
Counter({1: 83, 0: 13})
             precision    recall  f1-score   support

          0       

In [20]:
for train, test in dataset_pairs[-2:]:
    run_experiment_n_times(train, test, 10)

__RUNNING ON pan14-author-verification-training-corpus-greek-articles-2014-04-22/ pan14-author-verification-test-corpus2-greek-articles-2014-04-22/
Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 94, 0: 6})
             precision    recall  f1-score   support

          0       1.00      0.12      0.21        50
          1       0.53      1.00      0.69        50

avg / total       0.77      0.56      0.45       100

Accuracy 0.56
__RUNNING ON pan14-author-verification-training-corpus-greek-articles-2014-04-22/ pan14-author-verification-test-corpus2-greek-articles-2014-04-22/
Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 95, 0: 5})
             precision    recall  f1-score   support

          0       1.00      0.10      0.18        50
          1       0.53      1.00      0.69        50

avg / total       0.76      0.55      0.44       100

Accuracy 0.55
__RUNNING ON pan14-author-verification-training-corpus-greek-articles-2014-04-22/ pan14-author-verification-test-corpus2-greek-articles-2014-04-22/
Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 88, 0: 12})
             precision    recall  f1-score   support

          0       0.75      0.18      0.29        50
          1       0.53      0.94      0.68        50

avg / total       0.64      0.56      0.49       100

Accuracy 0.56
__RUNNING ON pan14-author-verification-training-corpus-greek-articles-2014-04-22/ pan14-author-verification-test-corpus2-greek-articles-2014-04-22/
Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 97, 0: 3})
             precision    recall  f1-score   support

          0       1.00      0.06      0.11        50
          1       0.52      1.00      0.68        50

avg / total       0.76      0.53      0.40       100

Accuracy 0.53
__RUNNING ON pan14-author-verification-training-corpus-greek-articles-2014-04-22/ pan14-author-verification-test-corpus2-greek-articles-2014-04-22/
Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 94, 0: 6})
             precision    recall  f1-score   support

          0       0.67      0.08      0.14        50
          1       0.51      0.96      0.67        50

avg / total       0.59      0.52      0.40       100

Accuracy 0.52
__RUNNING ON pan14-author-verification-training-corpus-greek-articles-2014-04-22/ pan14-author-verification-test-corpus2-greek-articles-2014-04-22/
Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 96, 0: 4})
             precision    recall  f1-score   support

          0       1.00      0.08      0.15        50
          1       0.52      1.00      0.68        50

avg / total       0.76      0.54      0.42       100

Accuracy 0.54
__RUNNING ON pan14-author-verification-training-corpus-greek-articles-2014-04-22/ pan14-author-verification-test-corpus2-greek-articles-2014-04-22/
Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 99, 0: 1})
             precision    recall  f1-score   support

          0       1.00      0.02      0.04        50
          1       0.51      1.00      0.67        50

avg / total       0.75      0.51      0.36       100

Accuracy 0.51
__RUNNING ON pan14-author-verification-training-corpus-greek-articles-2014-04-22/ pan14-author-verification-test-corpus2-greek-articles-2014-04-22/
Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5
__RUNNING ON pan14-author-verification-training-corpus-greek-articles-2014-04-22/ pan14-author-verification-test-corpus2-greek-articles-2014-04-22/
Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 99, 0: 1})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.49      0.98      0.66        50

avg / total       0.25      0.49      0.33       100

Accuracy 0.49
__RUNNING ON pan14-author-verification-training-corpus-greek-articles-2014-04-22/ pan14-author-verification-test-corpus2-greek-articles-2014-04-22/
Counter({1: 100})
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        50
          1       0.50      1.00      0.67        50

avg / total       0.25      0.50      0.33       100

Accuracy 0.5


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Counter({1: 98, 0: 2})
             precision    recall  f1-score   support

          0       1.00      0.04      0.08        50
          1       0.51      1.00      0.68        50

avg / total       0.76      0.52      0.38       100

Accuracy 0.52
test: [0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331, 0.33333333333333331]
test avg: 0.3333333333333333
train: [0.45436507936507931, 0.43573667711598751, 0.48574100046750812, 0.39673982800667434, 0.40476190476190477, 0.41653982749873159, 0.35517831293591262, 0.33333333333333331, 0.32885906040268453, 0.37629937629937638]
train avg: 0.39875544001871926
__RUNNING ON pan14-author-verification-training-corpus-spanish-articles-2014-04-22/ pan14-author-verification-test-corpus2-spanish-articles-2014-04-22/
Counter({1: 98, 0: 2})
             precision    recall  f1-score   support

          0       1.00  