Skip to content
This repository was archived by the owner on Dec 6, 2023. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ environment:
PYTHON_ARCH: "64"
MINICONDA: "C:\\Miniconda35-x64"



install:
# Miniconda is pre-installed in the worker build
- "SET PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%"
Expand Down
92 changes: 92 additions & 0 deletions benchmarks/bench_20newsgroups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Benchmark polynomial classifiers on bag-of-words text classification
# Inspired by: https://github.com/scikit-learn/scikit-learn/blob/master
# /benchmarks/bench_20newsgroups.py

from time import time

import numpy as np
import scipy.sparse as sp

from sklearn.base import clone
from sklearn.metrics import accuracy_score, f1_score
from sklearn.datasets import fetch_20newsgroups_vectorized

from polylearn import (FactorizationMachineClassifier,
PolynomialNetworkClassifier)


# Registry of the models to benchmark, keyed by a short display name of the
# form "<model>-<degree>".
estimators = {}

# Degree-2 factorization machine: linear term disabled, fit_lower=None.
estimators['fm-2'] = FactorizationMachineClassifier(
    n_components=30,
    fit_linear=False,
    fit_lower=None,
    degree=2,
    random_state=0,
    max_iter=10,
)

# Degree-2 polynomial network, also with fit_lower=None.
estimators['polynet-2'] = PolynomialNetworkClassifier(
    n_components=15,
    degree=2,
    fit_lower=None,
    max_iter=10,
    random_state=0,
)

# The degree-3 variants are unfitted copies of the degree-2 models with only
# the listed hyper-parameters overridden.
estimators['fm-3'] = clone(estimators['fm-2']).set_params(degree=3)
estimators['polynet-3'] = clone(estimators['polynet-2']).set_params(
    degree=3, n_components=10)

if __name__ == '__main__':
    # Benchmark driver: fit every model in ``estimators`` on the vectorized
    # 20 newsgroups training split, predict on the test split, and print a
    # table of timings, F1 and accuracy.
    data_train = fetch_20newsgroups_vectorized(subset="train")
    data_test = fetch_20newsgroups_vectorized(subset="test")

    # Convert the feature matrices to CSC sparse format before fitting.
    X_train = sp.csc_matrix(data_train.data)
    X_test = sp.csc_matrix(data_test.data)

    # Binary task: class 0 against all other classes.
    y_train = data_train.target == 0  # atheism vs rest
    y_test = data_test.target == 0

    print("20 newsgroups")
    print("=============")
    print("X_train.shape = {0}".format(X_train.shape))
    print("X_train.format = {0}".format(X_train.format))
    print("X_train.dtype = {0}".format(X_train.dtype))
    # np.prod replaces the deprecated alias np.product, which was removed in
    # NumPy 2.0 and would raise AttributeError there.
    print("X_train density = {0}"
          "".format(X_train.nnz / np.prod(X_train.shape)))
    print("y_train {0}".format(y_train.shape))
    print("X_test {0}".format(X_test.shape))
    print("X_test.format = {0}".format(X_test.format))
    print("X_test.dtype = {0}".format(X_test.dtype))
    print("y_test {0}".format(y_test.shape))
    print()

    print("Classifier Training")
    print("===================")
    # Per-model results, each keyed by the estimator's display name.
    f1, accuracy, train_time, test_time = {}, {}, {}, {}

    for name, clf in sorted(estimators.items()):
        # Flush explicitly: the line has no trailing newline, so without it
        # the progress message may stay buffered until "done" is printed.
        print("Training %s ... " % name, end="", flush=True)

        t0 = time()
        clf.fit(X_train, y_train)
        train_time[name] = time() - t0

        t0 = time()
        y_pred = clf.predict(X_test)
        test_time[name] = time() - t0

        accuracy[name] = accuracy_score(y_test, y_pred)
        f1[name] = f1_score(y_test, y_pred)
        print("done")

    print("Classification performance:")
    print("===========================")
    print()
    print("%s %s %s %s %s" % ("Classifier".ljust(16),
                              "train".rjust(10),
                              "test".rjust(10),
                              "f1".rjust(10),
                              "accuracy".rjust(10)))
    # Rule width: a 16-char name column plus four 10-char columns, each
    # preceded by a single separating space.
    print("-" * (16 + 4 * 11))
    # Rows are listed in ascending order of F1 score.
    for name in sorted(f1, key=f1.get):
        print("%s %s %s %s %s" % (
            name.ljust(16),
            ("%.4fs" % train_time[name]).rjust(10),
            ("%.4fs" % test_time[name]).rjust(10),
            ("%.4f" % f1[name]).rjust(10),
            ("%.4f" % accuracy[name]).rjust(10)))

    print()
Loading