Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

105 lines (84 sloc) 3.461 kb
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline, make_union
from featureforge.vectorizer import Vectorizer
from iepy.extraction.features import parse_features
# Maps the value accepted for the "classifier" configuration option to the
# scikit-learn estimator class that gets instantiated for it.
_valid_classifiers = dict((
    ("sgd", SGDClassifier),
    ("knn", KNeighborsClassifier),
    ("svc", SVC),
    ("randomforest", RandomForestClassifier),
    ("adaboost", AdaBoostClassifier),
))
# Names of the keyword options that must all be supplied when building a
# RelationExtractionClassifier.
_configuration_options = [
    "classifier",
    "classifier_args",
    "sparse_features",
    "dense_features",
]
class RelationExtractionClassifier:
    """
    A feature-extraction pipeline followed by a configurable classifier.

    Evidences are turned into a scaled feature matrix by `self.pipeline` and
    that matrix is then handed to `self.classifier`, which is one of the
    estimators listed in `_valid_classifiers`.
    """

    def __init__(self, **config):
        """
        Build the pipeline and the classifier from keyword options.

        Every name in `_configuration_options` must be given:
        `classifier` is a key of `_valid_classifiers`, `classifier_args` is
        a mapping expanded as keyword arguments of the classifier's
        constructor, and `sparse_features` / `dense_features` are passed to
        `parse_features`.

        Raises `ValueError` if an option is missing or if the classifier
        name is not known.
        """
        # All options are mandatory; the first one found absent is reported.
        for name in _configuration_options:
            if name in config:
                continue
            raise ValueError("Missing configuration "
                             "option {!r}".format(name))

        # Sparse features are vectorized and then reduced by
        # ClassifierAsFeature; the result is joined with the dense features.
        sparse_vectorizer = Vectorizer(
            parse_features(config["sparse_features"]), sparse=True)
        densifier = make_pipeline(sparse_vectorizer, ClassifierAsFeature())
        dense_vectorizer = Vectorizer(
            parse_features(config["dense_features"]), sparse=False)
        vectorization = make_union(densifier, dense_vectorizer)

        # Look up the estimator class by name, then instantiate it.
        algorithm = config["classifier"]
        try:
            estimator_class = _valid_classifiers[algorithm]
        except KeyError:
            raise ValueError("Unknown classification algorithm "
                             "{!r}".format(algorithm))
        estimator = estimator_class(**config["classifier_args"])

        self.pipeline = make_pipeline(vectorization, StandardScaler())
        self.classifier = estimator

    def fit(self, X, y):
        """
        Fit the pipeline on `X`, `y`, then fit the classifier on the
        transformed data. Returns `self`.
        """
        features = self.pipeline.fit_transform(X, y)
        self.classifier.fit(features, y)
        return self

    def _chew(self, evidences):
        """
        Run `evidences` through the pipeline's `transform`.
        """
        chewed = self.pipeline.transform(evidences)
        return chewed

    def _predict(self, X):
        """
        Return the classifier's predictions for already transformed data.
        """
        labels = self.classifier.predict(X)
        return labels

    def _rank(self, X):
        """
        Return the classifier's decision function for already transformed
        data, flattened with `ravel()`.
        """
        scores = self.classifier.decision_function(X)
        return scores.ravel()

    def predict(self, evidences):
        """
        Transform `evidences` and return the classifier's predictions.
        """
        chewed = self._chew(evidences)
        return self._predict(chewed)

    def decision_function(self, evidences):
        """
        Transform `evidences` and return the flattened decision function
        values of the classifier.
        """
        chewed = self._chew(evidences)
        return self._rank(chewed)
class ClassifierAsFeature:
    """
    A transformation that essentially implements a form of dimensionality
    reduction.

    The wrapped classifier (an SGDClassifier with default settings unless
    another one is given) is fitted on the input, and its decision function
    is then emitted as a single feature column. This is useful to condense a
    high-dimensional feature set, such as a bag-of-words, into a feature
    that is denser in information.
    """

    def __init__(self, classifier=None):
        """
        `classifier` is the estimator to wrap; when it is None a new
        SGDClassifier built with no arguments is used.
        """
        self.classifier = SGDClassifier() if classifier is None else classifier

    def fit(self, X, y):
        """
        Fit the wrapped classifier and return `self`.

        `X` is expected to be an array-like or a sparse matrix.
        `y` is expected to be an array-like containing the classes to learn.
        """
        self.classifier.fit(X, y)
        return self

    def transform(self, X, y=None):
        """
        Return the wrapped classifier's decision function for `X`, reshaped
        into a single column.

        `X` is expected to be an array-like or a sparse matrix.
        `y` is accepted but not used.
        """
        scores = self.classifier.decision_function(X)
        return scores.reshape(-1, 1)
Jump to Line
Something went wrong with that request. Please try again.