In [1]:
from sklearn.datasets import make_classification
# Generate a synthetic classification dataset: 1000 samples with 20 features,
# of which 10 are informative and 10 are redundant (seeded for repeatability)
dataset_options = dict(n_samples=1000, n_features=20,
                       n_informative=10, n_redundant=10, random_state=7)
X, y = make_classification(**dataset_options)
# Show the shapes of the feature matrix and the target vector
print(X.shape, y.shape)


(1000, 20) (1000,)


In [2]:
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression

# Recreate the synthetic dataset (same settings and seed as the first cell)
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_redundant=10,
    random_state=7,
)
# Baseline: logistic regression fitted on all 20 raw features
model = LogisticRegression()
# Cross-validation scheme: 10 stratified folds repeated 3 times, seeded
cv = RepeatedStratifiedKFold(n_repeats=3, n_splits=10, random_state=1)
n_scores = cross_val_score(estimator=model, X=X, y=y, cv=cv,
                           scoring='accuracy', n_jobs=1)
# Summarize as mean accuracy with the standard deviation in parentheses
score_summary = (mean(n_scores), std(n_scores))
print('Accuracy: %.3f (%.3f)' % score_summary)


Accuracy: 0.824 (0.034)


In [8]:
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
# Pipeline: project the features onto 10 principal components, then fit a
# logistic regression on the projection. Because PCA lives inside the pipeline
# it is fitted only on the training portion of each cross-validation split.
# random_state is pinned because, depending on the scikit-learn version,
# svd_solver='auto' may select the randomized solver for data of this shape,
# which would otherwise let the scores drift between runs.
steps = [('pca', PCA(n_components=10, random_state=1)),
         ('m', LogisticRegression())]
model = Pipeline(steps=steps)
# Evaluate with 10 stratified folds repeated 3 times (seeded splitter)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=1)
# Report mean accuracy with the standard deviation in parentheses
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))


Accuracy: 0.824 (0.034)


In [9]:
# Evaluate singular value decomposition algorithm for classification
from sklearn.decomposition import TruncatedSVD
# Define the pipeline: reduce to 10 components with truncated SVD, then fit a
# logistic regression on the reduced features.
# TruncatedSVD uses a randomized algorithm by default, so random_state is
# pinned to make the reported accuracy reproducible across runs.
steps = [('svd', TruncatedSVD(n_components=10, random_state=1)),
         ('m', LogisticRegression())]
model = Pipeline(steps=steps)
# Evaluate the model with 10 stratified folds repeated 3 times (seeded splitter)
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=1)
# Report mean accuracy with the standard deviation in parentheses
print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))


Accuracy: 0.824 (0.034)


In [7]:
# Linear Discriminant Analysis (LDA) is a multi-class classification algorithm that can also be used for dimensionality reduction.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# Build the pipeline from named parts: an LDA projection onto a single
# component, followed by a logistic regression classifier
lda_reducer = LinearDiscriminantAnalysis(n_components=1)
classifier = LogisticRegression()
steps = [('lda', lda_reducer), ('m', classifier)]
model = Pipeline(steps=steps)
# Score the pipeline with 10 stratified folds repeated 3 times (seeded splitter)
cv = RepeatedStratifiedKFold(n_repeats=3, n_splits=10, random_state=1)
n_scores = cross_val_score(estimator=model, X=X, y=y, cv=cv,
                           scoring='accuracy', n_jobs=1)
# Print mean accuracy with the standard deviation in parentheses
score_summary = (mean(n_scores), std(n_scores))
print('Accuracy: %.3f (%.3f)' % score_summary)


Accuracy: 0.825 (0.034)


In [12]:
# Isomap (isometric mapping) is a non-linear manifold learning technique for reducing the dimensionality of a dataset.
from sklearn.manifold import Isomap
# Build the pipeline from named parts: an Isomap embedding into 10 dimensions,
# followed by a logistic regression classifier
isomap_reducer = Isomap(n_components=10)
classifier = LogisticRegression()
steps = [('iso', isomap_reducer), ('m', classifier)]
model = Pipeline(steps=steps)
# Score the pipeline with 10 stratified folds repeated 3 times (seeded splitter)
cv = RepeatedStratifiedKFold(n_repeats=3, n_splits=10, random_state=1)
n_scores = cross_val_score(estimator=model, X=X, y=y, cv=cv,
                           scoring='accuracy', n_jobs=1)
# Print mean accuracy with the standard deviation in parentheses
score_summary = (mean(n_scores), std(n_scores))
print('Accuracy: %.3f (%.3f)' % score_summary)


Accuracy: 0.888 (0.029)


In [10]:
# Locally Linear Embedding (LLE) is a manifold learning technique that can be used for non-linear dimensionality reduction.

from sklearn.manifold import LocallyLinearEmbedding
# Build the pipeline from named parts: a locally linear embedding into
# 10 dimensions, followed by a logistic regression classifier
lle_reducer = LocallyLinearEmbedding(n_components=10)
classifier = LogisticRegression()
steps = [('lle', lle_reducer), ('m', classifier)]
model = Pipeline(steps=steps)
# Score the pipeline with 10 stratified folds repeated 3 times (seeded splitter)
cv = RepeatedStratifiedKFold(n_repeats=3, n_splits=10, random_state=1)
n_scores = cross_val_score(estimator=model, X=X, y=y, cv=cv,
                           scoring='accuracy', n_jobs=1)
# Print mean accuracy with the standard deviation in parentheses
score_summary = (mean(n_scores), std(n_scores))
print('Accuracy: %.3f (%.3f)' % score_summary)


Accuracy: 0.886 (0.028)
