## Manifold learning in Power System Transient Stability Assessment

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
from sklearn import metrics
from sklearn import preprocessing
from sklearn import svm
from sklearn.pipeline import Pipeline
from sklearn.manifold import Isomap, TSNE, MDS
from sklearn.manifold import SpectralEmbedding as SE
from sklearn.manifold import LocallyLinearEmbedding as LLE
from sklearn.decomposition import PCA, KernelPCA, TruncatedSVD
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics.pairwise import paired_distances

In [None]:
# Using experimental HalvingRandomSearchCV for hyperparameters optimization.
from sklearn.experimental import enable_halving_search_cv # noqa
from sklearn.model_selection import HalvingRandomSearchCV

In [None]:
from scipy import stats

In [None]:
from annealing import simulated_annealing

In [None]:
import warnings
warnings.filterwarnings(action='ignore', category=FutureWarning)

In [None]:
# Figure aesthetics
sns.set(context='paper', style='white', font_scale=1.1)
sns.set_style('ticks', {'xtick.direction':'in', 'ytick.direction':'in'})

#### Power System Transient Stability Analysis Data (IEEE Benchmark Test Case)

In [None]:
data = pd.read_csv('GridDictionary2.csv')
data.head()

In [None]:
# print(data.columns.values)

In [None]:
# Percentage of "ones" in the "Stability" column.
print('There is {:.1f}% of unstable cases in the dataset!'
      .format(data['Stability'].sum()/float(len(data['Stability']))*100.))

In [None]:
# Separate the target column from the feature columns. Features are selected
# by name (everything except 'Stability') rather than by position, so the
# label cannot leak into the feature matrix if it is not the last column.
y_data = data['Stability']
X_data = data.drop(columns='Stability')  # features
no_features = X_data.shape[1]
print('X_data', X_data.shape)
print('y_data', y_data.shape)

#### Stratify shuffle split

In [None]:
# Split dataset into train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, train_size=0.8, stratify=y_data, shuffle=True)

In [None]:
print('X_train', X_train.shape)
print('y_train', y_train.shape)
print('X_test', X_test.shape)
print('y_test', y_test.shape)

In [None]:
# Share of unstable cases (label 1) in each of the two splits.
print('Unstable cases in training dataset: {:.1f}%'
      .format(np.sum(y_train)/float(len(y_train))*100.))
print('Unstable cases in testing dataset: {:.1f}%'
      .format(np.sum(y_test)/float(len(y_test))*100.))

In [None]:
# Stable cases index values.
idx_stable = y_test==0

#### Scoring models using cross-validated metrics

In [None]:
def score_default(X, y):
    """ Cross-validate an RBF-kernel SVC with default hyperparameters.

    Prints the mean and the standard deviation of the `f1` score
    obtained from a 3-fold cross-validation on (X, y).
    """
    classifier = svm.SVC(kernel='rbf', class_weight='balanced')
    f1_scores = cross_val_score(classifier, X, y, cv=3, scoring='f1')
    message = 'Score using 3-fold CV: {:g} +/- {:g}'
    print(message.format(np.mean(f1_scores), np.std(f1_scores)))

In [None]:
def score_optimized(X, y, C, gamma):
    """ Cross-validate an RBF-kernel SVC with the given C and gamma.

    Prints the mean and the standard deviation of the `f1` score
    obtained from a 3-fold cross-validation on (X, y).
    """
    classifier = svm.SVC(C=C, gamma=gamma, kernel='rbf',
                         class_weight='balanced')
    f1_scores = cross_val_score(classifier, X, y,
                                cv=3, scoring='f1', n_jobs=-1)
    message = 'Score using 3-fold CV: {:g} +/- {:g}'
    print(message.format(np.mean(f1_scores), np.std(f1_scores)))

In [None]:
def plot_projection(X, idx):
    """ Scatter plot of a two-dimensional projection, colored by class.

    Arguments
    ---------
    X: array
        Projected samples; the first two columns are plotted.
    idx: boolean mask
        True for the samples drawn as 'Stable' (green); all other
        samples are drawn as 'Unstable' (red).
    """
    figure, axis = plt.subplots(figsize=(4,4))
    # Stable samples are drawn first, unstable samples on top of them.
    classes = ((idx, 'green', 'Stable'), (~idx, 'red', 'Unstable'))
    for mask, color, label in classes:
        axis.scatter(X[mask,0], X[mask,1], s=20, c=color, marker='o',
                     edgecolors='k', alpha=0.5, label=label)
    axis.legend(loc='best')
    axis.set_xlabel('First component')
    axis.set_ylabel('Second component')
    axis.grid()
    figure.tight_layout()
    plt.show()

#### StandardScaler

In [None]:
# Standardize the input data.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#### The following dimensionality reduction methods are examined:

* **PCA** (principal components analysis)
* **kPCA** (kernelized principal components analysis)
* **tSVD** (truncated singular value decomposition)
* **iMAP** (isomap embedding)
* **t-SNE** (T-distributed stochastic neighbor embedding)
* **LLE** (locally linear embedding)
* **LLE:LTSA** (locally linear embedding with local tangent space alignment algorithm)
* **LLE:H** (locally linear embedding with Hessian eigenmap method)
* **SE** (spectral embedding)
* **MDS** (multi-dimensional scaling)

Some of these have their own hyperparameters (e.g. KernelPCA) which can be optimized together with the hyperparameters of the SVC estimator. This will be shown for the KernelPCA method.

### Hyperparameter optimization with simulated annealing

Simulated annealing is used for optimizing hyperparameters of the SVC estimator only.

In [None]:
def svc_cv(C, gamma, X_data, y_data):
    """
    SVC cross validation.

    This function instantiates a SVC classifier with a RBF kernel,
    balanced class weights and hyper-parameters C and gamma, and
    scores it on the given data with a 2-fold cross validation using
    the `f1` metric. The score is returned with a negative sign, so
    that minimizing the returned value maximizes `f1`.

    Arguments
    ---------
    C: float
        Regularization parameter (penalty is a squared l2).
    gamma: float
        Kernel coefficient.
    X_data: array-like
        Feature matrix passed to the cross validation.
    y_data: array-like
        Target labels passed to the cross validation.

    Returns
    -------
    cval: float
        Negative mean value of the `f1` score from the cross-validation.
    """
    # Instantiate SVC with RBF kernel and class weight balancing.
    # Probability estimates are not enabled: `f1` scoring only needs
    # `predict`, and enabling them makes every fit considerably slower.
    estimator = svm.SVC(C=C, gamma=gamma, kernel='rbf',
                        class_weight='balanced')
    # Score the estimator using cross validation.
    cval = cross_val_score(estimator, X_data, y_data,
                           scoring='f1', cv=2, n_jobs=-1)

    return -cval.mean()

In [None]:
def optimize_svc(X_data, y_data, x0, bounds=None, coolC=10., sigma=1.,
                 burn=10, eps=1e-6, verbose=False):
    """ Tune the SVC hyperparameters (C, gamma) with simulated annealing.

    The search runs over base-10 exponents: a point (expC, expGamma)
    is mapped to C = 10**expC and gamma = 10**expGamma before it is
    scored with `svc_cv`. `x0`, `bounds` and the remaining keyword
    arguments are handed over to `simulated_annealing` (`coolC` is
    passed as its `C` argument).

    Returns the pair (x, E) produced by `simulated_annealing`.
    """

    def svc_crossval(expC, expGamma):
        """ Objective: `svc_cv` evaluated at (10**expC, 10**expGamma). """
        return svc_cv(10**expC, 10**expGamma, X_data, y_data)

    best_x, best_E = simulated_annealing(
        svc_crossval, x0, bounds=bounds, C=coolC, sigma=sigma,
        burn=burn, eps=eps, verbose=verbose)

    return best_x, best_E

In [None]:
# Temperature schedule: two decay laws drawn on a linear (left panel)
# and on a logarithmic (right panel) temperature axis.
T0 = 1.
x = np.arange(start=1, stop=200, step=1)
y1 = T0*np.exp(-x/10)
y2 = T0*0.9**(x)
fig, ax = plt.subplots(1, 2, figsize=(6.5,2.5))
for _draw, _axis, _loc, _which in (
        (ax[0].plot, ax[0], 'upper right', 'major'),
        (ax[1].semilogy, ax[1], 'lower left', 'both')):
    _draw(x, y1, lw=2, label='T0*exp(-k/10)')
    _draw(x, y2, lw=2, label='T0*0.9**k')
    _axis.set_xlabel('Iterations')
    _axis.set_ylabel('Temperature')
    _axis.legend(loc=_loc)
    _axis.grid(which=_which)
fig.tight_layout()
plt.show()

In [None]:
# Initial values for SVC optimization, given as base-10 exponents
# (log10(C), log10(gamma)), i.e. C = 10**1 and gamma = 10**-2.
x0 = np.array([1., -2.])

### Principal components analysis (PCA)

In [None]:
# How many components are needed for the 90% explained variance?
pca = PCA(n_components=0.9, svd_solver='full').fit(X_train)
X_pca = pca.transform(X_test)
print(X_pca.shape)
# Score with the 90% explained variance.
score_default(X_pca, y_test)

In [None]:
# Dimensionality reduction.
# Set `whiten` to True/False to see if there is any difference.
pca = PCA(n_components=2, whiten=True).fit(X_train)
X_pca = pca.transform(X_test)

In [None]:
plot_projection(X_pca, idx_stable)

In [None]:
score_default(X_pca, y_test)

In [None]:
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_pca, y_test, x0, burn=20, eps=1e-10)
print(x, E)

In [None]:
score_optimized(X_pca, y_test, C=10**x[0], gamma=10**x[1])

### Dimensionality reduction using KernelPCA

Hyperparameters of the KernelPCA are optimized by means of **unsupervised** learning.

In [None]:
def kpca_metric(X_data, gamma=None):
    """ Reconstruction error of a two-component RBF KernelPCA.

    The data is embedded with KernelPCA and mapped back to the input
    space with the fitted inverse transform. The Euclidean distances
    between each original sample and its reconstruction are summed.
    """
    reducer = KernelPCA(n_components=2, kernel='rbf', gamma=gamma,
                        fit_inverse_transform=True, n_jobs=-1)
    # Round trip: input space -> embedding -> input space.
    X_back = reducer.inverse_transform(reducer.fit_transform(X_data))
    errors = paired_distances(X_data, X_back, metric='euclidean')

    return errors.sum()

In [None]:
def optimize_kpca(X_data, x0, coolC=10., sigma=1.,
                  burn=10, eps=1e-6):
    """ Tune the KernelPCA kernel coefficient with simulated annealing.

    The search runs over the base-10 exponent of gamma; each candidate
    is scored with `kpca_metric` on `X_data`. `coolC` is passed to
    `simulated_annealing` as its `C` argument.

    Returns the pair (x, E) produced by `simulated_annealing`.
    """

    def kpca(expGamma):
        """ Objective: `kpca_metric` evaluated at gamma = 10**expGamma. """
        return kpca_metric(X_data, gamma=10**expGamma)

    best_x, best_E = simulated_annealing(kpca, x0, C=coolC, sigma=sigma,
                                         burn=burn, eps=eps)

    return best_x, best_E

In [None]:
kpca_metric(X_train)

In [None]:
# Initial value for the Gamma-kPCA
xk0 = np.array([-2.])
# Optimize kPCA hyperparameters using simulated annealing.
x, E = optimize_kpca(X_train, xk0, burn=20, eps=1e-10)
print(x, E)

In [None]:
kpca_opt = KernelPCA(n_components=2, kernel='rbf', 
                     gamma=10**x[0], # optimal kernel value
                     n_jobs=-1).fit(X_train)
X_kpca_opt = kpca_opt.transform(X_test)

In [None]:
plot_projection(X_kpca_opt, idx_stable)

In [None]:
score_default(X_kpca_opt, y_test)

In [None]:
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_kpca_opt, y_test, x0, burn=20, eps=1e-10)
print(x, E)

In [None]:
score_optimized(X_kpca_opt, y_test, C=10**x[0], gamma=10**x[1])

Here, simulated annealing is used to optimize the hyperparameters of the KernelPCA and the SVC estimator **at the same time**, using **supervised** learning.

In [None]:
def kpca_svc_cv(C, gamma, gamma_kpca, X_data, y_data):
    """
    SVC cross validation with KernelPCA.

    A two-component RBF KernelPCA followed by a class-balanced RBF SVC
    is scored as a single pipeline with a 2-fold cross validation using
    the `f1` metric. The score is returned with a negative sign, so
    that minimizing the returned value maximizes `f1`.

    Arguments
    ---------
    C: float
        Regularization parameter of the SVC.
    gamma: float
        Kernel coefficient of the SVC.
    gamma_kpca: float
        Kernel coefficient of the KernelPCA.
    X_data: array-like
        Feature matrix passed to the cross validation.
    y_data: array-like
        Target labels passed to the cross validation.

    Returns
    -------
    cval: float
        Negative mean value of the `f1` score from the cross-validation.
    """
    # Instantiate SVC with RBF kernel and class weight balancing.
    # Probability estimates are not enabled: `f1` scoring only needs
    # `predict`, and enabling them makes every fit considerably slower.
    estimator = svm.SVC(C=C, gamma=gamma, kernel='rbf',
                        class_weight='balanced')
    reduction = KernelPCA(n_components=2, kernel='rbf', gamma=gamma_kpca)
    pipe = Pipeline([
        ('kpca', reduction),
        ('svm', estimator)
    ])
    # Score the pipeline using cross validation.
    cval = cross_val_score(pipe, X_data, y_data,
                           scoring='f1', cv=2, n_jobs=-1)

    return -cval.mean()

In [None]:
def optimize_kpca_svc(X_data, y_data, x0,
                      coolC=10., sigma=1., burn=10, eps=1e-6):
    """ Tune SVC and KernelPCA hyperparameters with simulated annealing.

    The search runs over base-10 exponents: a point
    (expC, expGamma, expGammkPCA) is mapped to C, the SVC gamma and the
    KernelPCA gamma before it is scored with `kpca_svc_cv`. `coolC` is
    passed to `simulated_annealing` as its `C` argument.

    Returns the pair (x, E) produced by `simulated_annealing`.
    """

    def kpca_svc_crossval(expC, expGamma, expGammkPCA):
        """ Objective: `kpca_svc_cv` at the de-logged parameters. """
        return kpca_svc_cv(10**expC, 10**expGamma, 10**expGammkPCA,
                           X_data, y_data)

    best_x, best_E = simulated_annealing(kpca_svc_crossval, x0, C=coolC,
                                         sigma=sigma, burn=burn, eps=eps)

    return best_x, best_E

In [None]:
# Initial values (C-SVM, Gamma-SVM, Gamma-kPCA)
xk0 = np.array([1., -2., -1.])
# Optimize kPCA & SVC hyperparameters using simulated annealing.
x, E = optimize_kpca_svc(X_train, y_train, xk0, burn=20, eps=1e-10)
print(x, E)

In [None]:
kpca_opt = KernelPCA(n_components=2, kernel='rbf', 
                     gamma=10**x[2], # optimal kernel value
                     n_jobs=-1).fit(X_train)
X_kpca_opt = kpca_opt.transform(X_test)

In [None]:
plot_projection(X_kpca_opt, idx_stable)

In [None]:
score_default(X_kpca_opt, y_test)

In [None]:
score_optimized(X_kpca_opt, y_test, C=10**x[0], gamma=10**x[1])

#### KernelPCA without the kPCA kernel optimization

In [None]:
# Reduce features in the dataset down to only 2 principal components.
kpca = KernelPCA(n_components=2, kernel='rbf', n_jobs=-1).fit(X_train)
X_kpca = kpca.transform(X_test)

In [None]:
plot_projection(X_kpca, idx_stable)

In [None]:
score_default(X_kpca, y_test)

In [None]:
# Apply bounds on SVC hyperparameters in log-space.
# Parameter C: 0.001 to 10000.
# Parameter gamma: 0.0001 to 10.
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_kpca, y_test, 
                    x0, bounds=[(-3,4), (-4,1)], 
                    burn=20, eps=1e-14, verbose=True)
print(x, E)

In [None]:
score_optimized(X_kpca, y_test, C=10**x[0], gamma=10**x[1])

#### Random Search CV for SVC hyperparameters optimization.

In [None]:
# Search type: ['random', 'halving']
search = 'random'

# Pipeline: two-component RBF KernelPCA followed by a class-balanced RBF SVC.
reduction = KernelPCA(n_components=2, kernel='rbf')
estimator = svm.SVC(kernel='rbf', class_weight='balanced', probability=True)
pipe = Pipeline([('kpca', reduction), ('svm', estimator)])
# Sampling distributions for the hyperparameters of both pipeline steps.
parameters = {
    'kpca__gamma': stats.expon(scale=.1),
    'svm__C': stats.expon(scale=100),
    'svm__gamma': stats.expon(scale=.1),
}
# Reject unknown search types up front, then build the chosen search.
if search not in ('random', 'halving'):
    raise NotImplementedError(f'Search method: {search} not recognized.')
if search == 'random':
    model = RandomizedSearchCV(estimator=pipe,
                               param_distributions=parameters,
                               n_iter=200, scoring='f1', cv=2,
                               n_jobs=-1, refit=True)
else:
    model = HalvingRandomSearchCV(estimator=pipe,
                                  param_distributions=parameters,
                                  scoring='f1', cv=2,
                                  n_jobs=-1, refit=True)
model.fit(X_train, y_train)

In [None]:
model.best_params_

In [None]:
scores = cross_val_score(model, X_test, y_test, cv=3, scoring='f1', n_jobs=-1)
print('Average score using 3-fold CV: {:.4f} +/- {:.4f}'
      .format(np.mean(scores), np.std(scores)))

### Dimensionality reduction using truncated SVD

In [None]:
svd = TruncatedSVD(n_components=2).fit(X_train)
X_svd = svd.transform(X_test)

In [None]:
plot_projection(X_svd, idx_stable)

In [None]:
score_default(X_svd, y_test)

In [None]:
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_svd, y_test, x0, burn=20, eps=1e-10)
print(x, E)

In [None]:
score_optimized(X_svd, y_test, C=10**x[0], gamma=10**x[1])

### Isomap embedding

In [None]:
iso = Isomap(n_components=2, n_neighbors=100, n_jobs=-1).fit(X_train)
X_iso = iso.transform(X_test)

In [None]:
plot_projection(X_iso, idx_stable)

In [None]:
score_default(X_iso, y_test)

In [None]:
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_iso, y_test, x0, burn=20, eps=1e-10)
print(x, E)

In [None]:
score_optimized(X_iso, y_test, C=10**x[0], gamma=10**x[1])

### t-distributed Stochastic Neighbor Embedding (t-SNE)

#### t-SNE with optimized hyperparameters

Here the hyperparameter of the t-SNE (i.e. `perplexity`) is optimized.

In [None]:
def tsne_metric(perplex, X_data):
    """ Kullback-Leibler divergence of a two-component t-SNE fit.

    Fits a TSNE with the given perplexity on `X_data` and returns the
    `kl_divergence_` attribute of the fitted object.
    """
    embedding = TSNE(n_components=2, perplexity=perplex, n_jobs=-1)

    return embedding.fit(X_data).kl_divergence_

In [None]:
def optimize_tsne(X_data, x0, coolC=10., sigma=1., burn=10, eps=1e-6,
                  verbose=False):
    """ Tune the t-SNE perplexity with simulated annealing.

    The search runs over the natural logarithm of the perplexity; each
    candidate is scored with `tsne_metric` on `X_data`. `coolC` is
    passed to `simulated_annealing` as its `C` argument.

    Returns the pair (x, E) produced by `simulated_annealing`.
    """

    def tsne(expPerplex):
        """ Objective: `tsne_metric` at perplexity = exp(expPerplex). """
        return tsne_metric(np.exp(expPerplex), X_data)

    best_x, best_E = simulated_annealing(tsne, x0, C=coolC, sigma=sigma,
                                         burn=burn, eps=eps,
                                         verbose=verbose)

    return best_x, best_E

In [None]:
# Initial value: natural logarithm of the perplexity (exp(3.) is about 20).
xt0 = np.array([3.])
# Optimize the t-SNE perplexity using simulated annealing.
x, E = optimize_tsne(X_train, xt0, burn=20, eps=1e-10, verbose=True)
print(x, E)

In [None]:
X_tsne_opt = TSNE(n_components=2, 
                  perplexity=np.exp(x[0]), # optimal value
                  n_jobs=-1).fit_transform(X_test)

In [None]:
plot_projection(X_tsne_opt, idx_stable)

In [None]:
score_default(X_tsne_opt, y_test)

In [None]:
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_tsne_opt, y_test, x0, burn=20, eps=1e-10)
print(x, E)

In [None]:
score_optimized(X_tsne_opt, y_test, C=10**x[0], gamma=10**x[1])

#### t-SNE without hyperparameters optimization

Hyperparameters of the t-SNE are not being optimized.

In [None]:
X_tsne = TSNE(n_components=2, n_jobs=-1).fit_transform(X_test)

In [None]:
plot_projection(X_tsne, idx_stable)

In [None]:
score_default(X_tsne, y_test)

In [None]:
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_tsne, y_test, x0, burn=20, eps=1e-10)
print(x, E)

In [None]:
score_optimized(X_tsne, y_test, C=10**x[0], gamma=10**x[1])

### Locally Linear Embedding (LLE)

In [None]:
lle = LLE(n_components=2, n_neighbors=10, 
          method='standard', n_jobs=-1).fit(X_train)
X_lle = lle.transform(X_test)

In [None]:
plot_projection(X_lle, idx_stable)

In [None]:
score_default(X_lle, y_test)

In [None]:
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_lle, y_test, x0, burn=20, eps=1e-10)
print(x, E)

In [None]:
score_optimized(X_lle, y_test, C=10**x[0], gamma=10**x[1])

### Locally linear embedding with LTSA

In [None]:
ltsa = LLE(n_components=2, n_neighbors=10, 
           method='ltsa', eigen_solver='dense', n_jobs=-1).fit(X_train)
X_ltsa = ltsa.transform(X_test)

In [None]:
plot_projection(X_ltsa, idx_stable)

In [None]:
score_default(X_ltsa, y_test)

In [None]:
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_ltsa, y_test, x0, burn=20, eps=1e-10)
print(x, E)

In [None]:
score_optimized(X_ltsa, y_test, C=10**x[0], gamma=10**x[1])

### Locally linear embedding with Hessian

In [None]:
hess = LLE(n_components=2, n_neighbors=100, 
           method='hessian', n_jobs=-1).fit(X_train)
X_hess = hess.transform(X_test)

In [None]:
plot_projection(X_hess, idx_stable)

In [None]:
score_default(X_hess, y_test)

In [None]:
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_hess, y_test, x0, burn=20, eps=1e-10)
print(x, E)

In [None]:
score_optimized(X_hess, y_test, C=10**x[0], gamma=10**x[1])

### Modified locally linear embedding (MLLE)

In [None]:
# Modified LLE is fitted on the training set; the test set is projected
# with this same fitted model (not with the standard-LLE model `lle`).
mlle = LLE(n_components=2, n_neighbors=50, 
           method='modified', n_jobs=-1).fit(X_train)
X_mlle = mlle.transform(X_test)

In [None]:
plot_projection(X_mlle, idx_stable)

In [None]:
score_default(X_mlle, y_test)

In [None]:
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_mlle, y_test, x0, burn=20, eps=1e-10)
print(x, E)

In [None]:
score_optimized(X_mlle, y_test, C=10**x[0], gamma=10**x[1])

### Spectral embedding

In [None]:
X_spec = SE(n_components=2, affinity='nearest_neighbors', 
            n_jobs=-1).fit_transform(X_test)

In [None]:
plot_projection(X_spec, idx_stable)

In [None]:
score_default(X_spec, y_test)

In [None]:
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_spec, y_test, x0, burn=20, eps=1e-10)
print(x, E)

In [None]:
score_optimized(X_spec, y_test, C=10**x[0], gamma=10**x[1])

### Multi-dimensional scaling (MDS)

In [None]:
# Set `metric` to True/False to see if there is any difference.
X_mds = MDS(n_components=2, metric=True, n_jobs=-1).fit_transform(X_test)

In [None]:
plot_projection(X_mds, idx_stable)

In [None]:
score_default(X_mds, y_test)

In [None]:
# Optimize SVC hyperparameters using simulated annealing.
x, E = optimize_svc(X_mds, y_test, x0, burn=20, eps=1e-10)
print(x, E)

In [None]:
score_optimized(X_mds, y_test, C=10**x[0], gamma=10**x[1])

### Precision-Recall Tradeoff

In [None]:
# TODO: Use "best" SVC parameters here.
C = 10**x[0]
gamma = 10**x[1]
best_parameters = {'C': C, 'gamma': gamma}

In [None]:
y_probas = cross_val_predict(svm.SVC(**best_parameters, probability=True, 
                                     class_weight='balanced'), 
                             X_test, y_test, cv=3, 
                             method='predict_proba',
                             n_jobs=-1)

In [None]:
y_scores = y_probas[:,1]  # score == probability of positive class
precisions, recalls, thresholds = metrics.precision_recall_curve(y_test, y_scores)

In [None]:
fig, ax = plt.subplots(figsize=(4.5,4.5))
ax.plot(precisions, recalls, lw=2, label='SVC')
default = np.argmin(np.abs(thresholds - 0.5))
ax.plot(precisions[default], recalls[default], '^', c='k', markersize=10, 
        label='Threshold = 0.5', fillstyle='none', mew=2)
ax.set_xlabel('Precision')
ax.set_ylabel('Recall')
ax.legend(loc='best')
ax.grid()
fig.tight_layout()
plt.show()

#### Plot decision region for test samples with only two features

In [None]:
# TODO: Select projected data.
X_test_best = X_mds

In [None]:
# TODO: Generate SVC from selected projection.
svc_best = svm.SVC(C=C, gamma=gamma, kernel='rbf', 
                   class_weight='balanced', 
                   probability=True).fit(X_test_best, y_test)

In [None]:
h = 0.1; delta = 0.01
x_min, x_max = X_test_best[:,0].min() - h, X_test_best[:,0].max() + h
y_min, y_max = X_test_best[:,1].min() - h, X_test_best[:,1].max() + h
xx, yy = np.meshgrid(np.arange(x_min, x_max, delta), np.arange(y_min, y_max, delta))
Z = svc_best.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:,1]
Z = Z.reshape(xx.shape)

In [None]:
fig, ax = plt.subplots(figsize=(6,5))
ax.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu, alpha=0.8)
ax.scatter(X_test_best[idx_stable,0], X_test_best[idx_stable,1], 
           s=30, c='green', marker='o', edgecolors='k', alpha=0.5, label='Stable')
ax.scatter(X_test_best[~idx_stable,0], X_test_best[~idx_stable,1], 
           s=30, c='red', marker='o', edgecolors='k', alpha=0.5, label='Unstable')
ax.legend(loc='upper left')
ax.set_xlabel('1st component')
ax.set_ylabel('2nd component')
ax.set_xlim(x_min, x_max)
ax.set_ylim(y_min, y_max)
ax.grid()
plt.show()

#### Computing environment

In [None]:
import sys, IPython, sklearn, scipy, matplotlib
# `sys.version` starts with the full version number followed by a space;
# taking the first token keeps versions such as 3.10.12 intact (a fixed
# five-character slice would cut them short).
# Adjacent string literals are used instead of backslash continuations so
# that no source indentation ends up inside the printed text.
print("Notebook created with:"
      "\nPython {:s}\nIPython {:s}\nScikit-learn {:s}\nPandas {:s}"
      "\nNumpy {:s}\nScipy {:s}\nMatplotlib {:s}"
      .format(sys.version.split()[0], IPython.__version__, 
              sklearn.__version__, pd.__version__, np.__version__, 
              scipy.__version__, matplotlib.__version__))