diff --git a/imblearn/metrics/classification.py b/imblearn/metrics/classification.py
index d9a24d2fc..7973f8cfb 100644
--- a/imblearn/metrics/classification.py
+++ b/imblearn/metrics/classification.py
@@ -17,6 +17,7 @@
 from inspect import getcallargs
 
 import numpy as np
+import scipy as sp
 
 from sklearn.metrics.classification import (_check_targets, _prf_divide,
                                             precision_recall_fscore_support)
@@ -460,20 +461,27 @@ def geometric_mean_score(y_true,
                          y_pred,
                          labels=None,
                          pos_label=1,
-                         average='binary',
-                         sample_weight=None):
+                         average='multiclass',
+                         sample_weight=None,
+                         correction=0.0):
     """Compute the geometric mean
 
-    The geometric mean is the squared root of the product of the sensitivity
-    and specificity. This measure tries to maximize the accuracy on each
-    of the two classes while keeping these accuracies balanced.
+    The geometric mean (G-mean) is the root of the product of class-wise
+    sensitivities. This measure tries to maximize the accuracy on each of the
+    classes while keeping these accuracies balanced. For binary classification
+    G-mean is the square root of the product of the sensitivity
+    and specificity. For multi-class problems it is a higher root of the
+    product of the sensitivity for each class.
 
-    The specificity is the ratio ``tp / (tp + fn)`` where ``tp`` is the number
-    of true positives and ``fn`` the number of false negatives. The specificity
-    is intuitively the ability of the classifier to find all the positive
-    samples.
+    For compatibility with other imbalance performance measures, G-mean can be
+    calculated for each class separately on a one-vs-rest basis when
+    ``average != 'multiclass'``.
 
-    The best value is 1 and the worst value is 0.
+    The best value is 1 and the worst value is 0. Traditionally, if at least
+    one class is unrecognized by the classifier, G-mean resolves to zero. To
+    alleviate this property, for highly multi-class problems the sensitivity
+    of unrecognized classes can be "corrected" to be a user-specified value
+    (instead of zero). This option works only if ``average == 'multiclass'``.
 
     Parameters
     ----------
@@ -492,11 +500,11 @@ def geometric_mean_score(y_true,
 
     pos_label : str or int, optional (default=1)
         The class to report if ``average='binary'`` and the data is binary.
-        If the data are multiclass or multilabel, this will be ignored;
+        If the data are multiclass, this will be ignored;
         setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
         scores for that label only.
 
-    average : str or None, optional (default=None)
+    average : str or None, optional (default=``'multiclass'``)
         If ``None``, the scores for each class are returned. Otherwise, this
         determines the type of averaging performed on the data:
 
@@ -519,24 +527,26 @@ def geometric_mean_score(y_true,
             meaningful for multilabel classification where this differs from
             :func:`accuracy_score`).
 
-    warn_for : tuple or set, for internal use
-        This determines which warnings will be made in the case that this
-        function is being used to return only one of its metrics.
-
     sample_weight : ndarray, shape (n_samples, )
         Sample weights.
 
+    correction : float, optional (default=0.0)
+        Substitutes the sensitivity of unrecognized classes with this value
+        instead of zero.
+
     Returns
     -------
-    geometric_mean : float (if ``average`` = None) or ndarray, \
-        shape (n_unique_labels, )
+    geometric_mean : float
 
     Examples
     --------
-    >>> import numpy as np
     >>> from imblearn.metrics import geometric_mean_score
     >>> y_true = [0, 1, 2, 0, 1, 2]
     >>> y_pred = [0, 2, 1, 0, 0, 1]
+    >>> geometric_mean_score(y_true, y_pred)
+    0.0
+    >>> geometric_mean_score(y_true, y_pred, correction=0.001)
+    0.010000000000000004
     >>> geometric_mean_score(y_true, y_pred, average='macro')
     0.47140452079103168
     >>> geometric_mean_score(y_true, y_pred, average='micro')
@@ -556,18 +566,66 @@ def geometric_mean_score(y_true,
        36(3), (2003), pp 849-851.
 
     """
-    sen, spe, _ = sensitivity_specificity_support(
-        y_true,
-        y_pred,
-        labels=labels,
-        pos_label=pos_label,
-        average=average,
-        warn_for=('specificity', 'specificity'),
-        sample_weight=sample_weight)
+    if average is None or average != 'multiclass':
+        sen, spe, _ = sensitivity_specificity_support(
+            y_true,
+            y_pred,
+            labels=labels,
+            pos_label=pos_label,
+            average=average,
+            warn_for=('specificity', 'specificity'),
+            sample_weight=sample_weight)
+
+        LOGGER.debug('The sensitivity and specificity are : %s - %s' %
+                     (sen, spe))
+
+        return np.sqrt(sen * spe)
+    else:
+        present_labels = unique_labels(y_true, y_pred)
+
+        if labels is None:
+            labels = present_labels
+            n_labels = None
+        else:
+            n_labels = len(labels)
+            labels = np.hstack([labels, np.setdiff1d(present_labels, labels,
+                                                     assume_unique=True)])
+
+        le = LabelEncoder()
+        le.fit(labels)
+        y_true = le.transform(y_true)
+        y_pred = le.transform(y_pred)
+        sorted_labels = le.classes_
+
+        # labels are now from 0 to len(labels) - 1 -> use bincount
+        tp = y_true == y_pred
+        tp_bins = y_true[tp]
+
+        if sample_weight is not None:
+            tp_bins_weights = np.asarray(sample_weight)[tp]
+        else:
+            tp_bins_weights = None
+
+        if len(tp_bins):
+            tp_sum = bincount(tp_bins, weights=tp_bins_weights,
+                              minlength=len(labels))
+        else:
+            # Pathological case
+            true_sum = tp_sum = np.zeros(len(labels))
+        if len(y_true):
+            true_sum = bincount(y_true, weights=sample_weight,
+                                minlength=len(labels))
+
+        # Retain only selected labels
+        indices = np.searchsorted(sorted_labels, labels[:n_labels])
+        tp_sum = tp_sum[indices]
+        true_sum = true_sum[indices]
 
-    LOGGER.debug('The sensitivity and specificity are : %s - %s' % (sen, spe))
+        recall = _prf_divide(tp_sum, true_sum, "recall", "true", None,
+                             "recall")
+        recall[recall == 0] = correction
 
-    return np.sqrt(sen * spe)
+        return sp.stats.mstats.gmean(recall)
 
 
 def make_index_balanced_accuracy(alpha=0.1, squared=True):
@@ -616,7 +674,14 @@ def compute_score(*args, **kwargs):
             # Get the signature of the sens/spec function
             sens_spec_sig = signature(sensitivity_specificity_support)
             # Filter the inputs required by the sens/spec function
-            tags_sens_spec = sens_spec_sig.bind(**tags_scoring_func)
+            if scoring_func != geometric_mean_score:
+                tags_sens_spec = sens_spec_sig.bind(**tags_scoring_func)
+            else:
+                # Adapt the parameters to the sens/spec function
+                del tags_scoring_func['correction']
+                if "average" not in kwargs:
+                    tags_scoring_func['average'] = 'binary'
+                tags_sens_spec = sens_spec_sig.bind(**tags_scoring_func)
             # Call the sens/spec function
             sen, spe, _ = sensitivity_specificity_support(
                 *tags_sens_spec.args,
diff --git a/imblearn/metrics/tests/test_classification.py b/imblearn/metrics/tests/test_classification.py
index ec5c1685a..395765210 100644
--- a/imblearn/metrics/tests/test_classification.py
+++ b/imblearn/metrics/tests/test_classification.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 
-from numpy.testing import (assert_allclose, assert_array_equal,
+from numpy.testing import (assert_array_almost_equal, assert_array_equal,
                            assert_no_warnings, assert_equal,
                            assert_almost_equal, assert_raises)
 from sklearn.utils.testing import assert_warns_message, ignore_warnings
@@ -27,7 +27,6 @@
 from imblearn.metrics import classification_report_imbalanced
 
 RND_SEED = 42
-R_TOL = 1e-2
 
 ###############################################################################
 # Utilities for testing
@@ -88,8 +87,8 @@ def test_sensitivity_specificity_score_binary():
     # detailed measures for each class
     sen, spe, sup = sensitivity_specificity_support(
         y_true, y_pred, average=None)
-    assert_allclose(sen, [0.88, 0.68], rtol=R_TOL)
-    assert_allclose(spe, [0.68, 0.88], rtol=R_TOL)
+    assert_array_almost_equal(sen, [0.88, 0.68], 2)
+    assert_array_almost_equal(spe, [0.68, 0.88], 2)
     assert_array_equal(sup, [25, 25])
 
     # individual scoring function that can be used for grid search: in the
@@ -99,10 +98,10 @@ def test_sensitivity_specificity_score_binary():
             'average': 'binary'
     }, assert_no_warnings)]:
         sen = my_assert(sensitivity_score, y_true, y_pred, **kwargs)
-        assert_allclose(sen, 0.68, rtol=R_TOL)
+        assert_array_almost_equal(sen, 0.68, 2)
 
         spe = my_assert(specificity_score, y_true, y_pred, **kwargs)
-        assert_allclose(spe, 0.88, rtol=R_TOL)
+        assert_array_almost_equal(spe, 0.88, 2)
 
 
 def test_sensitivity_specificity_f_binary_single_class():
@@ -125,22 +124,22 @@ def test_sensitivity_specificity_extra_labels():
     # No average: zeros in array
     actual = specificity_score(
         y_true, y_pred, labels=[0, 1, 2, 3, 4], average=None)
-    assert_allclose([1., 0.67, 1., 1., 1.], actual, rtol=R_TOL)
+    assert_array_almost_equal([1., 0.67, 1., 1., 1.], actual, 2)
 
     # Macro average is changed
     actual = specificity_score(
         y_true, y_pred, labels=[0, 1, 2, 3, 4], average='macro')
-    assert_allclose(np.mean([1., 0.67, 1., 1., 1.]), actual, rtol=R_TOL)
+    assert_array_almost_equal(np.mean([1., 0.67, 1., 1., 1.]), actual, 2)
 
     # Check for micro
     actual = specificity_score(
         y_true, y_pred, labels=[0, 1, 2, 3, 4], average='micro')
-    assert_allclose(15. / 16., actual, rtol=R_TOL)
+    assert_array_almost_equal(15. / 16., actual)
 
     # Check for weighted
     actual = specificity_score(
         y_true, y_pred, labels=[0, 1, 2, 3, 4], average='macro')
-    assert_allclose(np.mean([1., 0.67, 1., 1., 1.]), actual, rtol=R_TOL)
+    assert_array_almost_equal(np.mean([1., 0.67, 1., 1., 1.]), actual, 2)
 
 
 @ignore_warnings
@@ -152,7 +151,7 @@ def test_sensitivity_specificity_ignored_labels():
     specificity_13 = partial(specificity_score, y_true, y_pred, labels=[1, 3])
     specificity_all = partial(specificity_score, y_true, y_pred, labels=None)
 
-    assert_allclose([1., 0.33], specificity_13(average=None), rtol=R_TOL)
+    assert_array_almost_equal([1., 0.33], specificity_13(average=None), 2)
     assert_almost_equal(
         np.mean([1., 0.33]), specificity_13(average='macro'), 2)
     assert_almost_equal(
@@ -224,20 +223,84 @@ def test_geometric_mean_support_binary():
 
 
 def test_geometric_mean_multiclass():
-    """Test geometric mean for multiclass classification task"""
+    y_true = [0, 0, 1, 1]
+    y_pred = [0, 0, 1, 1]
+    assert_almost_equal(geometric_mean_score(y_true, y_pred), 1.0, 10)
+
+    y_true = [0, 0, 0, 0]
+    y_pred = [1, 1, 1, 1]
+    assert_almost_equal(geometric_mean_score(y_true, y_pred), 0.0, 10)
+
+    cor = 0.001
+    y_true = [0, 0, 0, 0]
+    y_pred = [0, 0, 0, 0]
+    assert_almost_equal(geometric_mean_score(y_true, y_pred, correction=cor),
+                        1.0, 10)
+
+    y_true = [0, 0, 0, 0]
+    y_pred = [1, 1, 1, 1]
+    assert_almost_equal(geometric_mean_score(y_true, y_pred, correction=cor),
+                        cor, 10)
+
+    y_true = [0, 0, 1, 1]
+    y_pred = [0, 1, 1, 0]
+    assert_almost_equal(geometric_mean_score(y_true, y_pred, correction=cor),
+                        0.5, 10)
+
+    y_true = [0, 1, 2, 0, 1, 2]
+    y_pred = [0, 2, 1, 0, 0, 1]
+    assert_almost_equal(geometric_mean_score(y_true, y_pred, correction=cor),
+                        (1*cor*cor)**(1.0/3.0), 10)
+
+    y_true = [0, 1, 2, 3, 4, 5]
+    y_pred = [0, 1, 2, 3, 4, 5]
+    assert_almost_equal(geometric_mean_score(y_true, y_pred, correction=cor),
+                        1, 10)
+
+    y_true = [0, 1, 1, 1, 1, 0]
+    y_pred = [0, 0, 1, 1, 1, 1]
+    assert_almost_equal(geometric_mean_score(y_true, y_pred, correction=cor),
+                        (0.5*0.75)**0.5, 10)
+
+    y_true = [0, 1, 2, 0, 1, 2]
+    y_pred = [0, 2, 1, 0, 0, 1]
+    assert_almost_equal(geometric_mean_score(y_true, y_pred, average='macro'),
+                        0.47140452079103168, 10)
+    assert_almost_equal(geometric_mean_score(y_true, y_pred, average='micro'),
+                        0.47140452079103168, 10)
+    assert_almost_equal(geometric_mean_score(y_true, y_pred,
+                                             average='weighted'),
+                        0.47140452079103168, 10)
+    assert_almost_equal(geometric_mean_score(y_true, y_pred, average=None),
+                        [0.8660254, 0.0, 0.0])
+
+    y_true = [0, 1, 2, 0, 1, 2]
+    y_pred = [0, 1, 1, 0, 0, 1]
+    assert_almost_equal(geometric_mean_score(y_true, y_pred, labels=[0, 1]),
+                        0.70710678118654752, 10)
+    assert_almost_equal(geometric_mean_score(y_true, y_pred, labels=[0, 1],
+                                             sample_weight=[1, 2, 1, 1, 2, 1]),
+                        0.70710678118654752, 10)
+    assert_almost_equal(geometric_mean_score(y_true, y_pred, labels=[0, 1],
+                                             sample_weight=[1, 2, 1, 1, 2, 1],
+                                             average='weighted'),
+                        0.3333333333, 10)
+
     y_true, y_pred, _ = make_prediction(binary=False)
+    geo_mean = geometric_mean_score(y_true, y_pred)
+    assert_array_almost_equal(geo_mean, 0.41, 2)
+
 
     # Compute the geometric mean for each of the classes
     geo_mean = geometric_mean_score(y_true, y_pred, average=None)
-    assert_allclose(geo_mean, [0.85, 0.29, 0.7], rtol=R_TOL)
+    assert_array_almost_equal(geo_mean, [0.85, 0.29, 0.7], 2)
 
     # average tests
     geo_mean = geometric_mean_score(y_true, y_pred, average='macro')
     assert_almost_equal(geo_mean, 0.68, 2)
 
     geo_mean = geometric_mean_score(y_true, y_pred, average='weighted')
-    assert_allclose(geo_mean, 0.65, rtol=R_TOL)
-
+    assert_array_almost_equal(geo_mean, 0.65, 2)
 
 def test_iba_geo_mean_binary():
     """Test to test the iba using the geometric mean"""
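
For reference, a minimal standalone sketch of what the new ``average='multiclass'`` branch computes: the geometric mean of the per-class recalls, with zero recalls replaced by ``correction``. The helper name ``gmean_of_recalls`` is illustrative only and is not part of imblearn or of this patch; the patched function itself reaches the same number through ``sp.stats.mstats.gmean(recall)``.

# Illustrative sketch only: `gmean_of_recalls` is a hypothetical helper that
# loosely mirrors the `average='multiclass'` branch added in the patch above.
import numpy as np

def gmean_of_recalls(y_true, y_pred, correction=0.0):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    labels = np.unique(np.concatenate([y_true, y_pred]))
    recalls = []
    for label in labels:
        mask = y_true == label
        # Recall of `label`; 0.0 when the class has no true samples.
        recalls.append(np.mean(y_pred[mask] == label) if mask.any() else 0.0)
    recalls = np.asarray(recalls, dtype=float)
    recalls[recalls == 0] = correction
    # Geometric mean == n-th root of the product of the n per-class recalls.
    return recalls.prod() ** (1.0 / len(recalls))

y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]
print(gmean_of_recalls(y_true, y_pred))                    # 0.0
print(gmean_of_recalls(y_true, y_pred, correction=0.001))  # ~0.01, as in the docstring example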