diff --git a/.gitignore b/.gitignore
index 569c3277c5f..73d7ddd9b33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,3 +160,4 @@ sktime/contrib/nothing_to_see_here.py
 sktime/distances/elastic_cython.html
 *.html
 sktime/contrib/distance_based/old_time_series_neighbors.py
+sktime/contrib/debug.py
diff --git a/sktime/base/_base.py b/sktime/base/_base.py
index b1b75665d0d..ea32fc4351d 100644
--- a/sktime/base/_base.py
+++ b/sktime/base/_base.py
@@ -49,7 +49,6 @@ class name: BaseEstimator
 __all__ = ["BaseEstimator", "BaseObject"]
 
 import inspect
-
 from copy import deepcopy
 
 from sklearn import clone
@@ -147,13 +146,13 @@ def get_tag(self, tag_name, tag_value_default=None, raise_error=True):
         Returns
         -------
         tag_value :
-            Value of the `tag_name` tag in self. If not found, returns
-            `tag_value_default`.
+            Value of the `tag_name` tag in self. If not found, raises an error if
+            `raise_error` is True, otherwise returns `tag_value_default`.
 
         Raises
         ------
-        ValueError if raise_error is True and tag_name does not exist
-            i.e., if tag_name is not in self.get_tags().keys()
+        ValueError if raise_error is True and tag_name does not exist, i.e. if
+        tag_name is not in self.get_tags().keys()
         """
         collected_tags = self.get_tags()
 
diff --git a/sktime/classification/base.py b/sktime/classification/base.py
index 0d48d814d7b..8767fdea9ba 100644
--- a/sktime/classification/base.py
+++ b/sktime/classification/base.py
@@ -1,18 +1,14 @@
 # -*- coding: utf-8 -*-
 # copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
 """
-Base class template for time series classifier scitype.
+Abstract base class for time series classifiers.
 
     class name: BaseClassifier
 
-Scitype defining methods:
+Defining methods:
     fitting         - fit(self, X, y)
     predicting      - predict(self, X)
-
-State:
-    fitted model/strategy   - by convention, any attributes ending in "_"
-    fitted state flag       - is_fitted (property)
-    fitted state inspection - check_is_fitted()
+                    - predict_proba(self, X)
 
 Inspection methods:
     hyper-parameter inspection  - get_params()
@@ -26,60 +22,39 @@ class name: BaseClassifier
 
 __all__ = [
     "BaseClassifier",
-    "classifier_list",
 ]
-__author__ = ["mloning", "fkiraly"]
+__author__ = ["mloning", "fkiraly", "TonyBagnall", "MatthewMiddlehurst"]
 
 import numpy as np
 
 from sktime.base import BaseEstimator
+from sktime.utils.validation import check_n_jobs
 from sktime.utils.validation.panel import check_X, check_X_y
 
-"""
-Main list of classifiers extending this class. For clarity, some utility classifiers,
-such as Proximity Stump, are not listed.
-"""
-classifier_list = [
-    # in classification/distance_based
-    "ProximityForest",
-    # "KNeighborsTimeSeriesClassifier",
-    # "ElasticEnsemble",
-    # "ShapeDTW",
-    # in classification/dictionary_based
-    "BOSS",
-    "ContractableBOSS",
-    "TemporalDictionaryEnsemble",
-    "WEASEL",
-    "MUSE",
-    # in classification/interval_based
-    "RandomIntervalSpectralForest",
-    "TimeSeriesForest",
-    "CanonicalIntervalForest",
-    # in classification/shapelet_based
-    "ShapeletTransformClassifier",
-    "ROCKET",
-    "MrSEQLClassifier",
-]
-
 
 class BaseClassifier(BaseEstimator):
-    """Base time series classifier template class.
-
-    The base classifier specifies the methods and method
-    signatures that all forecasters have to implement.
+    """Abstract base class for time series classifiers.
 
-    Specific implementations of these methods is deferred to concrete
-    forecasters.
+    The base classifier specifies the methods and method signatures that all
+    classifiers have to implement.
     """
 
     _tags = {
         "coerce-X-to-numpy": True,
         "coerce-X-to-pandas": False,
+        "capability:multivariate": False,
+        "capability:unequal_length": False,
+        "capability:missing_values": False,
+        "capability:train_estimate": False,
+        "capability:contractable": False,
+        "capability:multithreading": False,
     }
 
     def __init__(self):
-        self._is_fitted = False
-
+        self.classes_ = []
+        self.n_classes_ = 0
+        self._class_dictionary = {}
+        self._threads_to_use = 1
         super(BaseClassifier, self).__init__()
 
     def fit(self, X, y):
@@ -87,11 +62,13 @@ def fit(self, X, y):
 
         Parameters
         ----------
-        X : 3D np.array, array-like or sparse matrix
-                of shape = [n_instances,n_dimensions,series_length]
-                or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
-        y : array-like, shape =  [n_instances] - the class labels.
+        X : 2D np.array (univariate, equal length series) of shape =
+            [n_instances, series_length]
+            or 3D np.array (any number of dimensions, equal length series) of shape =
+            [n_instances,n_dimensions,series_length]
+            or pd.DataFrame with each column a dimension, each cell a pd.Series (any
+            number of dimensions, equal or unequal length series)
+        y : 1D np.array of shape =  [n_instances] - the class labels.
 
         Returns
         -------
@@ -103,12 +80,31 @@ def fit(self, X, y):
         Changes state by creating a fitted model that updates attributes
         ending in "_" and sets is_fitted flag to True.
         """
-        coerce_to_numpy = self.get_tag("coerce-X-to-numpy", False)
-        coerce_to_pandas = self.get_tag("coerce-X-to-pandas", False)
+        coerce_to_numpy = self.get_tag("coerce-X-to-numpy")
+        coerce_to_pandas = self.get_tag("coerce-X-to-pandas")
+        allow_multivariate = self.get_tag("capability:multivariate")
         X, y = check_X_y(
-            X, y, coerce_to_numpy=coerce_to_numpy, coerce_to_pandas=coerce_to_pandas
+            X,
+            y,
+            coerce_to_numpy=coerce_to_numpy,
+            coerce_to_pandas=coerce_to_pandas,
+            enforce_univariate=not allow_multivariate,
         )
 
+        multithread = self.get_tag("capability:multithreading")
+        if multithread:
+            try:
+                self._threads_to_use = check_n_jobs(self.n_jobs)
+            except AttributeError as err:
+                raise AttributeError(
+                    "self.n_jobs must be set if capability:multithreading is True"
+                ) from err
+
+        self.classes_ = np.unique(y)
+        self.n_classes_ = self.classes_.shape[0]
+        # rebuilt on every fit so that refitting never keeps stale label indices
+        self._class_dictionary = {c: i for i, c in enumerate(self.classes_)}
+
         self._fit(X, y)
 
         # this should happen last
@@ -121,58 +117,74 @@ def predict(self, X):
 
         Parameters
         ----------
-        X : 3D np.array, array-like or sparse matrix
-                of shape = [n_instances,n_dimensions,series_length]
-                or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
+        X : 2D np.array (univariate, equal length series) of shape =
+            [n_instances, series_length]
+            or 3D np.array (any number of dimensions, equal length series) of shape =
+            [n_instances,n_dimensions,series_length]
+            or pd.DataFrame with each column a dimension, each cell a pd.Series (any
+            number of dimensions, equal or unequal length series)
 
         Returns
         -------
-        y : array-like, shape =  [n_instances] - predicted class labels
+        y : 1D np.array of shape =  [n_instances] - predicted class labels
         """
-        coerce_to_numpy = self.get_tag("coerce-X-to-numpy", False)
-        coerce_to_pandas = self.get_tag("coerce-X-to-pandas", False)
-        X = check_X(
-            X, coerce_to_numpy=coerce_to_numpy, coerce_to_pandas=coerce_to_pandas
-        )
         self.check_is_fitted()
 
-        y = self._predict(X)
+        coerce_to_numpy = self.get_tag("coerce-X-to-numpy")
+        coerce_to_pandas = self.get_tag("coerce-X-to-pandas")
+        allow_multivariate = self.get_tag("capability:multivariate")
+        X = check_X(
+            X,
+            coerce_to_numpy=coerce_to_numpy,
+            coerce_to_pandas=coerce_to_pandas,
+            enforce_univariate=not allow_multivariate,
+        )
 
-        return y
+        return self._predict(X)
 
     def predict_proba(self, X):
         """Predicts labels probabilities for sequences in X.
 
         Parameters
         ----------
-        X : 3D np.array, array-like or sparse matrix
-                of shape = [n_instances,n_dimensions,series_length]
-                or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
+        X : 2D np.array (univariate, equal length series) of shape =
+            [n_instances, series_length]
+            or 3D np.array (any number of dimensions, equal length series) of shape =
+            [n_instances,n_dimensions,series_length]
+            or pd.DataFrame with each column a dimension, each cell a pd.Series (any
+            number of dimensions, equal or unequal length series)
 
         Returns
         -------
-        y : array-like, shape =  [n_instances, n_classes] - predictive pmf
+        y : 2D array of shape =  [n_instances, n_classes] - estimated class
+            probabilities
         """
-        coerce_to_numpy = self.get_tag("coerce-X-to-numpy", False)
-        coerce_to_pandas = self.get_tag("coerce-X-to-pandas", False)
+        self.check_is_fitted()
+
+        coerce_to_numpy = self.get_tag("coerce-X-to-numpy")
+        coerce_to_pandas = self.get_tag("coerce-X-to-pandas")
+        allow_multivariate = self.get_tag("capability:multivariate")
         X = check_X(
-            X, coerce_to_numpy=coerce_to_numpy, coerce_to_pandas=coerce_to_pandas
+            X,
+            coerce_to_numpy=coerce_to_numpy,
+            coerce_to_pandas=coerce_to_pandas,
+            enforce_univariate=not allow_multivariate,
         )
-        self.check_is_fitted()
+
         return self._predict_proba(X)
 
-    def score(self, X, y):
+    def score(self, X, y) -> float:
         """Scores predicted labels against ground truth labels on X.
 
         Parameters
         ----------
-        X : 3D np.array, array-like or sparse matrix
-                of shape = [n_instances,n_dimensions,series_length]
-                or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
-        y : array-like, shape =  [n_instances] - predicted class labels
+        X : 2D np.array (univariate, equal length series) of shape =
+            [n_instances, series_length]
+            or 3D np.array (any number of dimensions, equal length series) of shape =
+            [n_instances,n_dimensions,series_length]
+            or pd.DataFrame with each column a dimension, each cell a pd.Series (any
+            number of dimensions, equal or unequal length series)
+        y : array-like, shape =  [n_instances] - actual class labels
 
         Returns
         -------
@@ -185,14 +197,14 @@ def score(self, X, y):
     def _fit(self, X, y):
         """Fit time series classifier to training data.
 
-        core logic
+        Abstract method, must be implemented.
 
         Parameters
         ----------
         X : 3D np.array, array-like or sparse matrix
                 of shape = [n_instances,n_dimensions,series_length]
                 or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
+            or pd.DataFrame with each column a dimension, each cell a pd.Series
         y : array-like, shape = [n_instances] - the class labels
 
         Returns
@@ -205,46 +217,52 @@ def _fit(self, X, y):
         Changes state by creating a fitted model that updates attributes
         ending in "_" and sets is_fitted flag to True.
         """
-        raise NotImplementedError("abstract method")
+        raise NotImplementedError(
+            "_fit is a protected abstract method, it must be implemented."
+        )
 
     def _predict(self, X):
         """Predicts labels for sequences in X.
 
-        core logic
+        Abstract method, must be implemented.
 
         Parameters
         ----------
         X : 3D np.array, array-like or sparse matrix
                 of shape = [n_instances,n_dimensions,series_length]
                 or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
+            or pd.DataFrame with each column a dimension, each cell a pd.Series
 
         Returns
         -------
         y : array-like, shape =  [n_instances] - predicted class labels
         """
-        distributions = self.predict_proba(X)
-        predictions = []
-        for instance_index in range(0, X.shape[0]):
-            distribution = distributions[instance_index]
-            prediction = np.argmax(distribution)
-            predictions.append(prediction)
-        y = self.label_encoder.inverse_transform(predictions)
-
-        return y
+        raise NotImplementedError(
+            "_predict is a protected abstract method, it must be implemented."
+        )
 
     def _predict_proba(self, X):
         """Predicts labels probabilities for sequences in X.
 
+        Default behaviour is to call _predict and set the predicted class probability
+        to 1, other class probabilities to 0. Override if better estimates are
+        obtainable.
+
         Parameters
         ----------
         X : 3D np.array, array-like or sparse matrix
                 of shape = [n_instances,n_dimensions,series_length]
                 or shape = [n_instances,series_length]
-            or single-column pd.DataFrame with pd.Series entries
+            or pd.DataFrame with each column a dimension, each cell a pd.Series
 
         Returns
         -------
-        y : array-like, shape =  [n_instances, n_classes] - predictive pmf
+        y : array-like, shape =  [n_instances, n_classes] - estimated probabilities
+            of class membership.
         """
-        raise NotImplementedError("abstract method")
+        dists = np.zeros((X.shape[0], self.n_classes_))
+        preds = self._predict(X)
+        for i in range(0, X.shape[0]):
+            dists[i, self._class_dictionary[preds[i]]] = 1
+
+        return dists
diff --git a/sktime/forecasting/base/_base.py b/sktime/forecasting/base/_base.py
index f2479325a5f..be356537254 100644
--- a/sktime/forecasting/base/_base.py
+++ b/sktime/forecasting/base/_base.py
@@ -26,27 +26,26 @@ class name: BaseForecaster
 """
 
 
-__author__ = ["Markus Löning", "@big-o", "fkiraly"]
+__author__ = ["mloning", "@big-o", "fkiraly"]
 __all__ = ["BaseForecaster"]
 
-from sktime.base import BaseEstimator
-
 from contextlib import contextmanager
 from warnings import warn
 
 import numpy as np
 import pandas as pd
 
-from sktime.utils.datetime import _shift
-from sktime.utils.validation.forecasting import check_X
-from sktime.utils.validation.forecasting import check_alpha
-from sktime.utils.validation.forecasting import check_cv
-from sktime.utils.validation.forecasting import check_fh
-from sktime.utils.validation.forecasting import check_y_X
-from sktime.utils.validation.series import check_series, check_equal_time_index
-
+from sktime.base import BaseEstimator
 from sktime.datatypes import convert_to, mtype
-
+from sktime.utils.datetime import _shift
+from sktime.utils.validation.forecasting import (
+    check_alpha,
+    check_cv,
+    check_fh,
+    check_X,
+    check_y_X,
+)
+from sktime.utils.validation.series import check_equal_time_index, check_series
 
 DEFAULT_ALPHA = 0.05
 
diff --git a/sktime/registry/_tags.py b/sktime/registry/_tags.py
index 608a7b8182e..4867ec0932c 100644
--- a/sktime/registry/_tags.py
+++ b/sktime/registry/_tags.py
@@ -43,7 +43,6 @@
 
 import pandas as pd
 
-
 ESTIMATOR_TAG_REGISTER = [
     (
         "ignores-exogeneous-X",
@@ -89,13 +88,13 @@
     ),
     (
         "X-y-must-have-same-index",
-        ["forecaster", "classifier", "regressor"],
+        ["forecaster", "regressor"],
         "bool",
         "do X/y in fit/update and X/fh in predict have to be same indices?",
     ),
     (
         "enforce_index_type",
-        ["forecaster", "classifier", "regressor"],
+        ["forecaster", "regressor"],
         "type",
         "passed to input checks, input conversion index type to enforce",
     ),
@@ -139,13 +138,13 @@
         "capability:multivariate",
         "classifier",
         "bool",
-        "can classifier classify time series with 2 or more variables?",
+        "can the classifier classify time series with 2 or more variables?",
     ),
     (
         "capability:unequal_length",
         "classifier",
         "bool",
-        "can classifier handle unequal length time series?",
+        "can the classifier handle unequal length time series?",
     ),
     # "capability:missing_values" is same as "handles-missing-data" tag.
     # They are kept distinct intentionally for easier TSC refactoring.
@@ -168,6 +167,12 @@
         "bool",
         "contract time setting, i.e. does the estimator support limiting max fit time?",
     ),
+    (
+        "capability:multithreading",
+        "classifier",
+        "bool",
+        "can the classifier set n_jobs to use multiple threads?",
+    ),
     (
         "coerce-X-to-pandas",
         ["classifier", "transformer"],