kedro-org · rashidakanchwala · Jan 17, 2023 · Jan 11, 2023 · Jan 11, 2023 · Jan 12, 2023
@@ -6,7 +6,7 @@ package:
 	cd package && python setup.py clean --all
 	cd package && python setup.py sdist bdist_wheel
 
-build: 
+build:
 	rm -rf build package/build package/dist package/kedro_viz/html pip-wheel-metadata package/kedro_viz.egg-info
 	npm run build
 	cp -R build package/kedro_viz/html
@@ -16,10 +16,10 @@ PROJECT_PATH ?= demo-project
 run:
 	PYTHONPATH=$(shell pwd)/package python3 package/kedro_viz/server.py $(PROJECT_PATH)
 
-pytest: 
+pytest:
 	cd package && pytest --cov-fail-under=100
 
-e2e-tests: 
+e2e-tests:
 	cd package && behave
 
 lint: format-fix lint-check
@@ -34,7 +34,7 @@ format-check:
 
 lint-check:
 	pylint --rcfile=package/.pylintrc -j 0 package/kedro_viz
-	pylint --rcfile=package/.pylintrc -j 0 --disable=protected-access,missing-docstring,redefined-outer-name,no-self-use,invalid-name,too-few-public-methods,no-member,unused-argument,duplicate-code package/tests
+	pylint --rcfile=package/.pylintrc -j 0 --disable=protected-access,missing-docstring,redefined-outer-name,invalid-name,too-few-public-methods,no-member,unused-argument,duplicate-code package/tests
 	pylint --rcfile=package/.pylintrc -j 0 --disable=missing-docstring,no-name-in-module,unused-argument package/features
 	flake8 --config=package/.flake8 package
 	mypy --config-file=package/mypy.ini package

diff --git a/demo-project/conf/base/catalog_08_reporting.yml b/demo-project/conf/base/catalog_08_reporting.yml
@@ -25,7 +25,7 @@ reporting.feature_importance:
   versioned: true
 
 reporting.cancellation_policy_grid:
-  type: demo_project.extras.datasets.image_dataset.ImageDataSet
+  type: image_dataset.ImageDataSet
   filepath: ${base_location}/08_reporting/cancellation_policy_grid.png
 
 reporting.confusion_matrix:

@@ -50,7 +50,7 @@ confidence=
 # --enable=similarities". If you want to run only the classes checker, but have
 # no Warning level messages displayed, use"--disable=all --enable=classes
 # --disable=W"
-disable=ungrouped-imports,bad-continuation,attribute-defined-outside-init,too-many-arguments,duplicate-code,fixme
+disable=ungrouped-imports,attribute-defined-outside-init,too-many-arguments,duplicate-code,fixme
 
 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option
@@ -92,35 +92,22 @@ max-nested-blocks=5
 
 [BASIC]
 
-# Naming hint for argument names
-argument-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$
-
 # Regular expression matching correct argument names
 argument-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$
 
-# Naming hint for attribute names
-attr-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$
 
 # Regular expression matching correct attribute names
 attr-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$
 
 # Bad variable names which should always be refused, separated by a comma
 bad-names=foo,bar,baz,toto,tutu,tata
 
-# Naming hint for class attribute names
-class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
-
 # Regular expression matching correct class attribute names
 class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
 
-# Naming hint for class names
-class-name-hint=[A-Z_][a-zA-Z0-9]+$
-
 # Regular expression matching correct class names
 class-rgx=[A-Z_][a-zA-Z0-9]+$
 
-# Naming hint for constant names
-const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$
 
 # Regular expression matching correct constant names
 const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
@@ -129,9 +116,6 @@ const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
 # ones are exempt.
 docstring-min-length=-1
 
-# Naming hint for function names
-function-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$
-
 # Regular expression matching correct function names
 function-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$
 
@@ -141,21 +125,12 @@ good-names=i,j,k,ex,Run,_,id
 # Include a hint for the correct naming format with invalid-name
 include-naming-hint=no
 
-# Naming hint for inline iteration names
-inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$
-
 # Regular expression matching correct inline iteration names
 inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
 
-# Naming hint for method names
-method-name-hint=(([a-z][a-z0-9_]{2,60})|(_[a-z0-9_]*))$
-
 # Regular expression matching correct method names
 method-rgx=(([a-z][a-z0-9_]{2,60})|(_[a-z0-9_]*))$
 
-# Naming hint for module names
-module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
-
 # Regular expression matching correct module names
 module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
 
@@ -171,9 +146,6 @@ no-docstring-rgx=^_
 # to this list to register other decorators that produce valid properties.
 property-classes=abc.abstractproperty
 
-# Naming hint for variable names
-variable-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$
-
 # Regular expression matching correct variable names
 variable-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$
 
@@ -199,12 +171,6 @@ max-line-length=100
 # Maximum number of lines in a module
 max-module-lines=1000
 
-# List of optional constructs for which whitespace checking is disabled. `dict-
-# separator` is used to allow tabulation in dicts, etc.: {1  : 1,\n222: 2}.
-# `trailing-comma` allows a space between comma and closing bracket: (a, ).
-# `empty-line` allows space-only lines.
-no-space-check=trailing-comma,dict-separator
-
 # Allow the body of a class to be on the same line as the declaration if body
 # contains single statement.
 single-line-class-stmt=no

@@ -85,7 +85,7 @@ def format_run_tracking_data(
         Dictionary with formatted tracking data for selected runs
 
     Example:
-        >>> from kedro.extras.datasets.tracking import MetricsDataSet
+        >>> from kedro_datasets.tracking import MetricsDataSet
         >>> tracking_data = {
         >>>     'My Favorite Sprint': {
         >>>         'bootstrap':0.8

@@ -121,7 +121,7 @@ class Config:
         schema_extra = {
             "example": {
                 "filepath": "/my-kedro-project/data/03_primary/master_table.csv",
-                "type": "kedro.extras.datasets.pandas.csv_dataset.CSVDataSet",
+                "type": "kedro_datasets.pandas.csv_dataset.CSVDataSet",
                 "run_command": 'kedro run --to-outputs="master_table"',
             }
         }

diff --git a/package/kedro_viz/data_access/repositories/tracking_datasets.py b/package/kedro_viz/data_access/repositories/tracking_datasets.py
@@ -10,7 +10,7 @@
     TRACKING_DATASET_GROUPS,
     TrackingDatasetGroup,
     TrackingDatasetModel,
-    get_dataset_type,
+    get_dataset_module_class,
 )
 
 
@@ -34,12 +34,14 @@ def add_tracking_dataset(
         self, dataset_name: str, dataset: AbstractVersionedDataSet
     ) -> None:
         tracking_dataset = TrackingDatasetModel(dataset_name, dataset)
-        tracking_dataset_group = TRACKING_DATASET_GROUPS[tracking_dataset.dataset_type]
+        tracking_dataset_group = TRACKING_DATASET_GROUPS[
+            tracking_dataset.dataset_module_class
+        ]
         self.tracking_datasets_by_group[tracking_dataset_group].append(tracking_dataset)
 
     @staticmethod
     def is_tracking_dataset(dataset) -> bool:
         return (
-            get_dataset_type(dataset) in TRACKING_DATASET_GROUPS
+            get_dataset_module_class(dataset) in TRACKING_DATASET_GROUPS
             and dataset._version is not None
         )
@@ -10,12 +10,21 @@
 from typing import Any, Dict, Optional, Tuple
 
 from kedro import __version__
-from kedro.extras.datasets import (  # Safe since ImportErrors are suppressed within kedro.
-    json,
-    matplotlib,
-    plotly,
-    tracking,
-)
+
+try:
+    from kedro_datasets import (  # isort:skip
+        json,
+        matplotlib,
+        plotly,
+        tracking,
+    )
+except ImportError:
+    from kedro.extras.datasets import (  # Safe since ImportErrors are suppressed within kedro.
+        json,
+        matplotlib,
+        plotly,
+        tracking,
+    )
 from kedro.io import DataCatalog
 from kedro.io.core import get_filepath_str
 from kedro.pipeline import Pipeline
@@ -150,16 +159,16 @@ def matplotlib_writer_load(dataset: matplotlib.MatplotlibWriter) -> str:
             base64_bytes = base64.b64encode(img_file.read())
         return base64_bytes.decode("utf-8")
 
-    matplotlib.MatplotlibWriter._load = matplotlib_writer_load  # type:ignore
+    matplotlib.MatplotlibWriter._load = matplotlib_writer_load
 
 if hasattr(plotly, "JSONDataSet"):
-    plotly.JSONDataSet._load = json.JSONDataSet._load  # type:ignore
+    plotly.JSONDataSet._load = json.JSONDataSet._load
 
 if hasattr(plotly, "PlotlyDataSet"):
-    plotly.PlotlyDataSet._load = json.JSONDataSet._load  # type:ignore
+    plotly.PlotlyDataSet._load = json.JSONDataSet._load
 
 if hasattr(tracking, "JSONDataSet"):
-    tracking.JSONDataSet._load = json.JSONDataSet._load  # type:ignore
+    tracking.JSONDataSet._load = json.JSONDataSet._load
 
 if hasattr(tracking, "MetricsDataSet"):
-    tracking.MetricsDataSet._load = json.JSONDataSet._load  # type:ignore
+    tracking.MetricsDataSet._load = json.JSONDataSet._load
@@ -51,13 +51,12 @@ class TrackingDatasetGroup(str, Enum):
     JSON = "json"
 
 
-# pylint: disable=line-too-long
 TRACKING_DATASET_GROUPS = {
-    "kedro.extras.datasets.plotly.plotly_dataset.PlotlyDataSet": TrackingDatasetGroup.PLOT,
-    "kedro.extras.datasets.plotly.json_dataset.JSONDataSet": TrackingDatasetGroup.PLOT,
-    "kedro.extras.datasets.matplotlib.matplotlib_writer.MatplotlibWriter": TrackingDatasetGroup.PLOT,
-    "kedro.extras.datasets.tracking.metrics_dataset.MetricsDataSet": TrackingDatasetGroup.METRIC,
-    "kedro.extras.datasets.tracking.json_dataset.JSONDataSet": TrackingDatasetGroup.JSON,
+    "plotly.plotly_dataset.PlotlyDataSet": TrackingDatasetGroup.PLOT,
+    "plotly.json_dataset.JSONDataSet": TrackingDatasetGroup.PLOT,
+    "matplotlib.matplotlib_writer.MatplotlibWriter": TrackingDatasetGroup.PLOT,
+    "tracking.metrics_dataset.MetricsDataSet": TrackingDatasetGroup.METRIC,
+    "tracking.json_dataset.JSONDataSet": TrackingDatasetGroup.JSON,
 }
 
 
@@ -67,14 +66,15 @@ class TrackingDatasetModel:
 
     dataset_name: str
     # dataset is the actual dataset instance, whereas dataset_type is a string.
-    # e.g. "kedro.extras.datasets.tracking.metrics_dataset.MetricsDataSet"
+    # e.g. "kedro_datasets.tracking.metrics_dataset.MetricsDataSet"
     dataset: AbstractVersionedDataSet
     dataset_type: str = field(init=False)
     # runs is a mapping from run_id to loaded data.
     runs: Dict[str, Any] = field(init=False, default_factory=dict)
 
     def __post_init__(self):
         self.dataset_type = get_dataset_type(self.dataset)
+        self.dataset_module_class = get_dataset_module_class(self.dataset)
 
     def load_tracking_data(self, run_id: str):
         # No need to reload data that has already been loaded.
@@ -93,7 +93,10 @@ def load_tracking_data(self, run_id: str):
             return
 
         try:
-            if TRACKING_DATASET_GROUPS[self.dataset_type] is TrackingDatasetGroup.PLOT:
+            if (
+                TRACKING_DATASET_GROUPS[self.dataset_module_class]
+                is TrackingDatasetGroup.PLOT
+            ):
                 self.runs[run_id] = {self.dataset._filepath.name: self.dataset.load()}
             else:
                 self.runs[run_id] = self.dataset.load()
@@ -111,3 +114,9 @@ def load_tracking_data(self, run_id: str):
 
 def get_dataset_type(dataset: AbstractVersionedDataSet) -> str:
-def get_dataset_type(dataset: AbstractVersionedDataSet) -> str:
+def get_full_dataset_type(dataset: AbstractVersionedDataSet) -> str:
+    """e.g. kedro.extras.datasets.plotly.plotly_dataset.PlotlyDataSet or kedro_datasets.plotly.plotly_dataset.PlotlyDataSet"""
-def get_dataset_type(dataset: AbstractVersionedDataSet) -> str:
+def get_full_dataset_type(dataset: AbstractVersionedDataSet) -> str:
+    """e.g. kedro.extras.datasets.plotly.plotly_dataset.PlotlyDataSet or kedro_datasets.plotly.plotly_dataset.PlotlyDataSet"""
     return f"{dataset.__class__.__module__}.{dataset.__class__.__qualname__}"
+
+
+def get_dataset_module_class(dataset: AbstractVersionedDataSet) -> str:
+    class_name = f"{dataset.__class__.__qualname__}"
+    _, dataset_type, dataset_file = f"{dataset.__class__.__module__}".rsplit(".", 2)
+    return f"{dataset_type}.{dataset_file}.{class_name}"
-def get_dataset_module_class(dataset: AbstractVersionedDataSet) -> str:
-    class_name = f"{dataset.__class__.__qualname__}"
-    _, dataset_type, dataset_file = f"{dataset.__class__.__module__}".rsplit(".", 2)
-    return f"{dataset_type}.{dataset_file}.{class_name}"
+def get_abbreviated_dataset_type(dataset: AbstractVersionedDataSet) -> str:
+    """e.g. plotly.plotly_dataset.PlotlyDataSet"""
+    abbreviated_module_name = ".".join(dataset.__class__.__module__.split(".")[-2:])
+    return f"{abbreviated_module_name}.{dataset.__class__.__qualname__}"
-def get_dataset_module_class(dataset: AbstractVersionedDataSet) -> str:
-    class_name = f"{dataset.__class__.__qualname__}"
-    _, dataset_type, dataset_file = f"{dataset.__class__.__module__}".rsplit(".", 2)
-    return f"{dataset_type}.{dataset_file}.{class_name}"
+def get_abbreviated_dataset_type(dataset: AbstractVersionedDataSet) -> str:
+    """e.g. plotly.plotly_dataset.PlotlyDataSet"""
+    abbreviated_module_name = ".".join(dataset.__class__.__module__.split(".")[-2:])
+    return f"{abbreviated_module_name}.{dataset.__class__.__qualname__}"
@@ -458,6 +458,10 @@ def __post_init__(self):
             else None
         )
 
+        self.dataset_module_class = (
+            self.get_dataset_module_class(self.kedro_obj) if self.kedro_obj else None
+        )
+
         # the modular pipelines that a data node belongs to
         # are derived from its namespace, which in turn
         # is derived from the dataset's name.
@@ -466,37 +470,39 @@ def __post_init__(self):
             self._get_namespace(self.full_name)
         )
 
+    @staticmethod
+    def get_dataset_module_class(kedro_object) -> str:
+        """Get the dataset class and up to the last two parts of its module path.
+
+        Modules with fewer than two parts (e.g. ``image_dataset``) are used as-is.
+        """
+        module_tail = ".".join(kedro_object.__class__.__module__.split(".")[-2:])
+        return f"{module_tail}.{kedro_object.__class__.__qualname__}"
+
     # TODO: improve this scheme.
     def is_plot_node(self):
         """Check if the current node is a plot node.
         Currently it only recognises one underlying dataset as a plot node.
         In the future, we might want to make this generic.
         """
-        return self.dataset_type in (
-            "kedro.extras.datasets.plotly.plotly_dataset.PlotlyDataSet",
-            "kedro.extras.datasets.plotly.json_dataset.JSONDataSet",
+        return self.dataset_module_class in (
+            "plotly.plotly_dataset.PlotlyDataSet",
+            "plotly.json_dataset.JSONDataSet",
         )
 
     def is_image_node(self):
         """Check if the current node is a matplotlib image node."""
         return (
-            self.dataset_type
-            == "kedro.extras.datasets.matplotlib.matplotlib_writer.MatplotlibWriter"
+            self.dataset_module_class == "matplotlib.matplotlib_writer.MatplotlibWriter"
         )
 
     def is_metric_node(self):
         """Check if the current node is a metrics node."""
-        return (
-            self.dataset_type
-            == "kedro.extras.datasets.tracking.metrics_dataset.MetricsDataSet"
-        )
+        return self.dataset_module_class == "tracking.metrics_dataset.MetricsDataSet"
 
     def is_json_node(self):
         """Check if the current node is a JSONDataSet node."""
-        return (
-            self.dataset_type
-            == "kedro.extras.datasets.tracking.json_dataset.JSONDataSet"
-        )
+        return self.dataset_module_class == "tracking.json_dataset.JSONDataSet"
 
     def is_tracking_node(self):
         """Checks if the current node is a tracking data node"""