slidoapp · mrshu · Nov 15, 2022 · Oct 22, 2022 · Oct 22, 2022 · Oct 22, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
+### Added
+- Support for model selection by path.
 
 ## [0.2.3] - 2022-10-14
 ### Added

diff --git a/README.md b/README.md
@@ -98,6 +98,34 @@ jaffle_shop.stg_payments                               2/4      50.0%
 Total                                                 14/38     36.8%
 ```
 
+You can also choose a subset of tables to compare using one or multiple `--model-path-filter` options.
+
+```console
+$ cd jaffle_shop
+$ dbt run  # Materialize models
+$ dbt docs generate  # Generate catalog.json and manifest.json
+$ dbt-coverage compute doc --cov-report coverage-doc.json --model-path-filter staging/  # Compute doc coverage for a subset of tables, print it and write it to coverage-doc.json file
+
+Coverage report
+======================================================
+jaffle_shop.stg_customers              0/3       0.0%
+jaffle_shop.stg_orders                 0/4       0.0%
+jaffle_shop.stg_payments               0/4       0.0%
+======================================================
+Total                                  0/20      0.0%
+
+$ dbt-coverage compute doc --cov-report coverage-doc.json --model-path-filter models/orders.sql --model-path-filter staging/  # Compute doc coverage for a subset of tables, print it and write it to coverage-doc.json file
+
+Coverage report
+======================================================
+jaffle_shop.orders                     0/9       0.0%
+jaffle_shop.stg_customers              0/3       0.0%
+jaffle_shop.stg_orders                 0/4       0.0%
+jaffle_shop.stg_payments               0/4       0.0%
+======================================================
+Total                                  0/20      0.0%
+```
+
 ### Compare
 
 Compare two `coverage.json` files generated by the `compute` command. This is
@@ -154,7 +182,7 @@ the support matrix.
 |-------------|----------------|
 | <0.20       | not tested     |
 | 0.20 - 0.21 | 0.1            |
-| 1.0 - 1.3   | 0.2            |
+| 1.0 - 1.3   | 0.2, 0.3       |
 
 ## Related packages
 

diff --git a/dbt_coverage/__init__.py b/dbt_coverage/__init__.py
@@ -5,7 +5,7 @@
 import logging
 from dataclasses import dataclass, field, replace
 from enum import Enum
-from pathlib import Path
+from pathlib import Path, PurePath
 from typing import Dict, Set, List, Optional
 
 import typer
@@ -55,16 +55,38 @@ class Table:
     """Dataclass containing the information about a database table and its columns."""
 
     name: str
+    unique_id: str
+    original_file_path: None
     columns: Dict[str, Column]
 
     @staticmethod
     def from_node(node) -> Table:
         columns = [Column.from_node(col) for col in node['columns'].values()]
         return Table(
             f"{node['metadata']['schema']}.{node['metadata']['name']}".lower(),
-            {col.name: col for col in columns}
+            node['unique_id'], None, {col.name: col for col in columns}
         )
 
+    def update_original_file_path(self, manifest: Manifest) -> None:
+        """
+        Update Table's ``original_file_path`` attribute by retrieving this information from a
+        Manifest.
+
+        :param manifest: the Manifest used which contains the ``original_file_path`` for a Table
+        :returns: None
+        """
+        old_original_file_path_value = self.original_file_path
+
+        manifest_attributes = vars(manifest)
+        for attribute_type_name, attribute_type_dict in manifest_attributes.items():
+            for attribute_instance_name, attribute_instance in attribute_type_dict.items():
+                if self.unique_id in attribute_instance.values():
+                    self.original_file_path = attribute_instance['original_file_path']
+
+        if self.original_file_path is None or self.original_file_path == \
+                old_original_file_path_value:
+            logging.info(f"original_file_path value not found in manifest for {self.unique_id}")
+
     def get_column(self, column_name):
         return self.columns.get(column_name)
 
@@ -75,6 +97,37 @@ class Catalog:
 
     tables: Dict[str, Table]
 
+    def filter_catalog(self, model_path_filter: List[str]) -> Catalog:
+        """
+        Filter ``Catalog``'s ``tables`` attribute to ``Tables`` that have a ``model_path_filter``
+        value as part of their ``original_file_path``.
+
+        :param model_path_filter: the model_path string(s) to filter tables on, (matches using
+        the ``in`` operator)
+
+        :returns: Catalog
+        :raises ValueError: if no ``Table`` in the ``tables`` Catalog attribute have an
+        ``original_file_path`` that contains any ``model_path_filter`` value
+        """
+        filtered_tables = {}
+
+        original_tables_dict = {key: val for key, val in self.tables.items()}
+        for key, table in original_tables_dict.items():
+            for path in model_path_filter:
+                if path in table.original_file_path:
+                    filtered_tables[key] = table
+                    break
+
+        if len(filtered_tables) < 1:
+            logging.error("len(filtered_tables) < 1", exc_info=True)
+            raise ValueError(
+                "After filtering the Catalog contains no tables. Ensure your model_path_filter "
+                "is correct")
+        else:
+            logging.info("Successfully filtered tables. Total tables: %d tables", len(self.tables))
+
+            return Catalog(tables=filtered_tables)
+
     @staticmethod
     def from_nodes(nodes):
         tables = [Table.from_node(table) for table in nodes]
@@ -86,10 +139,10 @@ def get_table(self, table_name):
 
 @dataclass
 class Manifest:
-    sources: Dict[str, Dict[str, Dict]]
-    models: Dict[str, Dict[str, Dict]]
-    seeds: Dict[str, Dict[str, Dict]]
-    snapshots: Dict[str, Dict[str, Dict]]
+    sources: Dict[str, Dict[str, Dict[str, Dict]]]
+    models: Dict[str, Dict[str, Dict[str, Dict]]]
+    seeds: Dict[str, Dict[str, Dict[str, Dict]]]
+    snapshots: Dict[str, Dict[str, Dict[str, Dict]]]
     tests: Dict[str, Dict[str, List[Dict]]]
 
     @classmethod
@@ -98,21 +151,30 @@ def from_nodes(cls, manifest_nodes: Dict[str, Dict]) -> Manifest:
 
         sources = [table for table in manifest_nodes.values()
                    if table['resource_type'] == 'source']
-        sources = {cls._full_table_name(table): cls._normalize_column_names(table['columns'])
-                   for table in sources}
-
-        models = [table for table in manifest_nodes.values() if table['resource_type'] == 'model']
-        models = {cls._full_table_name(table): cls._normalize_column_names(table['columns'])
-                  for table in models}
+        sources = {cls._full_table_name(table): {
+            'columns': cls._normalize_column_names(table['columns']),
+            'original_file_path': cls._normalize_path(table['original_file_path']),
+            'unique_id': table['unique_id']} for table in sources}
+
+        models = [table for table in manifest_nodes.values()
+                  if table['resource_type'] == 'model']
+        models = {cls._full_table_name(table): {
+            'columns': cls._normalize_column_names(table['columns']),
+            'original_file_path': cls._normalize_path(table['original_file_path']),
+            'unique_id': table['unique_id']} for table in models}
 
         seeds = [table for table in manifest_nodes.values() if table['resource_type'] == 'seed']
-        seeds = {cls._full_table_name(table): cls._normalize_column_names(table['columns'])
-                 for table in seeds}
+        seeds = {cls._full_table_name(table): {
+            'columns': cls._normalize_column_names(table['columns']),
+            'original_file_path': cls._normalize_path(table['original_file_path']),
+            'unique_id': table['unique_id']} for table in seeds}
 
         snapshots = [table for table in manifest_nodes.values()
                      if table['resource_type'] == 'snapshot']
-        snapshots = {cls._full_table_name(table): cls._normalize_column_names(table['columns'])
-                     for table in snapshots}
+        snapshots = {cls._full_table_name(table): {
+            'columns': cls._normalize_column_names(table['columns']),
+            'original_file_path': cls._normalize_path(table['original_file_path']),
+            'unique_id': table['unique_id']} for table in snapshots}
 
         tests = cls._parse_tests(manifest_nodes)
 
@@ -168,6 +230,10 @@ def _normalize_column_names(columns):
             col['name'] = col['name'].lower()
         return {col['name']: col for col in columns.values()}
 
+    @staticmethod
+    def _normalize_path(path: str) -> str:
+        return str(PurePath(path).as_posix())
+
 
 @dataclass
 class CoverageReport:
@@ -536,6 +602,7 @@ def load_files(project_dir: Path) -> Catalog:
 
     for table_name in catalog.tables:
         catalog_table = catalog.get_table(table_name)
+        catalog_table.update_original_file_path(manifest)
         manifest_source_table = manifest.sources.get(table_name, {})
         manifest_model_table = manifest.models.get(table_name, {})
         manifest_seed_table = manifest.seeds.get(table_name, {})
@@ -607,14 +674,18 @@ def fail_compare(coverage_report: CoverageReport, compare_path: Path):
 
 def do_compute(project_dir: Path = Path('.'), cov_report: Path = Path('coverage.json'),
                cov_type: CoverageType = CoverageType.DOC, cov_fail_under: float = None,
-               cov_fail_compare: Path = None):
+               cov_fail_compare: Path = None, model_path_filter: Optional[List[str]] = None):
     """
     Computes coverage for a dbt project.
 
     Use this method in your Python code to bypass typer.
     """
 
     catalog = load_files(project_dir)
+
+    if len(model_path_filter) >= 1:
+        catalog = catalog.filter_catalog(model_path_filter)
+
     coverage_report = compute_coverage(catalog, cov_type)
 
     print(coverage_report.to_formatted_string())
@@ -655,10 +726,15 @@ def compute(project_dir: Path = typer.Option('.', help="dbt project directory pa
                                                              "with and fail if current coverage "
                                                              "is lower. Normally used to prevent "
                                                              "coverage drop between subsequent "
-                                                             "tests.")):
+                                                             "tests."),
+            model_path_filter: Optional[List[str]] = typer.Option(None, help="The model_path "
+                                                                             "string(s) to "
+                                                                             "filter tables "
+                                                                             "on.")):
     """Compute coverage for project in PROJECT_DIR from catalog.json and manifest.json."""
 
-    return do_compute(project_dir, cov_report, cov_type, cov_fail_under, cov_fail_compare)
+    return do_compute(project_dir, cov_report, cov_type, cov_fail_under, cov_fail_compare,
+                      model_path_filter)
 
 
 @app.command()