Merge branch 'develop' of github.com:centerofci/mathesar into i18n-load-en-translations-parallely
rajatvijay committed Sep 6, 2023
2 parents b0ab693 + 2726d9b commit 855da7b
Showing 253 changed files with 8,182 additions and 4,125 deletions.
14 changes: 1 addition & 13 deletions .github/workflows/run-pytest.yml
@@ -32,17 +32,5 @@ jobs:
- name: Build the stack
run: docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build -d test-service

- name: Create coverage directory
run: docker exec mathesar_service_test mkdir coverage_report

- name: Run tests with pytest
run: docker exec mathesar_service_test pytest --exitfirst --ignore=mathesar/tests/integration/ --cov-report=xml:coverage_report/coverage.xml

- name: Save the coverage report
uses: codecov/codecov-action@v2
with:
token: ${{ secrets.CODECOV_TOKEN }}
directory: ./coverage_report
flags: pytest-backend
name: codecov-mathesar
verbose: true
run: docker exec mathesar_service_test ./run_pytest.sh
3 changes: 3 additions & 0 deletions .gitignore
@@ -193,3 +193,6 @@ test-results/

# core dumps generated sometimes when something goes wrong with Playwright
core

# non tracked settings
config/settings/local.py
2 changes: 1 addition & 1 deletion Dockerfile.devdb
@@ -1,4 +1,4 @@
FROM postgres:13

RUN apt update
RUN apt install -y postgresql-13-pgtap && rm -rf /var/lib/apt/lists/*
RUN apt install -y postgresql-13-pgtap postgresql-13-pldebugger && rm -rf /var/lib/apt/lists/*
1 change: 1 addition & 0 deletions config/settings/common_settings.py
@@ -192,6 +192,7 @@ def pipe_delim(pipe_string):
'DESCRIPTION': '',
'VERSION': '1.0.0',
'SERVE_INCLUDE_SCHEMA': False,
'PREPROCESSING_HOOKS': ['config.settings.openapi.custom_preprocessing_hook'],
'POSTPROCESSING_HOOKS': [
'config.settings.openapi.remove_url_prefix_hook',
],
7 changes: 7 additions & 0 deletions config/settings/development.py
@@ -1,3 +1,10 @@
from config.settings.common_settings import * # noqa

# Override default settings


# Use a local.py module for settings that shouldn't be version tracked
try:
from .local import * # noqa
except ImportError:
pass
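
(Editorial aside, not part of this commit.) The same try/except import is added to production.py below, so any untracked module at config/settings/local.py is layered on top of the tracked settings. A minimal sketch of such an override file, assuming standard Django setting names, might look like:

# config/settings/local.py -- hypothetical, untracked developer overrides.
# Loaded last by development.py / production.py, so values here take precedence.
DEBUG = True
ALLOWED_HOSTS = ['localhost', '127.0.0.1']

Because the import is wrapped in try/except ImportError, environments without a local.py are unaffected.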
9 changes: 9 additions & 0 deletions config/settings/openapi.py
@@ -1,3 +1,12 @@
def custom_preprocessing_hook(endpoints):
filtered = []
for (path, path_regex, method, callback) in endpoints:
# Remove all but DRF API endpoints
if path.startswith("/api/db/v0/databases/") or path.startswith("/api/db/v0/data_files/") or path.startswith("/api/db/v0/schemas/"):
filtered.append((path, path_regex, method, callback))
return filtered


def remove_url_prefix_hook(result, **kwargs):
# Remove namespace and version URL prefix from the operation Id of the generated API schema
for path, path_info in result['paths'].items():
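
(Editorial aside, not part of this commit.) custom_preprocessing_hook keeps only the DRF endpoints under the three /api/db/v0/ prefixes. A quick sketch of its behaviour on made-up endpoint tuples (drf-spectacular passes (path, path_regex, method, callback) 4-tuples):

endpoints = [
    ("/api/db/v0/schemas/", r"^api/db/v0/schemas/$", "GET", None),
    ("/api/ui/v0/users/", r"^api/ui/v0/users/$", "GET", None),
]
# Only the /api/db/v0/ path survives the filter.
assert [e[0] for e in custom_preprocessing_hook(endpoints)] == ["/api/db/v0/schemas/"]

The new PREPROCESSING_HOOKS entry in common_settings.py (above) is what wires this hook into schema generation.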
7 changes: 7 additions & 0 deletions config/settings/production.py
@@ -1,3 +1,10 @@
from config.settings.common_settings import * # noqa

# Override default settings


# Use a local.py module for settings that shouldn't be version tracked
try:
from .local import * # noqa
except ImportError:
pass
38 changes: 37 additions & 1 deletion conftest.py
@@ -54,7 +54,7 @@ def create_db(request, SES_engine_cache):
A factory for Postgres mathesar-installed databases. A fixture made of this method tears down
created dbs when leaving scope.
This method is used to create two fixtures with different scopes, that's why it's not a fixture
This method is used to create fixtures with different scopes, that's why it's not a fixture
itself.
"""
engine_cache = SES_engine_cache
@@ -261,6 +261,8 @@ def _get_connection_string(username, password, hostname, database):
LIBRARY_SQL = os.path.join(RESOURCES, "library_without_checkouts.sql")
LIBRARY_CHECKOUTS_SQL = os.path.join(RESOURCES, "library_add_checkouts.sql")
FRAUDULENT_PAYMENTS_SQL = os.path.join(RESOURCES, "fraudulent_payments.sql")
PLAYER_PROFILES_SQL = os.path.join(RESOURCES, "player_profiles.sql")
MARATHON_ATHLETES_SQL = os.path.join(RESOURCES, "marathon_athletes.sql")


@pytest.fixture
@@ -349,3 +351,37 @@ def payments_db_table(engine_with_fraudulent_payment):
metadata = MetaData(bind=engine)
table = Table("Payments", metadata, schema=schema, autoload_with=engine)
return table


@pytest.fixture
def engine_with_player_profiles(engine_with_schema):
engine, schema = engine_with_schema
with engine.begin() as conn, open(PLAYER_PROFILES_SQL) as f:
conn.execute(text(f"SET search_path={schema}"))
conn.execute(text(f.read()))
yield engine, schema


@pytest.fixture
def players_db_table(engine_with_player_profiles):
engine, schema = engine_with_player_profiles
metadata = MetaData(bind=engine)
table = Table("Players", metadata, schema=schema, autoload_with=engine)
return table


@pytest.fixture
def engine_with_marathon_athletes(engine_with_schema):
engine, schema = engine_with_schema
with engine.begin() as conn, open(MARATHON_ATHLETES_SQL) as f:
conn.execute(text(f"SET search_path={schema}"))
conn.execute(text(f.read()))
yield engine, schema


@pytest.fixture
def athletes_db_table(engine_with_marathon_athletes):
engine, schema = engine_with_marathon_athletes
metadata = MetaData(bind=engine)
table = Table("Marathon", metadata, schema=schema, autoload_with=engine)
return table
33 changes: 10 additions & 23 deletions db/columns/operations/select.py
@@ -1,21 +1,12 @@
import warnings

from pglast import Node, parse_sql
from sqlalchemy import and_, asc, cast, select, text, exists
from sqlalchemy import and_, asc, cast, select, text, exists, Identity

from db.columns.exceptions import DynamicDefaultWarning
from db.connection import execute_msar_func_with_engine
from db.tables.operations.select import reflect_table_from_oid
from db.utils import execute_statement, get_pg_catalog_table

# These tags define which nodes in the AST built by pglast we consider to be
# "dynamic" when found in a column default clause. The nodes are best
# documented by C header files that define the underlying structs:
# https://github.com/pganalyze/libpg_query/blob/13-latest/src/postgres/include/nodes/parsenodes.h
# https://github.com/pganalyze/libpg_query/blob/13-latest/src/postgres/include/nodes/primnodes.h
# It's possible that more dynamic nodes will be found. Their tags should be
# added to this set.
DYNAMIC_NODE_TAGS = {"SQLValueFunction", "FuncCall"}


def get_column_attnum_from_names_as_map(table_oid, column_names, engine, metadata, connection_to_use=None):
statement = _get_columns_attnum_from_names(table_oid, column_names, engine, metadata=metadata)
@@ -127,11 +118,16 @@ def get_column_default_dict(table_oid, attnum, engine, metadata, connection_to_u
metadata=metadata,
connection_to_use=connection_to_use,
)
if column.server_default is None:
default = column.server_default

if default is None:
return

is_dynamic = _is_default_expr_dynamic(column.server_default)
sql_text = str(column.server_default.arg)
is_dynamic = execute_msar_func_with_engine(
engine, 'is_default_possibly_dynamic', table_oid, attnum
).fetchone()[0]

sql_text = str(default.arg) if not isinstance(default, Identity) else 'identity'

if is_dynamic:
warnings.warn(
@@ -203,12 +199,3 @@ def _statement_for_triples_of_column_name_and_attnum_and_table_oid(
conditions.append(attnum_positive)
sel = sel.where(and_(*conditions))
return sel


def _is_default_expr_dynamic(server_default):
prepared_expr = f"""SELECT {server_default.arg.text};"""
expr_ast_root = Node(parse_sql(prepared_expr))
ast_nodes = {
n.node_tag for n in expr_ast_root.traverse() if isinstance(n, Node)
}
return not ast_nodes.isdisjoint(DYNAMIC_NODE_TAGS)
53 changes: 24 additions & 29 deletions db/links/operations/create.py
@@ -1,13 +1,5 @@
from sqlalchemy import ForeignKey, MetaData
from db.connection import execute_msar_func_with_engine

from db.columns.base import MathesarColumn
from db.constraints.utils import naming_convention
from db.tables.operations.create import create_mathesar_table
from db.tables.operations.select import reflect_tables_from_oids
from db.tables.utils import get_primary_key_column
from db.metadata import get_empty_metadata


def create_foreign_key_link(
engine,
@@ -41,24 +33,27 @@ def create_foreign_key_link(
).fetchone()[0]


def create_many_to_many_link(engine, schema, map_table_name, referents):
with engine.begin() as conn:
referent_tables_oid = [referent['referent_table'] for referent in referents]
referent_tables = reflect_tables_from_oids(
referent_tables_oid, engine, connection_to_use=conn, metadata=get_empty_metadata()
)
metadata = MetaData(bind=engine, schema=schema, naming_convention=naming_convention)
# Throws sqlalchemy.exc.NoReferencedTableError if metadata is not reflected.
metadata.reflect()
referrer_columns = []
for referent in referents:
referent_table_oid = referent['referent_table']
referent_table = referent_tables[referent_table_oid]
col_name = referent['column_name']
primary_key_column = get_primary_key_column(referent_table)
foreign_keys = {ForeignKey(primary_key_column)}
column = MathesarColumn(
col_name, primary_key_column.type, foreign_keys=foreign_keys,
)
referrer_columns.append(column)
create_mathesar_table(map_table_name, schema, referrer_columns, engine, metadata)
def create_many_to_many_link(engine, schema_oid, map_table_name, referents_dict):
"""
Creates a Many-to-Many link.
Args:
engine: SQLAlchemy engine object for connecting.
schema_oid: The OID of the schema in
which the new referrer table is to be created.
map_table_name: Name of the referrer table to be created.
referents_dict: A Python dict that contains 2 keys
'referent_table_oids' & 'column_names' with values as
ordered lists of table_oids & col_names respectively
Returns:
Returns the OID of the newly created table.
"""
return execute_msar_func_with_engine(
engine,
'create_many_to_many_link',
schema_oid,
map_table_name,
referents_dict['referent_table_oids'],
referents_dict['column_names']
).fetchone()[0]
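
(Editorial aside, not part of this commit.) A rough usage sketch of the new signature; the schema OID, table OIDs, and names below are hypothetical:

# Hypothetical: link "Movies" and "Actors" through a new "Movies_Actors" map table.
map_table_oid = create_many_to_many_link(
    engine,
    schema_oid,                # OID of the schema that will hold the map table
    "Movies_Actors",           # name of the referrer (map) table to create
    {
        "referent_table_oids": [movies_table_oid, actors_table_oid],
        "column_names": ["movie_id", "actor_id"],
    },
)

The table creation now happens in a database-side msar function instead of the SQLAlchemy reflection shown in the removed lines above.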
8 changes: 4 additions & 4 deletions db/records/operations/group.py
@@ -49,9 +49,9 @@ def __init__(
prefix_length=None,
extract_field=None,
):
self._columns = tuple(columns) if type(columns) != str else tuple([columns])
self._columns = tuple(columns) if type(columns) is not str else tuple([columns])
self._mode = mode
if type(preproc) == str:
if type(preproc) is str:
self._preproc = tuple([preproc])
elif preproc is not None:
self._preproc = tuple(preproc)
@@ -134,7 +134,7 @@ def validate(self):

elif (
self.mode == GroupMode.PERCENTILE.value
and not type(self.num_groups) == int
and not type(self.num_groups) is int
):
raise records_exceptions.BadGroupFormat(
f'{GroupMode.PERCENTILE.value} mode requires integer num_groups'
@@ -182,7 +182,7 @@ def validate(self):
)

for col in self.columns:
if type(col) != str:
if type(col) is not str:
raise records_exceptions.BadGroupFormat(
f"Group column {col} must be a string."
)
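
(Editorial aside, not part of this commit.) The == to "is" changes in this file and in insert.py/update.py below match the usual lint guidance for exact type checks (flake8's E721). Both forms reject subclasses, so behaviour is unchanged; isinstance() would not:

class MyStr(str):
    pass

s = MyStr("x")
print(type(s) is str)      # False -- exact type only, same result as the old == check
print(isinstance(s, str))  # True  -- would also accept subclasses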
43 changes: 30 additions & 13 deletions db/records/operations/insert.py
@@ -7,6 +7,7 @@
from psycopg2.errors import NotNullViolation, ForeignKeyViolation, DatatypeMismatch, UniqueViolation, ExclusionViolation
from db.columns.exceptions import NotNullError, ForeignKeyError, TypeMismatchError, UniqueValueError, ExclusionError
from db.columns.base import MathesarColumn
from db.constants import ID, ID_ORIGINAL
from db.encoding_utils import get_sql_compatible_encoding
from db.records.operations.select import get_record
from sqlalchemy import select
@@ -33,6 +34,18 @@ def insert_record_or_records(table, engine, record_data):
return None


def get_records_from_dataframe(df):
"""
We convert the dataframe to JSON using to_json() method and then to a Python object.
This method replaces 'NaN' values in the dataframe with 'None' values in Python
object. The reason behind not using df.to_dict() method is because it stringifies
'NaN' values rather than converting them to a 'None' value.
We pass 'records' as the orientation parameter because we want each record to contain
data of a single row and not of a single column (which is the default behaviour).
"""
return json.loads(df.to_json(orient='records'))


def insert_records_from_json(table, engine, json_filepath, column_names, max_level):
"""
Normalizes JSON data and inserts it into a table.
@@ -49,10 +62,7 @@ def insert_records_from_json(table, engine, json_filepath, column_names, max_lev
2. We normalize data into a pandas dataframe using pandas.json_normalize() method.
The method takes column names as meta. We provide all possible keys as column
names, hence it adds missing keys to JSON objects and marks their values as NaN.
3. We convert the dataframe to JSON using to_json() method and then to a Python object.
This method replaces 'NaN' values in the dataframe with 'None' values in Python
object. The reason behind not using df.to_dict() method is because it stringifies
'NaN' values rather than converting them to a 'None' value.
3. We get records from the dataframe using the method get_records_from_dataframe().
4. The processed data is now a list of dict objects. Each dict has same keys, that are
the column names of the table. We loop through each dict object, and if any value is
a dict or a list, we stringify them before inserting them into the table. This way,
@@ -77,16 +87,23 @@ our table and not just the keys from the first JSON object.
our table and not just the keys from the first JSON object.
"""
df = pandas.json_normalize(data, max_level=max_level, meta=column_names)
data = json.loads(df.to_json(orient='records'))
records = get_records_from_dataframe(df)

for i, row in enumerate(data):
data[i] = {
for i, row in enumerate(records):
if ID in row and ID_ORIGINAL in column_names:
row[ID_ORIGINAL] = row.pop("id")
records[i] = {
k: json.dumps(v)
if (isinstance(v, dict) or isinstance(v, list))
else v
for k, v in row.items()
}
insert_record_or_records(table, engine, data)
insert_record_or_records(table, engine, records)


def insert_records_from_excel(table, engine, dataframe):
records = get_records_from_dataframe(dataframe)
insert_record_or_records(table, engine, records)


def insert_records_from_csv(table, engine, csv_filepath, column_names, header, delimiter=None, escape=None, quote=None, encoding=None):
@@ -159,22 +176,22 @@ def insert_from_select(from_table, target_table, engine, col_mappings=None):
try:
result = conn.execute(ins)
except IntegrityError as e:
if type(e.orig) == NotNullViolation:
if type(e.orig) is NotNullViolation:
raise NotNullError
elif type(e.orig) == ForeignKeyViolation:
elif type(e.orig) is ForeignKeyViolation:
raise ForeignKeyError
elif type(e.orig) == UniqueViolation:
elif type(e.orig) is UniqueViolation:
# ToDo: Try to differentiate between the types of unique violations
# Scenario 1: Adding a duplicate value into a column with uniqueness constraint in the target table.
# Scenario 2: Adding a non existing value twice in a column with uniqueness constraint in the target table.
# Both the scenarios currently result in the same exception being thrown.
raise UniqueValueError
elif type(e.orig) == ExclusionViolation:
elif type(e.orig) is ExclusionViolation:
raise ExclusionError
else:
raise e
except ProgrammingError as e:
if type(e.orig) == DatatypeMismatch:
if type(e.orig) is DatatypeMismatch:
raise TypeMismatchError
else:
raise e
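
(Editorial aside, not part of this commit.) The NaN handling described in the new get_records_from_dataframe docstring can be seen on a tiny frame:

import pandas

df = pandas.DataFrame([{"a": 1, "b": None}, {"a": 2, "b": 3}])
df.to_dict(orient="records")    # [{'a': 1, 'b': nan}, {'a': 2, 'b': 3.0}]
get_records_from_dataframe(df)  # [{'a': 1, 'b': None}, {'a': 2, 'b': 3.0}]

Round-tripping through to_json() and json.loads() is what turns the float NaN placeholders into Python None before insertion.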
4 changes: 2 additions & 2 deletions db/records/operations/update.py
@@ -13,9 +13,9 @@ def update_record(table, engine, id_value, record_data):
table.update().where(primary_key_column == id_value).values(record_data)
)
except DataError as e:
if type(e.orig) == DatetimeFieldOverflow:
if type(e.orig) is DatetimeFieldOverflow:
raise InvalidDate
elif type(e.orig) == InvalidDatetimeFormat:
elif type(e.orig) is InvalidDatetimeFormat:
raise InvalidDateFormat
else:
raise e
