Merge branch 'develop' of github.com:centerofci/mathesar into i18n-load-en-translations-parallely
rajatvijay committed Sep 6, 2023
2 parents b0ab693 + 2726d9b commit 855da7b
Showing 253 changed files with 8,182 additions and 4,125 deletions.
14 changes: 1 addition & 13 deletions .github/workflows/run-pytest.yml
@@ -32,17 +32,5 @@ jobs:
- name: Build the stack
run: docker compose -f docker-compose.yml -f docker-compose.dev.yml up --build -d test-service

- name: Create coverage directory
run: docker exec mathesar_service_test mkdir coverage_report

- name: Run tests with pytest
run: docker exec mathesar_service_test pytest --exitfirst --ignore=mathesar/tests/integration/ --cov-report=xml:coverage_report/coverage.xml

- name: Save the coverage report
uses: codecov/codecov-action@v2
with:
token: ${{ secrets.CODECOV_TOKEN }}
directory: ./coverage_report
flags: pytest-backend
name: codecov-mathesar
verbose: true
run: docker exec mathesar_service_test ./run_pytest.sh
3 changes: 3 additions & 0 deletions .gitignore
@@ -193,3 +193,6 @@ test-results/

# core dumps generated sometimes when something goes wrong with Playwright
core

# non tracked settings
config/settings/local.py
2 changes: 1 addition & 1 deletion Dockerfile.devdb
@@ -1,4 +1,4 @@
FROM postgres:13

RUN apt update
RUN apt install -y postgresql-13-pgtap && rm -rf /var/lib/apt/lists/*
RUN apt install -y postgresql-13-pgtap postgresql-13-pldebugger && rm -rf /var/lib/apt/lists/*
1 change: 1 addition & 0 deletions config/settings/common_settings.py
@@ -192,6 +192,7 @@ def pipe_delim(pipe_string):
'DESCRIPTION': '',
'VERSION': '1.0.0',
'SERVE_INCLUDE_SCHEMA': False,
'PREPROCESSING_HOOKS': ['config.settings.openapi.custom_preprocessing_hook'],
'POSTPROCESSING_HOOKS': [
'config.settings.openapi.remove_url_prefix_hook',
],
7 changes: 7 additions & 0 deletions config/settings/development.py
@@ -1,3 +1,10 @@
from config.settings.common_settings import * # noqa

# Override default settings


# Use a local.py module for settings that shouldn't be version tracked
try:
from .local import * # noqa
except ImportError:
pass
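
(Editorial aside, not part of this commit.) The same try/except import is added to production.py below, so any untracked module at config/settings/local.py is layered on top of the tracked settings. A minimal sketch of such an override file, assuming standard Django setting names, might look like:

# config/settings/local.py -- hypothetical, untracked developer overrides.
# Loaded last by development.py / production.py, so values here take precedence.
DEBUG = True
ALLOWED_HOSTS = ['localhost', '127.0.0.1']

Because the import is wrapped in try/except ImportError, environments without a local.py are unaffected.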
9 changes: 9 additions & 0 deletions config/settings/openapi.py
@@ -1,3 +1,12 @@
def custom_preprocessing_hook(endpoints):
filtered = []
for (path, path_regex, method, callback) in endpoints:
# Remove all but DRF API endpoints
if path.startswith("/api/db/v0/databases/") or path.startswith("/api/db/v0/data_files/") or path.startswith("/api/db/v0/schemas/"):
filtered.append((path, path_regex, method, callback))
return filtered


def remove_url_prefix_hook(result, **kwargs):
# Remove namespace and version URL prefix from the operation Id of the generated API schema
for path, path_info in result['paths'].items():
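
(Editorial aside, not part of this commit.) custom_preprocessing_hook keeps only the DRF endpoints under the three /api/db/v0/ prefixes. A quick sketch of its behaviour on made-up endpoint tuples (drf-spectacular passes (path, path_regex, method, callback) 4-tuples):

endpoints = [
    ("/api/db/v0/schemas/", r"^api/db/v0/schemas/$", "GET", None),
    ("/api/ui/v0/users/", r"^api/ui/v0/users/$", "GET", None),
]
# Only the /api/db/v0/ path survives the filter.
assert [e[0] for e in custom_preprocessing_hook(endpoints)] == ["/api/db/v0/schemas/"]

The new PREPROCESSING_HOOKS entry in common_settings.py (above) is what wires this hook into schema generation.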
7 changes: 7 additions & 0 deletions config/settings/production.py
@@ -1,3 +1,10 @@
from config.settings.common_settings import * # noqa

# Override default settings


# Use a local.py module for settings that shouldn't be version tracked
try:
from .local import * # noqa
except ImportError:
pass
38 changes: 37 additions & 1 deletion conftest.py
@@ -54,7 +54,7 @@ def create_db(request, SES_engine_cache):
A factory for Postgres mathesar-installed databases. A fixture made of this method tears down
created dbs when leaving scope.
This method is used to create two fixtures with different scopes, that's why it's not a fixture
This method is used to create fixtures with different scopes, that's why it's not a fixture
itself.
"""
engine_cache = SES_engine_cache
@@ -261,6 +261,8 @@ def _get_connection_string(username, password, hostname, database):
LIBRARY_SQL = os.path.join(RESOURCES, "library_without_checkouts.sql")
LIBRARY_CHECKOUTS_SQL = os.path.join(RESOURCES, "library_add_checkouts.sql")
FRAUDULENT_PAYMENTS_SQL = os.path.join(RESOURCES, "fraudulent_payments.sql")
PLAYER_PROFILES_SQL = os.path.join(RESOURCES, "player_profiles.sql")
MARATHON_ATHLETES_SQL = os.path.join(RESOURCES, "marathon_athletes.sql")


@pytest.fixture
@@ -349,3 +351,37 @@ def payments_db_table(engine_with_fraudulent_payment):
metadata = MetaData(bind=engine)
table = Table("Payments", metadata, schema=schema, autoload_with=engine)
return table


@pytest.fixture
def engine_with_player_profiles(engine_with_schema):
engine, schema = engine_with_schema
with engine.begin() as conn, open(PLAYER_PROFILES_SQL) as f:
conn.execute(text(f"SET search_path={schema}"))
conn.execute(text(f.read()))
yield engine, schema


@pytest.fixture
def players_db_table(engine_with_player_profiles):
engine, schema = engine_with_player_profiles
metadata = MetaData(bind=engine)
table = Table("Players", metadata, schema=schema, autoload_with=engine)
return table


@pytest.fixture
def engine_with_marathon_athletes(engine_with_schema):
engine, schema = engine_with_schema
with engine.begin() as conn, open(MARATHON_ATHLETES_SQL) as f:
conn.execute(text(f"SET search_path={schema}"))
conn.execute(text(f.read()))
yield engine, schema


@pytest.fixture
def athletes_db_table(engine_with_marathon_athletes):
engine, schema = engine_with_marathon_athletes
metadata = MetaData(bind=engine)
table = Table("Marathon", metadata, schema=schema, autoload_with=engine)
return table
33 changes: 10 additions & 23 deletions db/columns/operations/select.py
@@ -1,21 +1,12 @@
import warnings

from pglast import Node, parse_sql
from sqlalchemy import and_, asc, cast, select, text, exists
from sqlalchemy import and_, asc, cast, select, text, exists, Identity

from db.columns.exceptions import DynamicDefaultWarning
from db.connection import execute_msar_func_with_engine
from db.tables.operations.select import reflect_table_from_oid
from db.utils import execute_statement, get_pg_catalog_table

# These tags define which nodes in the AST built by pglast we consider to be
# "dynamic" when found in a column default clause. The nodes are best
# documented by C header files that define the underlying structs:
# https://github.com/pganalyze/libpg_query/blob/13-latest/src/postgres/include/nodes/parsenodes.h
# https://github.com/pganalyze/libpg_query/blob/13-latest/src/postgres/include/nodes/primnodes.h
# It's possible that more dynamic nodes will be found. Their tags should be
# added to this set.
DYNAMIC_NODE_TAGS = {"SQLValueFunction", "FuncCall"}


def get_column_attnum_from_names_as_map(table_oid, column_names, engine, metadata, connection_to_use=None):
statement = _get_columns_attnum_from_names(table_oid, column_names, engine, metadata=metadata)
@@ -127,11 +118,16 @@ def get_column_default_dict(table_oid, attnum, engine, metadata, connection_to_u
metadata=metadata,
connection_to_use=connection_to_use,
)
if column.server_default is None:
default = column.server_default

if default is None:
return

is_dynamic = _is_default_expr_dynamic(column.server_default)
sql_text = str(column.server_default.arg)
is_dynamic = execute_msar_func_with_engine(
engine, 'is_default_possibly_dynamic', table_oid, attnum
).fetchone()[0]

sql_text = str(default.arg) if not isinstance(default, Identity) else 'identity'

if is_dynamic:
warnings.warn(
@@ -203,12 +199,3 @@ def _statement_for_triples_of_column_name_and_attnum_and_table_oid(
conditions.append(attnum_positive)
sel = sel.where(and_(*conditions))
return sel


def _is_default_expr_dynamic(server_default):
prepared_expr = f"""SELECT {server_default.arg.text};"""
expr_ast_root = Node(parse_sql(prepared_expr))
ast_nodes = {
n.node_tag for n in expr_ast_root.traverse() if isinstance(n, Node)
}
return not ast_nodes.isdisjoint(DYNAMIC_NODE_TAGS)
53 changes: 24 additions & 29 deletions db/links/operations/create.py
@@ -1,13 +1,5 @@
from sqlalchemy import ForeignKey, MetaData
from db.connection import execute_msar_func_with_engine

from db.columns.base import MathesarColumn
from db.constraints.utils import naming_convention
from db.tables.operations.create import create_mathesar_table
from db.tables.operations.select import reflect_tables_from_oids
from db.tables.utils import get_primary_key_column
from db.metadata import get_empty_metadata


def create_foreign_key_link(
engine,
@@ -41,24 +33,27 @@ def create_foreign_key_link(
).fetchone()[0]


def create_many_to_many_link(engine, schema, map_table_name, referents):
with engine.begin() as conn:
referent_tables_oid = [referent['referent_table'] for referent in referents]
referent_tables = reflect_tables_from_oids(
referent_tables_oid, engine, connection_to_use=conn, metadata=get_empty_metadata()
)
metadata = MetaData(bind=engine, schema=schema, naming_convention=naming_convention)
# Throws sqlalchemy.exc.NoReferencedTableError if metadata is not reflected.
metadata.reflect()
referrer_columns = []
for referent in referents:
referent_table_oid = referent['referent_table']
referent_table = referent_tables[referent_table_oid]
col_name = referent['column_name']
primary_key_column = get_primary_key_column(referent_table)
foreign_keys = {ForeignKey(primary_key_column)}
column = MathesarColumn(
col_name, primary_key_column.type, foreign_keys=foreign_keys,
)
referrer_columns.append(column)
create_mathesar_table(map_table_name, schema, referrer_columns, engine, metadata)
def create_many_to_many_link(engine, schema_oid, map_table_name, referents_dict):
"""
Creates a Many-to-Many link.
Args:
engine: SQLAlchemy engine object for connecting.
schema_oid: The OID of the schema in
which the new referrer table is to be created.
map_table_name: Name of the referrer table to be created.
referents_dict: A Python dict that contains 2 keys
'referent_table_oids' & 'column_names' with values as
ordered lists of table_oids & col_names respectively
Returns:
Returns the OID of the newly created table.
"""
return execute_msar_func_with_engine(
engine,
'create_many_to_many_link',
schema_oid,
map_table_name,
referents_dict['referent_table_oids'],
referents_dict['column_names']
).fetchone()[0]
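
(Editorial aside, not part of this commit.) A rough usage sketch of the new signature; the schema OID, table OIDs, and names below are hypothetical:

# Hypothetical: link "Movies" and "Actors" through a new "Movies_Actors" map table.
map_table_oid = create_many_to_many_link(
    engine,
    schema_oid,                # OID of the schema that will hold the map table
    "Movies_Actors",           # name of the referrer (map) table to create
    {
        "referent_table_oids": [movies_table_oid, actors_table_oid],
        "column_names": ["movie_id", "actor_id"],
    },
)

The table creation now happens in a database-side msar function instead of the SQLAlchemy reflection shown in the removed lines above.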
8 changes: 4 additions & 4 deletions db/records/operations/group.py
@@ -49,9 +49,9 @@ def __init__(
prefix_length=None,
extract_field=None,
):
self._columns = tuple(columns) if type(columns) != str else tuple([columns])
self._columns = tuple(columns) if type(columns) is not str else tuple([columns])
self._mode = mode
if type(preproc) == str:
if type(preproc) is str:
self._preproc = tuple([preproc])
elif preproc is not None:
self._preproc = tuple(preproc)
@@ -134,7 +134,7 @@ def validate(self):

elif (
self.mode == GroupMode.PERCENTILE.value
and not type(self.num_groups) == int
and not type(self.num_groups) is int
):
raise records_exceptions.BadGroupFormat(
f'{GroupMode.PERCENTILE.value} mode requires integer num_groups'
@@ -182,7 +182,7 @@ def validate(self):
)

for col in self.columns:
if type(col) != str:
if type(col) is not str:
raise records_exceptions.BadGroupFormat(
f"Group column {col} must be a string."
)
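
(Editorial aside, not part of this commit.) The == to "is" changes in this file and in insert.py/update.py below match the usual lint guidance for exact type checks (flake8's E721). Both forms reject subclasses, so behaviour is unchanged; isinstance() would not:

class MyStr(str):
    pass

s = MyStr("x")
print(type(s) is str)      # False -- exact type only, same result as the old == check
print(isinstance(s, str))  # True  -- would also accept subclasses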
43 changes: 30 additions & 13 deletions db/records/operations/insert.py
@@ -7,6 +7,7 @@
from psycopg2.errors import NotNullViolation, ForeignKeyViolation, DatatypeMismatch, UniqueViolation, ExclusionViolation
from db.columns.exceptions import NotNullError, ForeignKeyError, TypeMismatchError, UniqueValueError, ExclusionError
from db.columns.base import MathesarColumn
from db.constants import ID, ID_ORIGINAL
from db.encoding_utils import get_sql_compatible_encoding
from db.records.operations.select import get_record
from sqlalchemy import select
@@ -33,6 +34,18 @@ def insert_record_or_records(table, engine, record_data):
return None


def get_records_from_dataframe(df):
"""
We convert the dataframe to JSON using to_json() method and then to a Python object.
This method replaces 'NaN' values in the dataframe with 'None' values in Python
object. The reason behind not using df.to_dict() method is because it stringifies
'NaN' values rather than converting them to a 'None' value.
We pass 'records' as the orientation parameter because we want each record to contain
data of a single row and not of a single column (which is the default behaviour).
"""
return json.loads(df.to_json(orient='records'))


def insert_records_from_json(table, engine, json_filepath, column_names, max_level):
"""
Normalizes JSON data and inserts it into a table.
@@ -49,10 +62,7 @@ def insert_records_from_json(table, engine, json_filepath, column_names, max_lev
2. We normalize data into a pandas dataframe using pandas.json_normalize() method.
The method takes column names as meta. We provide all possible keys as column
names, hence it adds missing keys to JSON objects and marks their values as NaN.
3. We convert the dataframe to JSON using to_json() method and then to a Python object.
This method replaces 'NaN' values in the dataframe with 'None' values in Python
object. The reason behind not using df.to_dict() method is because it stringifies
'NaN' values rather than converting them to a 'None' value.
3. We get records from the dataframe using the method get_records_from_dataframe().
4. The processed data is now a list of dict objects. Each dict has same keys, that are
the column names of the table. We loop through each dict object, and if any value is
a dict or a list, we stringify them before inserting them into the table. This way,
@@ -77,16 +87,23 @@ our table and not just the keys from the first JSON object.
our table and not just the keys from the first JSON object.
"""
df = pandas.json_normalize(data, max_level=max_level, meta=column_names)
data = json.loads(df.to_json(orient='records'))
records = get_records_from_dataframe(df)

for i, row in enumerate(data):
data[i] = {
for i, row in enumerate(records):
if ID in row and ID_ORIGINAL in column_names:
row[ID_ORIGINAL] = row.pop("id")
records[i] = {
k: json.dumps(v)
if (isinstance(v, dict) or isinstance(v, list))
else v
for k, v in row.items()
}
insert_record_or_records(table, engine, data)
insert_record_or_records(table, engine, records)


def insert_records_from_excel(table, engine, dataframe):
records = get_records_from_dataframe(dataframe)
insert_record_or_records(table, engine, records)


def insert_records_from_csv(table, engine, csv_filepath, column_names, header, delimiter=None, escape=None, quote=None, encoding=None):
@@ -159,22 +176,22 @@ def insert_from_select(from_table, target_table, engine, col_mappings=None):
try:
result = conn.execute(ins)
except IntegrityError as e:
if type(e.orig) == NotNullViolation:
if type(e.orig) is NotNullViolation:
raise NotNullError
elif type(e.orig) == ForeignKeyViolation:
elif type(e.orig) is ForeignKeyViolation:
raise ForeignKeyError
elif type(e.orig) == UniqueViolation:
elif type(e.orig) is UniqueViolation:
# ToDo: Try to differentiate between the types of unique violations
# Scenario 1: Adding a duplicate value into a column with uniqueness constraint in the target table.
# Scenario 2: Adding a non existing value twice in a column with uniqueness constraint in the target table.
# Both the scenarios currently result in the same exception being thrown.
raise UniqueValueError
elif type(e.orig) == ExclusionViolation:
elif type(e.orig) is ExclusionViolation:
raise ExclusionError
else:
raise e
except ProgrammingError as e:
if type(e.orig) == DatatypeMismatch:
if type(e.orig) is DatatypeMismatch:
raise TypeMismatchError
else:
raise e
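
(Editorial aside, not part of this commit.) The NaN handling described in the new get_records_from_dataframe docstring can be seen on a tiny frame:

import pandas

df = pandas.DataFrame([{"a": 1, "b": None}, {"a": 2, "b": 3}])
df.to_dict(orient="records")    # [{'a': 1, 'b': nan}, {'a': 2, 'b': 3.0}]
get_records_from_dataframe(df)  # [{'a': 1, 'b': None}, {'a': 2, 'b': 3.0}]

Round-tripping through to_json() and json.loads() is what turns the float NaN placeholders into Python None before insertion.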
4 changes: 2 additions & 2 deletions db/records/operations/update.py
@@ -13,9 +13,9 @@ def update_record(table, engine, id_value, record_data):
table.update().where(primary_key_column == id_value).values(record_data)
)
except DataError as e:
if type(e.orig) == DatetimeFieldOverflow:
if type(e.orig) is DatetimeFieldOverflow:
raise InvalidDate
elif type(e.orig) == InvalidDatetimeFormat:
elif type(e.orig) is InvalidDatetimeFormat:
raise InvalidDateFormat
else:
raise e
