Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removed code duplication while importing datafiles #2926

Merged
merged 3 commits into from
Jun 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
36 changes: 36 additions & 0 deletions mathesar/imports/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from mathesar.database.base import create_mathesar_engine
from mathesar.models.base import Table
from mathesar.imports.csv import create_db_table_from_csv_data_file
from mathesar.imports.json import create_db_table_from_json_data_file
from db.tables.operations.select import get_oid_from_table
from mathesar.errors import InvalidTableError

ALLOWED_DELIMITERS = ",\t:|;"
SAMPLE_SIZE = 20000
CHECK_ROWS = 10


def create_table_from_data_file(data_file, name, schema, comment=None):
    """
    Import a data file into a new database table and return its Table model.

    The importer is chosen from ``data_file.type``: 'csv' and 'tsv' are
    handled by ``create_db_table_from_csv_data_file``, 'json' by
    ``create_db_table_from_json_data_file``. Any other type raises
    ``InvalidTableError``.

    The returned Table is saved with ``import_verified`` set to False, and
    ``data_file.table_imported_to`` is set to it and saved.
    """
    engine = create_mathesar_engine(schema.database.name)

    file_type = data_file.type
    if file_type in ('csv', 'tsv'):
        build_db_table = create_db_table_from_csv_data_file
    elif file_type == 'json':
        build_db_table = create_db_table_from_json_data_file
    else:
        raise InvalidTableError

    db_table = build_db_table(data_file, name, schema, comment=comment)
    imported_oid = get_oid_from_table(db_table.name, db_table.schema, engine)

    # Look the model up through current_objects rather than objects: objects
    # triggers re-reflection, which would race with the table creation.
    table = Table.current_objects.get(oid=imported_oid, schema=schema)

    # A freshly imported table starts out unverified.
    table.import_verified = False
    table.save()

    # Record on the data file which table it was imported into.
    data_file.table_imported_to = table
    data_file.save()
    return table
110 changes: 30 additions & 80 deletions mathesar/imports/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@

import clevercsv as csv

from db.identifiers import truncate_if_necessary
from db.tables.operations.alter import update_pk_sequence_to_latest
from mathesar.database.base import create_mathesar_engine
from mathesar.models.base import Table
from db.records.operations.insert import insert_records_from_csv
from db.tables.operations.create import create_string_column_table
from db.tables.operations.select import get_oid_from_table
from db.tables.operations.drop import drop_table
from mathesar.errors import InvalidTableError
from mathesar.imports.utils import process_column_names
from db.constants import ID, ID_ORIGINAL, COLUMN_NAME_TEMPLATE
from psycopg2.errors import IntegrityError, DataError

Expand Down Expand Up @@ -109,98 +107,50 @@ def get_sv_reader(file, header, dialect=None):
return reader


def create_db_table_from_data_file(data_file, name, schema, comment=None):
def insert_data_from_csv_data_file(name, schema, column_names, engine, comment, data_file):
    """
    Create a table via ``create_string_column_table`` and fill it from a
    separated-values data file.

    The delimiter, quote and escape characters are taken from ``data_file``;
    the encoding is obtained with ``get_file_encoding``. Returns the table
    object produced by ``create_string_column_table``.
    """
    sv_dialect = csv.dialect.SimpleDialect(
        data_file.delimiter,
        data_file.quotechar,
        data_file.escapechar,
    )
    file_encoding = get_file_encoding(data_file.file)

    new_table = create_string_column_table(
        name=name,
        schema=schema.name,
        column_names=column_names,
        engine=engine,
        comment=comment,
    )

    source_path = data_file.file.path
    has_header = data_file.header
    format_options = {
        'delimiter': sv_dialect.delimiter,
        'escape': sv_dialect.escapechar,
        'quote': sv_dialect.quotechar,
        'encoding': file_encoding,
    }
    insert_records_from_csv(
        new_table,
        engine,
        source_path,
        column_names,
        has_header,
        **format_options,
    )
    return new_table


def create_db_table_from_csv_data_file(data_file, name, schema, comment=None):
db_name = schema.database.name
engine = create_mathesar_engine(db_name)
sv_filename = data_file.file.path
header = data_file.header
dialect = csv.dialect.SimpleDialect(data_file.delimiter, data_file.quotechar,
data_file.escapechar)
encoding = get_file_encoding(data_file.file)
with open(sv_filename, 'rb') as sv_file:
sv_reader = get_sv_reader(sv_file, header, dialect=dialect)
column_names = _process_column_names(sv_reader.fieldnames)
table = create_string_column_table(
name=name,
schema=schema.name,
column_names=column_names,
engine=engine,
comment=comment,
)
column_names = process_column_names(sv_reader.fieldnames)
try:
insert_records_from_csv(
table,
engine,
sv_filename,
column_names,
header,
delimiter=dialect.delimiter,
escape=dialect.escapechar,
quote=dialect.quotechar,
encoding=encoding
)
table = insert_data_from_csv_data_file(name, schema, column_names, engine, comment, data_file)
update_pk_sequence_to_latest(engine, table)
except (IntegrityError, DataError):
drop_table(name=name, schema=schema.name, engine=engine)
column_names_alt = [
column_name if column_name != ID else ID_ORIGINAL
for column_name in column_names
]
table = create_string_column_table(
name=name,
schema=schema.name,
column_names=column_names_alt,
engine=engine,
comment=comment,
)
insert_records_from_csv(
table,
engine,
sv_filename,
column_names_alt,
header,
delimiter=dialect.delimiter,
escape=dialect.escapechar,
quote=dialect.quotechar,
encoding=encoding
)
insert_data_from_csv_data_file(name, schema, column_names_alt, engine, comment, data_file)
reset_reflection(db_name=db_name)
return table


def _process_column_names(column_names):
column_names = (
column_name.strip()
for column_name
in column_names
)
column_names = (
truncate_if_necessary(column_name)
for column_name
in column_names
)
column_names = (
f"{COLUMN_NAME_TEMPLATE}{i}" if name == '' else name
for i, name
in enumerate(column_names)
)
return list(column_names)


def create_table_from_csv(data_file, name, schema, comment=None):
    """
    Import a data file through ``create_db_table_from_data_file`` and return
    the matching Table model.

    The Table is saved with ``import_verified`` set to False, and
    ``data_file.table_imported_to`` is set to it and saved.
    """
    engine = create_mathesar_engine(schema.database.name)
    imported = create_db_table_from_data_file(
        data_file, name, schema, comment=comment
    )
    imported_oid = get_oid_from_table(imported.name, imported.schema, engine)

    # Look the model up through current_objects rather than objects: objects
    # triggers re-reflection, which would race with the table creation.
    table = Table.current_objects.get(oid=imported_oid, schema=schema)
    table.import_verified = False
    table.save()

    # Record on the data file which table it was imported into.
    data_file.table_imported_to = table
    data_file.save()
    return table
83 changes: 19 additions & 64 deletions mathesar/imports/json.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import json

from db.identifiers import truncate_if_necessary
from db.tables.operations.alter import update_pk_sequence_to_latest
from mathesar.database.base import create_mathesar_engine
from mathesar.models.base import Table
from db.records.operations.insert import insert_records_from_json
from db.tables.operations.create import create_string_column_table
from db.tables.operations.select import get_oid_from_table
from db.tables.operations.drop import drop_table
from db.constants import ID, ID_ORIGINAL, COLUMN_NAME_TEMPLATE
from mathesar.imports.utils import process_column_names
from db.constants import ID, ID_ORIGINAL
from psycopg2.errors import IntegrityError, DataError

from mathesar.state import reset_reflection
Expand All @@ -23,82 +21,39 @@ def get_column_names_from_json(data_file):
return list(data.keys())


def _process_column_names(column_names):
column_names = (
column_name.strip()
for column_name
in column_names
)
column_names = (
truncate_if_necessary(column_name)
for column_name
in column_names
def insert_data_from_json_data_file(name, schema, column_names, engine, comment, json_filepath):
table = create_string_column_table(
name=name,
schema=schema.name,
column_names=column_names,
engine=engine,
comment=comment,
)
column_names = (
f"{COLUMN_NAME_TEMPLATE}{i}" if name == '' else name
for i, name
in enumerate(column_names)
insert_records_from_json(
table,
engine,
json_filepath
)
return list(column_names)
return table


def create_db_table_from_json_data_file(data_file, name, schema, comment=None):
db_name = schema.database.name
engine = create_mathesar_engine(db_name)
json_filepath = data_file.file.path
column_names = _process_column_names(
get_column_names_from_json(data_file.file.path)
)
table = create_string_column_table(
name=name,
schema=schema.name,
column_names=column_names,
engine=engine,
comment=comment,
column_names = process_column_names(
get_column_names_from_json(json_filepath)
)
try:
insert_records_from_json(
table,
engine,
json_filepath
)
table = insert_data_from_json_data_file(name, schema, column_names, engine, comment, json_filepath)
update_pk_sequence_to_latest(engine, table)
except (IntegrityError, DataError):
drop_table(name=name, schema=schema.name, engine=engine)
column_names_alt = [
fieldname if fieldname != ID else ID_ORIGINAL
for fieldname in column_names
]
table = create_string_column_table(
name=name,
schema=schema.name,
column_names=column_names_alt,
engine=engine,
comment=comment,
)
insert_records_from_json(
table,
engine,
json_filepath
)
reset_reflection(db_name=db_name)
return table
table = insert_data_from_json_data_file(name, schema, column_names_alt, engine, comment, json_filepath)


def create_table_from_json(data_file, name, schema, comment=None):
engine = create_mathesar_engine(schema.database.name)
db_table = create_db_table_from_json_data_file(
data_file, name, schema, comment=comment
)
db_table_oid = get_oid_from_table(
db_table.name, db_table.schema, engine
)
table = Table.current_objects.get(
oid=db_table_oid,
schema=schema,
)
table.import_verified = False
table.save()
data_file.table_imported_to = table
data_file.save()
reset_reflection(db_name=db_name)
return table
21 changes: 21 additions & 0 deletions mathesar/imports/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from db.identifiers import truncate_if_necessary
from db.constants import COLUMN_NAME_TEMPLATE


def process_column_names(column_names):
    """
    Normalize raw column names and return them as a list.

    For every name, in order: strip it, pass it through
    ``truncate_if_necessary``, and, when the result is the empty string,
    substitute ``COLUMN_NAME_TEMPLATE`` followed by the name's zero-based
    position in the input.
    """
    processed = []
    for position, raw_name in enumerate(column_names):
        cleaned = truncate_if_necessary(raw_name.strip())
        # Empty names get a positional placeholder; it is not truncated again.
        processed.append(
            f"{COLUMN_NAME_TEMPLATE}{position}" if cleaned == '' else cleaned
        )
    return processed
4 changes: 2 additions & 2 deletions mathesar/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from db.schemas.utils import get_schema_oid_from_name

import mathesar.tests.conftest
from mathesar.imports.csv import create_table_from_csv
from mathesar.imports.base import create_table_from_data_file
from mathesar.models.base import Schema, Table, Database, DataFile
from mathesar.models.base import Column as mathesar_model_column
from mathesar.models.users import DatabaseRole, SchemaRole, User
Expand Down Expand Up @@ -322,7 +322,7 @@ def create_table(create_schema):
def _create_table(table_name, schema_name, csv_filepath):
data_file = _get_datafile_for_path(csv_filepath)
schema_model = create_schema(schema_name)
return create_table_from_csv(data_file, table_name, schema_model)
return create_table_from_data_file(data_file, table_name, schema_model)
return _create_table


Expand Down