Skip to content

Commit

Permalink
Merge pull request #2926 from IamEzio/remove-duplication
Browse files Browse the repository at this point in the history
Removed code duplication while importing datafiles
  • Loading branch information
IamEzio committed Jun 17, 2023
2 parents ad605b5 + a62e850 commit 76f9d85
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 163 deletions.
36 changes: 36 additions & 0 deletions mathesar/imports/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from mathesar.database.base import create_mathesar_engine
from mathesar.models.base import Table
from mathesar.imports.csv import create_db_table_from_csv_data_file
from mathesar.imports.json import create_db_table_from_json_data_file
from db.tables.operations.select import get_oid_from_table
from mathesar.errors import InvalidTableError

ALLOWED_DELIMITERS = ",\t:|;"
SAMPLE_SIZE = 20000
CHECK_ROWS = 10


def create_table_from_data_file(data_file, name, schema, comment=None):
    """
    Import a data file and return the corresponding Mathesar ``Table`` model.

    Dispatches to the CSV/TSV or JSON importer based on ``data_file.type``,
    then looks up the Django model for the newly created database table.

    Args:
        data_file: DataFile model instance describing the uploaded file.
        name: Name for the new database table.
        schema: Schema model the table will be created in.
        comment: Optional comment to attach to the new table.

    Returns:
        The ``Table`` model instance pointing at the imported table.

    Raises:
        InvalidTableError: If ``data_file.type`` is not a supported format.
    """
    engine = create_mathesar_engine(schema.database.name)
    # Membership test instead of a chain of equality comparisons.
    if data_file.type in ('csv', 'tsv'):
        db_table = create_db_table_from_csv_data_file(
            data_file, name, schema, comment=comment
        )
    elif data_file.type == 'json':
        db_table = create_db_table_from_json_data_file(
            data_file, name, schema, comment=comment
        )
    else:
        raise InvalidTableError
    db_table_oid = get_oid_from_table(db_table.name, db_table.schema, engine)
    # Using current_objects to create the table instead of objects. objects
    # triggers re-reflection, which will cause a race condition to create the table
    table = Table.current_objects.get(
        oid=db_table_oid,
        schema=schema,
    )
    table.import_verified = False
    table.save()
    data_file.table_imported_to = table
    data_file.save()
    return table
110 changes: 30 additions & 80 deletions mathesar/imports/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@

import clevercsv as csv

from db.identifiers import truncate_if_necessary
from db.tables.operations.alter import update_pk_sequence_to_latest
from mathesar.database.base import create_mathesar_engine
from mathesar.models.base import Table
from db.records.operations.insert import insert_records_from_csv
from db.tables.operations.create import create_string_column_table
from db.tables.operations.select import get_oid_from_table
from db.tables.operations.drop import drop_table
from mathesar.errors import InvalidTableError
from mathesar.imports.utils import process_column_names
from db.constants import ID, ID_ORIGINAL, COLUMN_NAME_TEMPLATE
from psycopg2.errors import IntegrityError, DataError

Expand Down Expand Up @@ -109,98 +107,50 @@ def get_sv_reader(file, header, dialect=None):
return reader


def create_db_table_from_data_file(data_file, name, schema, comment=None):
def insert_data_from_csv_data_file(name, schema, column_names, engine, comment, data_file):
    """
    Create a string-typed table and bulk-load the CSV data file into it.

    Returns the SQLAlchemy table object that was created.
    """
    sv_dialect = csv.dialect.SimpleDialect(
        data_file.delimiter, data_file.quotechar, data_file.escapechar
    )
    file_encoding = get_file_encoding(data_file.file)
    new_table = create_string_column_table(
        name=name,
        schema=schema.name,
        column_names=column_names,
        engine=engine,
        comment=comment,
    )
    insert_records_from_csv(
        new_table,
        engine,
        data_file.file.path,
        column_names,
        data_file.header,
        delimiter=sv_dialect.delimiter,
        escape=sv_dialect.escapechar,
        quote=sv_dialect.quotechar,
        encoding=file_encoding
    )
    return new_table


def create_db_table_from_csv_data_file(data_file, name, schema, comment=None):
    """
    Import a CSV/TSV data file into a new string-typed database table.

    If the load fails because of a clash with the reserved ID column
    (IntegrityError/DataError), the table is dropped and re-created with
    the conflicting column renamed before retrying the load.
    """
    db_name = schema.database.name
    engine = create_mathesar_engine(db_name)
    sv_filename = data_file.file.path
    header = data_file.header
    dialect = csv.dialect.SimpleDialect(
        data_file.delimiter, data_file.quotechar, data_file.escapechar
    )
    # Read only the header row to derive the normalized column names.
    with open(sv_filename, 'rb') as sv_file:
        sv_reader = get_sv_reader(sv_file, header, dialect=dialect)
        column_names = process_column_names(sv_reader.fieldnames)
    try:
        table = insert_data_from_csv_data_file(
            name, schema, column_names, engine, comment, data_file
        )
        update_pk_sequence_to_latest(engine, table)
    except (IntegrityError, DataError):
        # Retry with the user's "id" column renamed out of the way.
        drop_table(name=name, schema=schema.name, engine=engine)
        column_names_alt = [
            ID_ORIGINAL if column_name == ID else column_name
            for column_name in column_names
        ]
        table = insert_data_from_csv_data_file(
            name, schema, column_names_alt, engine, comment, data_file
        )
    reset_reflection(db_name=db_name)
    return table


def _process_column_names(column_names):
    """
    Normalize raw column names: strip surrounding whitespace, truncate
    over-long names, and substitute a generated placeholder for any blank
    entry.
    """
    trimmed = (raw.strip() for raw in column_names)
    shortened = (truncate_if_necessary(stripped) for stripped in trimmed)
    return [
        name if name != '' else f"{COLUMN_NAME_TEMPLATE}{i}"
        for i, name in enumerate(shortened)
    ]


def create_table_from_csv(data_file, name, schema, comment=None):
    """
    Import a CSV data file and return the corresponding ``Table`` model.
    """
    engine = create_mathesar_engine(schema.database.name)
    db_table = create_db_table_from_data_file(
        data_file, name, schema, comment=comment
    )
    oid = get_oid_from_table(db_table.name, db_table.schema, engine)
    # current_objects avoids the re-reflection (and its race to create the
    # table) that the default manager would trigger.
    table = Table.current_objects.get(
        oid=oid,
        schema=schema,
    )
    table.import_verified = False
    table.save()
    data_file.table_imported_to = table
    data_file.save()
    return table
83 changes: 19 additions & 64 deletions mathesar/imports/json.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import json

from db.identifiers import truncate_if_necessary
from db.tables.operations.alter import update_pk_sequence_to_latest
from mathesar.database.base import create_mathesar_engine
from mathesar.models.base import Table
from db.records.operations.insert import insert_records_from_json
from db.tables.operations.create import create_string_column_table
from db.tables.operations.select import get_oid_from_table
from db.tables.operations.drop import drop_table
from db.constants import ID, ID_ORIGINAL, COLUMN_NAME_TEMPLATE
from mathesar.imports.utils import process_column_names
from db.constants import ID, ID_ORIGINAL
from psycopg2.errors import IntegrityError, DataError

from mathesar.state import reset_reflection
Expand All @@ -23,82 +21,39 @@ def get_column_names_from_json(data_file):
return list(data.keys())


def insert_data_from_json_data_file(name, schema, column_names, engine, comment, json_filepath):
    """
    Create a string-typed table and load the JSON data file into it.

    Returns the SQLAlchemy table object that was created.
    """
    new_table = create_string_column_table(
        name=name,
        schema=schema.name,
        column_names=column_names,
        engine=engine,
        comment=comment,
    )
    insert_records_from_json(new_table, engine, json_filepath)
    return new_table


def create_db_table_from_json_data_file(data_file, name, schema, comment=None):
    """
    Import a JSON data file into a new string-typed database table.

    If the load fails because of a clash with the reserved ID column
    (IntegrityError/DataError), the table is dropped and re-created with
    the conflicting field renamed before retrying the load.
    """
    db_name = schema.database.name
    engine = create_mathesar_engine(db_name)
    json_filepath = data_file.file.path
    column_names = process_column_names(
        get_column_names_from_json(json_filepath)
    )
    try:
        table = insert_data_from_json_data_file(
            name, schema, column_names, engine, comment, json_filepath
        )
        update_pk_sequence_to_latest(engine, table)
    except (IntegrityError, DataError):
        # Retry with the user's "id" field renamed out of the way.
        drop_table(name=name, schema=schema.name, engine=engine)
        column_names_alt = [
            ID_ORIGINAL if fieldname == ID else fieldname
            for fieldname in column_names
        ]
        table = insert_data_from_json_data_file(
            name, schema, column_names_alt, engine, comment, json_filepath
        )
    reset_reflection(db_name=db_name)
    return table


def create_table_from_json(data_file, name, schema, comment=None):
    """
    Import a JSON data file and return the corresponding ``Table`` model.
    """
    # Fix: ``db_name`` was referenced at the reset_reflection call below
    # without ever being assigned in this scope (NameError); bind it once
    # and reuse it for engine creation.
    db_name = schema.database.name
    engine = create_mathesar_engine(db_name)
    db_table = create_db_table_from_json_data_file(
        data_file, name, schema, comment=comment
    )
    db_table_oid = get_oid_from_table(
        db_table.name, db_table.schema, engine
    )
    # current_objects avoids triggering re-reflection and its race.
    table = Table.current_objects.get(
        oid=db_table_oid,
        schema=schema,
    )
    table.import_verified = False
    table.save()
    data_file.table_imported_to = table
    data_file.save()
    reset_reflection(db_name=db_name)
    return table
21 changes: 21 additions & 0 deletions mathesar/imports/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from db.identifiers import truncate_if_necessary
from db.constants import COLUMN_NAME_TEMPLATE


def process_column_names(column_names):
    """
    Normalize raw column names for table creation.

    Strips surrounding whitespace, truncates names that exceed the
    identifier length limit, and replaces any blank name with a generated
    placeholder based on its position.
    """
    trimmed = (raw.strip() for raw in column_names)
    shortened = (truncate_if_necessary(stripped) for stripped in trimmed)
    return [
        name if name != '' else f"{COLUMN_NAME_TEMPLATE}{i}"
        for i, name in enumerate(shortened)
    ]
4 changes: 2 additions & 2 deletions mathesar/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from db.schemas.utils import get_schema_oid_from_name

import mathesar.tests.conftest
from mathesar.imports.csv import create_table_from_csv
from mathesar.imports.base import create_table_from_data_file
from mathesar.models.base import Schema, Table, Database, DataFile
from mathesar.models.base import Column as mathesar_model_column
from mathesar.models.users import DatabaseRole, SchemaRole, User
Expand Down Expand Up @@ -322,7 +322,7 @@ def create_table(create_schema):
def _create_table(table_name, schema_name, csv_filepath):
# Build a DataFile from the CSV path, make sure the schema exists, then
# run the shared import pipeline to create and return the table.
data_file = _get_datafile_for_path(csv_filepath)
schema_model = create_schema(schema_name)
return create_table_from_data_file(data_file, table_name, schema_model)
return _create_table


Expand Down

0 comments on commit 76f9d85

Please sign in to comment.