diff --git a/HISTORY.rst b/HISTORY.rst index 915f41a..edc66e5 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,10 @@ History ======= +UNRELEASED +---------- +* Extend response of import job api + 0.5.0 (2023-12-19) ------------------ * Drop support of python 3.9 diff --git a/import_export_extensions/admin/model_admins/import_job_admin.py b/import_export_extensions/admin/model_admins/import_job_admin.py index e25ec17..1ff7915 100644 --- a/import_export_extensions/admin/model_admins/import_job_admin.py +++ b/import_export_extensions/admin/model_admins/import_job_admin.py @@ -141,6 +141,7 @@ def get_readonly_fields( readonly_fields.extend( [ "resource_path", + "input_errors_file", "data_file", "resource_kwargs", ], @@ -241,7 +242,10 @@ def get_fieldsets( data = ( _("Importing data"), { - "fields": ("_input_errors",), + "fields": ( + "input_errors_file", + "_input_errors", + ), "classes": ("collapse",), }, ) diff --git a/import_export_extensions/api/serializers/import_job.py b/import_export_extensions/api/serializers/import_job.py index ec939f6..ed3dc6e 100644 --- a/import_export_extensions/api/serializers/import_job.py +++ b/import_export_extensions/api/serializers/import_job.py @@ -5,6 +5,7 @@ from celery import states from ... import models, resources +from . 
import import_job_details as details
 from .progress import ProgressSerializer
 
 
@@ -26,13 +27,54 @@ class ImportJobSerializer(serializers.ModelSerializer):
 
     progress = ImportProgressSerializer()
 
+    import_params = details.ImportParamsSerializer(
+        read_only=True,
+        source="*",
+    )
+    totals = details.TotalsSerializer(
+        read_only=True,
+        source="*",
+    )
+    parse_error = serializers.CharField(
+        source="error_message",
+        read_only=True,
+        allow_blank=True,
+    )
+    input_error = details.InputErrorSerializer(
+        source="*",
+        read_only=True,
+    )
+    skipped_errors = details.SkippedErrorsSerializer(
+        source="*",
+        read_only=True,
+    )
+    importing_data = details.ImportingDataSerializer(
+        read_only=True,
+        source="*",
+    )
+    input_errors_file = serializers.FileField(
+        read_only=True,
+        allow_null=True,
+    )
+    is_all_rows_shown = details.IsAllRowsShowField(
+        source="*",
+        read_only=True,
+    )
+
     class Meta:
         model = models.ImportJob
         fields = (
             "id",
-            "import_status",
-            "data_file",
             "progress",
+            "import_status",
+            "import_params",
+            "totals",
+            "parse_error",
+            "input_error",
+            "skipped_errors",
+            "is_all_rows_shown",
+            "importing_data",
+            "input_errors_file",
             "import_started",
             "import_finished",
             "force_import",
diff --git a/import_export_extensions/api/serializers/import_job_details.py b/import_export_extensions/api/serializers/import_job_details.py
new file mode 100644
index 0000000..aac9764
--- /dev/null
+++ b/import_export_extensions/api/serializers/import_job_details.py
@@ -0,0 +1,224 @@
+import itertools
+import typing
+
+from rest_framework import serializers
+
+from import_export.results import RowResult
+
+from ... import models
+
+
+class SkippedErrorsDict(typing.TypedDict):
+    """Typed dict for skipped errors."""
+    non_field_skipped_errors: list[str]
+    field_skipped_errors: dict[str, list[str]]
+
+
+class ImportParamsSerializer(serializers.Serializer):
+    """Serializer for representing import parameters."""
+    data_file = serializers.FileField()
+    resource_path = serializers.CharField()
+    resource_kwargs = serializers.CharField()
+
+
+class ImportDiffSerializer(serializers.Serializer):
+    """Serializer for representing importing rows diff."""
+    previous = serializers.CharField(allow_blank=True, allow_null=True)
+    current = serializers.CharField(allow_blank=True, allow_null=True)
+
+
+class ImportRowSerializer(serializers.Serializer):
+    """Serializer for representing importing rows.
+
+    Used to generate correct openapi spec.
+
+    """
+    operation = serializers.CharField()
+    parsed_fields = serializers.ListField(
+        child=ImportDiffSerializer(allow_null=True),
+        allow_null=True,
+    )
+
+
+class ImportingDataSerializer(serializers.Serializer):
+    """Serializer for representing importing data."""
+    headers = serializers.ListField(
+        child=serializers.CharField(),
+    )
+    rows = serializers.ListField(
+        child=ImportRowSerializer(),
+    )
+
+    def to_representation(self, instance: models.ImportJob):
+        """Return dict with import details.
+
+        Jobs whose status is not in `success_statuses` are rendered from
+        the serializer's initial data. Error rows are left out of `rows`
+        because `InputErrorSerializer` reports them.
+
+        """
+        if instance.import_status not in models.ImportJob.success_statuses:
+            return super().to_representation(self.get_initial())
+
+        resource = instance.resource
+        # The visible fields do not depend on the row, so resolve them once
+        # (as a list, so they can be iterated twice per row).
+        visible_fields = list(resource.get_user_visible_fields())
+        rows = []
+        for row in instance.result.rows:
+            # errors displayed in input_error.row_errors(InputErrorSerializer)
+            if row.import_type == RowResult.IMPORT_TYPE_ERROR:
+                continue
+
+            original_fields = [
+                resource.export_field(f, row.original) if row.original else ""
+                for f in visible_fields
+            ]
+            current_fields = [
+                resource.export_field(f, row.instance)
+                for f in visible_fields
+            ]
+
+            rows.append({
+                "operation": row.import_type,
+                "parsed_fields": [
+                    {
+                        "previous": v1,
+                        "current": v2,
+                    } for v1, v2 in itertools.zip_longest(
+                        original_fields,
+                        current_fields,
+                        fillvalue="",
+                    )
+                ],
+            })
+
+        importing_data = {
+            "headers": instance.result.diff_headers,
+            "rows": rows,
+        }
+        return super().to_representation(importing_data)
+
+
+class TotalsSerializer(serializers.Serializer):
+    """Serializer to represent import totals."""
+    new = serializers.IntegerField(allow_null=True, required=False)
+    update = serializers.IntegerField(allow_null=True, required=False)
+    delete = serializers.IntegerField(allow_null=True, required=False)
+    skip = serializers.IntegerField(allow_null=True, required=False)
+    error = serializers.IntegerField(allow_null=True, required=False)
+
+    def to_representation(self, instance):
+        """Return dict with import totals."""
+        if instance.import_status not in models.ImportJob.results_statuses:
+            return super().to_representation(self.get_initial())
+        return super().to_representation(instance.result.totals)
+
+
+class RowError(serializers.Serializer):
+    """Represent single row errors."""
+    line = serializers.IntegerField()
+    error = serializers.CharField()
+    row = serializers.ListField(
+        child=serializers.CharField(),
+    )
+
+
+class InputErrorSerializer(serializers.Serializer):
+    """Represent Input errors."""
+    base_errors = serializers.ListField(
+        child=serializers.CharField(),
+    )
+    row_errors = serializers.ListField(
+        child=serializers.ListField(
+            child=RowError(),
+        ),
+    )
+
+    def to_representation(self, instance: models.ImportJob):
+        """Return dict with input errors.
+
+        `row_errors` holds one list per line that has errors; each item
+        of that list describes a single error of the line.
+
+        """
+        if instance.import_status not in models.ImportJob.results_statuses:
+            return super().to_representation(self.get_initial())
+
+        input_errors: dict[str, list[typing.Any]] = {
+            "base_errors": [],
+            "row_errors": [],
+        }
+
+        if instance.result.base_errors:
+            input_errors["base_errors"] = [
+                str(error.error) for error in instance.result.base_errors
+            ]
+
+        # Looping over an empty result is a no-op, so `row_errors()` is
+        # called once instead of once for a check and once for the loop.
+        for line, errors in instance.result.row_errors():
+            line_errors = [
+                {
+                    "line": line,
+                    "error": str(error.error),
+                    "row": error.row.values(),
+                } for error in errors
+            ]
+            input_errors["row_errors"].append(line_errors)
+
+        return super().to_representation(input_errors)
+
+
+class IsAllRowsShowField(serializers.BooleanField):
+    """Field for representing `is_all_rows_shown` value."""
+
+    def to_representation(self, instance):
+        """Return whether all rows are shown in importing data."""
+        if instance.import_status not in models.ImportJob.success_statuses:
+            return False
+        return instance.result.total_rows == len(instance.result.rows)
+
+
+class SkippedErrorsSerializer(serializers.Serializer):
+    """Serializer for import job skipped rows."""
+
+    non_field_skipped_errors = serializers.ListField(
+        child=serializers.CharField(),
+    )
+    field_skipped_errors = serializers.DictField(
+        child=serializers.ListField(child=serializers.CharField()),
+    )
+
+    def to_representation(self, instance: models.ImportJob):
+        """Parse skipped errors from import job result.
+
+        Errors of all skipped rows are merged: non-field errors into one
+        list, field errors into one list of messages per field name.
+
+        """
+        if (
+            instance.import_status
+            not in models.ImportJob.results_statuses
+        ):
+            return super().to_representation(self.get_initial())
+        skipped_errors: SkippedErrorsDict = {
+            "non_field_skipped_errors": [],
+            "field_skipped_errors": {},
+        }
+        field_errors = skipped_errors["field_skipped_errors"]
+        for row in instance.result.skipped_rows:
+            skipped_errors["non_field_skipped_errors"].extend(
+                str(error.error) for error in row.non_field_skipped_errors
+            )
+            for field, errors in row.field_skipped_errors.items():
+                # NOTE(review): assumes `error.messages` is a list of strings
+                # (as on Django's `ValidationError`) - confirm. The messages
+                # are flattened to match the declared list-of-strings schema
+                # and appended, so rows sharing a field keep all errors.
+                field_errors.setdefault(field, []).extend(
+                    itertools.chain.from_iterable(
+                        error.messages for error in errors
+                    ),
+                )
+        return super().to_representation(skipped_errors)
diff --git a/import_export_extensions/migrations/0006_importjob_input_errors_file.py b/import_export_extensions/migrations/0006_importjob_input_errors_file.py
new file mode 100644
index 0000000..fd1971c
--- /dev/null
+++ b/import_export_extensions/migrations/0006_importjob_input_errors_file.py
@@ 
-0,0 +1,30 @@
+# Generated by Django 4.2.7 on 2024-01-15 10:40
+
+from django.db import migrations, models
+import functools
+import import_export_extensions.models.tools
+
+
+class Migration(migrations.Migration):
+    """Add the nullable `input_errors_file` file field to `importjob`."""
+    dependencies = [
+        ("import_export_extensions", "0005_importjob_force_import"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="importjob",
+            name="input_errors_file",
+            field=models.FileField(
+                help_text="File that contain failed rows",
+                max_length=512,
+                null=True,
+                upload_to=functools.partial(
+                    import_export_extensions.models.tools.upload_file_to,
+                    *(),
+                    **{"main_folder_name": "import"}
+                ),
+                verbose_name="Input errors file",
+            ),
+        ),
+    ]
diff --git a/import_export_extensions/models/import_job.py b/import_export_extensions/models/import_job.py
index 36f2d57..f8da99c 100644
--- a/import_export_extensions/models/import_job.py
+++ b/import_export_extensions/models/import_job.py
@@ -5,8 +5,10 @@
 from typing import Optional, Sequence, Type
 
 from django.conf import settings
+from django.core.files import base as django_files
 from django.db import models, transaction
 from django.utils import encoding, module_loading, timezone
+from django.utils.encoding import force_bytes
 from django.utils.translation import gettext_lazy as _
 
 import tablib
@@ -14,6 +16,7 @@
 from import_export.formats import base_formats
 from import_export.results import Result
 
+from ..resources import CeleryResource
 from . 
import tools
 from .core import BaseJob, TaskStateInfo
 
@@ -136,6 +139,13 @@ class ImportStatus(models.TextChoices):
         upload_to=tools.upload_import_file_to,
         help_text=_("File that contain data to be imported"),
     )
+    input_errors_file = models.FileField(
+        max_length=512,
+        null=True,
+        verbose_name=_("Input errors file"),
+        help_text=_("File that contain failed rows"),
+        upload_to=tools.upload_import_file_to,
+    )
 
     parse_task_id = models.CharField(
         default=str,
@@ -205,6 +215,7 @@ def save(
 
         """
         is_created = self._state.adding
+        self._save_input_errors_file()
         super().save(
             force_insert=force_insert,
             force_update=force_update,
@@ -230,7 +241,7 @@ def save(
             transaction.on_commit(self.start_parse_data_task)
 
     @property
-    def resource(self):
+    def resource(self) -> CeleryResource:
         """Get initialized resource instance."""
         resource_class = module_loading.import_string(self.resource_path)
         resource = resource_class(
@@ -265,10 +276,7 @@ def progress(self) -> Optional[TaskStateInfo]:
         https://docs.celeryproject.org/en/latest/userguide/tasks.html#states
 
         """
-        if self.import_status not in (
-            self.ImportStatus.PARSING,
-            self.ImportStatus.IMPORTING,
-        ):
+        if self.import_status not in self.progress_statuses:
             return None
 
         current_task = (
@@ -360,7 +368,7 @@ def _parse_data_inner(self) -> Result:
             dataset,
             dry_run=True,
             raise_errors=False,
-            collect_failures=True,
+            collect_failed_rows=True,
             force_import=self.force_import,
         )
 
@@ -451,7 +459,7 @@ def _import_data_inner(self) -> Result:
             dry_run=False,
             raise_errors=True,
             use_transactions=True,
-            collect_failures=True,
+            collect_failed_rows=True,
             force_import=self.force_import,
         )
 
@@ -461,11 +469,11 @@ def _get_import_format_by_ext(
     ) -> Type[base_formats.Format]:
         """Determine import file format by file extension."""
         supported_formats = self.resource.get_supported_formats()
+
         for import_format in supported_formats:
-            if import_format().get_title().upper() == file_ext.upper().replace(
-                ".", "",
-            ):
+            if import_format().get_title().upper() == file_ext.upper().replace(".", ""):
                 return import_format
+
         supported_formats_titles = ",".join(
             supported_format().get_title()
             for supported_format in supported_formats
@@ -558,3 +566,51 @@ def _get_task_state(self, task_id: str) -> TaskStateInfo:
             state=async_result.state,
             info=async_result.info,
         )
+
+    def _save_input_errors_file(self):
+        """Save input errors file.
+
+        This should be saved after parsing and after importing
+        and if there are row errors in result.
+
+        Nothing happens when the status is not in `results_statuses`, when
+        there is no result or the result has no row errors, or when an
+        errors file is already set. Otherwise `result.failed_dataset` is
+        exported in the format matching the `data_file` extension and
+        stored under the name `FailedRows<generated export filename>`.
+
+        """
+        if (
+            self.import_status not in self.results_statuses
+            or not self.result
+            or not self.result.row_errors()
+            or self.input_errors_file
+        ):
+            return
+        # The failed rows are exported in the same format as the data file.
+        _, file_ext = os.path.splitext(self.data_file.name)
+        file_format = self._get_import_format_by_ext(
+            file_ext=file_ext,
+        )()
+        export_data = file_format.export_data(
+            dataset=self.result.failed_dataset,
+        )
+
+        # create file if `export_data` is not file
+        if not hasattr(export_data, "read"):
+            export_data = django_files.ContentFile(force_bytes(export_data))
+
+        # `/` in the generated name is replaced with `-` (presumably to
+        # keep it a plain file name without path separators).
+        file_name = self.resource.generate_export_filename(
+            file_format,
+        ).replace("/", "-")
+
+        # NOTE(review): this method is called from `save()`; with
+        # `save=True` the file field presumably saves the model again
+        # (re-entering `save()`), where the `self.input_errors_file` guard
+        # above returns early. Confirm the extra save is intended.
+        self.input_errors_file.save(
+            name=f"FailedRows{file_name}",
+            content=export_data,
+            save=True,
+        )
diff --git a/import_export_extensions/models/tools.py b/import_export_extensions/models/tools.py
index 763642f..c1686b5 100644
--- a/import_export_extensions/models/tools.py
+++ b/import_export_extensions/models/tools.py
@@ -32,3 +32,9 @@ def upload_file_to(
     upload_file_to,
     main_folder_name="import",
 )
+# NOTE(review): not referenced by the other hunks of this patch; the new
+# `input_errors_file` field uses `upload_import_file_to` - confirm intent.
+upload_import_error_file_to = functools.partial(
+    upload_file_to,
+    main_folder_name="errors",
+)
diff --git a/import_export_extensions/resources.py b/import_export_extensions/resources.py
index f4c42b1..a776914 100644
--- a/import_export_extensions/resources.py
+++ b/import_export_extensions/resources.py
@@ -303,3 +303,8 @@ def get_model_queryset(cls):
 
         """
         return cls._meta.model.objects.all()
+
+    class Meta:
+        # NOTE(review): presumably needed so row results expose `instance`
+        # and `original`, which ImportingDataSerializer reads - confirm.
+        store_instance = True
diff --git a/tests/fake_app/models.py 
b/tests/fake_app/models.py
index 7cb7c74..cf04d54 100644
--- a/tests/fake_app/models.py
+++ b/tests/fake_app/models.py
@@ -17,7 +17,10 @@ def __str__(self) -> str:
 class Artist(models.Model):
     """Model representing artist."""
 
-    name = models.CharField(max_length=100)
+    name = models.CharField(
+        max_length=100,
+        blank=False,
+    )
     bands = models.ManyToManyField("Band", through="Membership")
 
     instrument = models.ForeignKey(
diff --git a/tests/fake_app/resources.py b/tests/fake_app/resources.py
index 3ac71e0..180dc48 100644
--- a/tests/fake_app/resources.py
+++ b/tests/fake_app/resources.py
@@ -35,3 +35,4 @@ class ArtistResourceWithM2M(CeleryModelResource):
     class Meta:
         model = Artist
         fields = ["id", "name", "bands", "instrument"]
+        clean_model_instances = True
diff --git a/tests/test_models/test_import/test_parse_data.py b/tests/test_models/test_import/test_parse_data.py
index e61dbbd..63a3059 100644
--- a/tests/test_models/test_import/test_parse_data.py
+++ b/tests/test_models/test_import/test_parse_data.py
@@ -163,3 +163,7 @@ def test_parse_data_invalid_row_file(
 
     import_job.refresh_from_db()
     assert import_job.import_status == expected_status
+    if expected_status == ImportJob.ImportStatus.INPUT_ERROR:
+        # A file field attribute is never `None`; it is falsy when empty,
+        # so check truthiness to verify a file was actually stored.
+        assert import_job.input_errors_file