Skip to content

Commit

Permalink
fix: add support to import CSV file with multiple image urls for one …
Browse files Browse the repository at this point in the history
…product on separate rows (Carrefour) + fix fake download of images for tests (#9058)

* fix: add support for semi-colon ; as a separator

* fix tests for uploading images

* support for multiple lines with image_other_url for the same product

* update tests

* support for multiple lines with image_other_url for the same product

* support for multiple lines with image_other_url for the same product

---------

Co-authored-by: off <off@openfoodfacts.org>
  • Loading branch information
stephanegigandet and off committed Sep 27, 2023
1 parent 72b5c84 commit b0280f6
Show file tree
Hide file tree
Showing 24 changed files with 1,615 additions and 44 deletions.
7 changes: 5 additions & 2 deletions lib/ProductOpener/Images.pm
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,8 @@ Process an image uploaded to a product (from the web site, from the API, or from
=head4 Image field $imagefield
Indicates what the image is and its language.
Indicates what the image is and its language, or indicate a path to the image file
(for imports and when uploading an image with a barcode)
Format: [front|ingredients|nutrition|packaging|other]_[2 letter language code]
Expand Down Expand Up @@ -703,6 +704,7 @@ sub process_image_upload ($product_id, $imagefield, $user_id, $time, $comment, $
my $extension = 'jpg';

# Image that was already read by barcode scanner: can't read it again
# $image_field can be a path to the image file (for imports and when uploading an image with a barcode)
my $tmp_filename;
if ($imagefield =~ /\//) {
$tmp_filename = $imagefield;
Expand Down Expand Up @@ -767,7 +769,7 @@ sub process_image_upload ($product_id, $imagefield, $user_id, $time, $comment, $
my $filename = get_string_id_for_lang("no_language", remote_addr() . '_' . $`);

my $current_product_ref = retrieve_product($product_id);
$imgid = $current_product_ref->{max_imgid} + 1;
$imgid = ($current_product_ref->{max_imgid} || 0) + 1;

# if for some reason the images directories were not created at product creation (it can happen if the images directory's permission / ownership are incorrect at some point)
# create them
Expand All @@ -794,6 +796,7 @@ sub process_image_upload ($product_id, $imagefield, $user_id, $time, $comment, $
$log->debug("new imgid: ", {imgid => $imgid, extension => $extension}) if $log->is_debug();

my $img_orig = "$product_www_root/images/products/$path/$imgid.$extension.orig";
$log->debug("writing the original image", {img_orig => $img_orig}) if $log->is_debug();
open(my $out, ">", $img_orig)
or $log->warn("could not open image path for saving", {path => $img_orig, error => $!});
while (my $chunk = <$file>) {
Expand Down
42 changes: 25 additions & 17 deletions lib/ProductOpener/Import.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2272,26 +2272,34 @@ sub import_csv_file ($args_ref) {
# image_other_url
# image_other_url.2 : a second "other" photo

next if $field !~ /^image_((front|ingredients|nutrition|packaging|other)(_[a-z]{2})?)_url/;
next
if $field
!~ /^image_((?:front|ingredients|nutrition|packaging|other)(?:_[a-z]{2})?)_url(_[a-z]{2})?(\.[0-9]+)?$/;

my $imagefield = $1 . $'; # e.g. image_front_url_fr -> front_fr
my $imagefield = $1 . ($2 || ''); # e.g. image_front_url_fr or image_front_url_fr -> front_fr
my $number = $3;

# If the imagefield is other, and we have a value for image_other_type, try to identify the imagefield
if ( ($imagefield eq "other")
and (defined $imported_product_ref->{"image_other_type"})
and ($imported_product_ref->{"image_other_type"} ne ""))
{
my $type_imagefield
= get_imagefield_from_string($product_ref->{lc}, $imported_product_ref->{"image_other_type"});
$log->debug(
"imagefield is other, tried to guess it image_other_type",
{
imagefield => $imagefield,
type_imagefield => $type_imagefield,
image_other_type => $imported_product_ref->{"image_other_type"}
}
) if $log->is_debug();
$imagefield = $type_imagefield;
if ($imagefield eq "other") {
my $image_other_type_field = "image_other_type";
if (defined $number) {
$image_other_type_field .= $number;
}

if ($imported_product_ref->{$image_other_type_field}) {
my $type_imagefield
= get_imagefield_from_string($product_ref->{lc},
$imported_product_ref->{$image_other_type_field});
$log->debug(
"imagefield is other, tried to guess with image_other_type",
{
imagefield => $imagefield,
type_imagefield => $type_imagefield,
image_other_type => $imported_product_ref->{$image_other_type_field}
}
) if $log->is_debug();
$imagefield = $type_imagefield;
}
}

$log->debug("image file",
Expand Down
27 changes: 20 additions & 7 deletions lib/ProductOpener/Producers.pm
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ sub load_csv_or_excel_file ($file) { # path and file name
if ($line =~ /\t/) {
$separator = "\t";
}
# Otherwise, if the first line does not have a separator, check if it is a ;
elsif (($line !~ /$separator/) and ($line =~ /;/)) {
$separator = ";";
}
}
}

Expand Down Expand Up @@ -460,7 +464,7 @@ sub convert_file ($default_values_ref, $file, $columns_fields_file, $converted_f
my $products_ref = {};

# Keep track of the number of lines for each product
my %product_lines = {};
my %product_lines = ();

# We may add some output columns if there are products on multiple lines
my $extra_output_headers_ref = [];
Expand Down Expand Up @@ -495,14 +499,23 @@ sub convert_file ($default_values_ref, $file, $columns_fields_file, $converted_f
my $field = $field_orig; # Needed in order to be able to modify $field without changing the array content
my $col = $output_to_input_columns_ref->{$field};

# If the field is of the form packaging_1_*
# and we have multiple lines per product,
# we rename the field to packaging_[current number of lines of the product]_*
# If we have multiple lines per product, we need to rename some fields by adding a number
# so that the values on multiple lines are saved in multiple columns

if ($product_lines{$code} > 1) {

if (($product_lines{$code} > 1) and ($field =~ /^packaging_1_/)) {
$field = "packaging_" . $product_lines{$code} . "_" . $';
# If the field is of the form packaging_1_*
# we rename the field to packaging_[current number of lines of the product]_*
if ($field =~ /^packaging_1_/) {
$field = "packaging_" . $product_lines{$code} . "_" . $';
}
# If the field is of the form image_other_url or image_other_type
# we rename the field to image_other_(url|type).[current number of lines of the product]
elsif (($product_lines{$code} > 1) and ($field =~ /^image_other_/)) {
$field .= '.' . $product_lines{$code};
}

# Add the field to the list of columns in the output file
# Add the field to the list of columns in the output file if the column does not exist yet
if (not exists $output_to_input_columns_ref->{$field}) {
$output_to_input_columns_ref->{$field} = undef;
push @$extra_output_headers_ref, $field;
Expand Down
14 changes: 11 additions & 3 deletions tests/integration/convert_and_import_excel_file.t
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,11 @@ sub fake_download_image ($) {
my $fname = (split(m|/|, $image_url))[-1];
my $image_path = $inputs_dir . $fname;
my $response = qobj(
is_success => qmeth {return (-e $fname);},
is_success => qmeth {return (-e $image_path);},
decoded_content => qmeth {
open(my $image, "<r", $fname);
my $content = <$image>;
open(my $image, "<", $image_path);
binmode($image);
read $image, my $content, -s $image;
close $image;
return $content;
},
Expand All @@ -55,6 +56,12 @@ my @tests = (
excel_file => "packagings-mousquetaires.xlsx",
columns_fields_json => "packagings-mousquetaires.columns_fields.json",
default_values => {lc => "fr", countries => "fr"},
},
{
test_case => "carrefour-images",
excel_file => "carrefour-images.csv",
columns_fields_json => "carrefour-images.columns_fields.json",
default_values => {lc => "fr", countries => "fr"},
}
);

Expand Down Expand Up @@ -116,6 +123,7 @@ foreach my $test_ref (@tests) {
"owner_id" => "org-test-org",
"csv_file" => $converted_file,
"exported_t" => $datestring,
"images_download_dir" => $outputs_test_dir . "/images",
};

my $stats_ref;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"products" : 2,
"rows" : 8,
"status" : "success"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"code" : "3560070167470",
"countries" : "fr",
"image_other_type" : "Front",
"image_other_type.2" : "Back",
"image_other_type.3" : "Right",
"image_other_type.4" : "",
"image_other_type.5" : "",
"image_other_url" : "https://basephoto-cdn.carrefour.com/product_pictures/carrefour-470-1-f.jpg",
"image_other_url.2" : "https://basephoto-cdn.carrefour.com/product_pictures/carrefour-470-2-b.jpg",
"image_other_url.3" : "https://basephoto-cdn.carrefour.com/product_pictures/carrefour-470-3-r.jpg",
"image_other_url.4" : "",
"image_other_url.5" : "",
"lc" : "fr"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"code" : "3560070815746",
"countries" : "fr",
"image_other_type" : "3/4",
"image_other_type.2" : "3/4",
"image_other_type.3" : "Front",
"image_other_type.4" : "Front",
"image_other_type.5" : "Left",
"image_other_url" : "https://basephoto-cdn.carrefour.com/product_pictures/carrefour-746-1-o.jpg",
"image_other_url.2" : "https://basephoto-cdn.carrefour.com/product_pictures/carrefour-746-2-o.jpg",
"image_other_url.3" : "https://basephoto-cdn.carrefour.com/product_pictures/carrefour-746-3-f.jpg",
"image_other_url.4" : "https://basephoto-cdn.carrefour.com/product_pictures/carrefour-746-4-f.jpg",
"image_other_url.5" : "https://basephoto-cdn.carrefour.com/product_pictures/carrefour-746-5-o.jpg",
"lc" : "fr"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
lc countries code image_other_url image_other_type image_other_url.2 image_other_type.2 image_other_url.3 image_other_type.3 image_other_url.4 image_other_type.4 image_other_url.5 image_other_type.5
fr fr 3560070167470 https://basephoto-cdn.carrefour.com/product_pictures/carrefour-470-1-f.jpg Front https://basephoto-cdn.carrefour.com/product_pictures/carrefour-470-2-b.jpg Back https://basephoto-cdn.carrefour.com/product_pictures/carrefour-470-3-r.jpg Right
fr fr 3560070815746 https://basephoto-cdn.carrefour.com/product_pictures/carrefour-746-1-o.jpg 3/4 https://basephoto-cdn.carrefour.com/product_pictures/carrefour-746-2-o.jpg 3/4 https://basephoto-cdn.carrefour.com/product_pictures/carrefour-746-3-f.jpg Front https://basephoto-cdn.carrefour.com/product_pictures/carrefour-746-4-f.jpg Front https://basephoto-cdn.carrefour.com/product_pictures/carrefour-746-5-o.jpg Left
Loading

0 comments on commit b0280f6

Please sign in to comment.