Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #6492: EMX importer fails on BOM, preventing CSVs from being uploaded in some cases #6520

Merged
merged 4 commits into from Aug 18, 2017
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -3,6 +3,7 @@
import au.com.bytecode.opencsv.CSVReader;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.BOMInputStream;
import org.molgenis.data.Entity;
import org.molgenis.data.MolgenisDataException;
import org.molgenis.data.UnknownEntityException;
Expand All @@ -18,6 +19,7 @@
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import static java.lang.String.format;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.molgenis.data.csv.CsvRepositoryCollection.MAC_ZIP;

Expand Down Expand Up @@ -59,27 +61,60 @@ public class CsvIterator implements CloseableIterator<Entity>
String fileRepositoryName = FilenameUtils.getBaseName(entry.getName());
if (fileRepositoryName.equalsIgnoreCase(repositoryName))
{
csvReader = createCSVReader(entry.getName(), zipFile.getInputStream(entry));
csvReader = createCSVReader(entry.getName(),
convertInputStreamToBomInputstream(zipFile.getInputStream(entry),
zipFile.getName()));
break;
}
}
}
}
else if (file.getName().toLowerCase().startsWith(repositoryName.toLowerCase()))
{
csvReader = createCSVReader(file.getName(), new FileInputStream(file));

csvReader = createCSVReader(file.getName(),
convertInputStreamToBomInputstream(new FileInputStream(file), file.getName()));
}

if (csvReader == null)
{
throw new UnknownEntityException("Unknown entity [" + repositoryName + "] ");
throw new UnknownEntityException(format("Unknown entity [ %s ]", repositoryName));
}

colNamesMap = toColNamesMap(csvReader.readNext());
}
catch (IOException e)
{
throw new MolgenisDataException("Exception reading [" + file.getAbsolutePath() + "]", e);
throw new MolgenisDataException(format("Exception reading [ %s ]", file.getAbsolutePath()), e);
}
}

/**
 * <p>Wrap an input stream produced by the CsvIterator so that a leading UTF-8 byte order mark (BOM),
 * if present, is not handed to the CSV parser.</p>
 *
 * <p>A {@code BOMInputStream} created with the single-argument constructor already excludes a detected
 * BOM from the bytes it returns. Therefore nothing is skipped manually here: an additional
 * {@code skip} would discard the first bytes of the actual file content.</p>
 *
 * @param inputStream from zipfile or normal files
 * @param fileName    used in the error message only
 * @return inputstream without BOM (always)
 * @throws MolgenisDataException if the start of the stream cannot be read to detect a BOM
 */
private InputStream convertInputStreamToBomInputstream(InputStream inputStream, String fileName)
{
	try
	{
		BOMInputStream bomInputStream = new BOMInputStream(inputStream);
		// Probe eagerly: this reads (and, for an excluding stream, consumes) the BOM right away, so an
		// unreadable stream is reported here together with the file name instead of on the first read.
		bomInputStream.hasBOM();
		return bomInputStream;
	}
	catch (IOException err)
	{
		throw new MolgenisDataException(format("Could not determine if BOM is attached to file [ %s ]", fileName),
				err);
	}
}

Expand Down Expand Up @@ -144,7 +179,7 @@ private Entity get()
}
catch (IOException e)
{
throw new MolgenisDataException("Exception reading line of csv file [" + repositoryName + "]", e);
throw new MolgenisDataException(format("Exception reading line of csv file [ %s ]", repositoryName), e);
}
}

Expand Down Expand Up @@ -187,15 +222,18 @@ private CSVReader createCSVReader(String fileName, InputStream in)
return new CSVReader(reader, '\t');
}

throw new MolgenisDataException("Unknown file type: [" + fileName + "] for csv repository");
throw new MolgenisDataException(format("Unknown file type: [ %s ] for csv repository", fileName));
}

return new CSVReader(reader, this.separator);
}

private Map<String, Integer> toColNamesMap(String[] headers)
{
if ((headers == null) || (headers.length == 0)) return Collections.emptyMap();
if ((headers == null) || (headers.length == 0))
{
return Collections.emptyMap();
}

int capacity = (int) (headers.length / 0.75) + 1;
Map<String, Integer> columnIdx = new LinkedHashMap<>(capacity);
Expand Down