Skip to content

Commit

Permalink
Add IgnoreUnknownColumns to CsvFileDescription
Browse files Browse the repository at this point in the history
- Modify article.htm to document the changes
- Add unit test to make sure the new functionality works
- Add ToString() method in `TypeFieldInfo` class for better debugging
  • Loading branch information
omederos committed Feb 28, 2014
1 parent d86a25f commit cc281ce
Show file tree
Hide file tree
Showing 6 changed files with 204 additions and 25 deletions.
40 changes: 38 additions & 2 deletions LINQtoCSV.Tests/CsvContextReadTests.cs
Expand Up @@ -125,7 +125,7 @@ public void GoodFileCommaDelimitedUseFieldIndexForReadingDataCharUSEnglish()
CsvFileDescription fileDescription_namesUs = new CsvFileDescription
{
SeparatorChar = ',',
IgnoreMissingColumns = true,
IgnoreUnknownColumns = true,
UseFieldIndexForReadingData = true,
FirstLineHasColumnNames = false,
EnforceCsvColumnAttribute = true, // default is false
Expand Down Expand Up @@ -162,7 +162,7 @@ public void GoodFileCommaDelimitedUseFieldIndexForReadingDataCharUseOutputFormat
CsvFileDescription fileDescription_namesUs = new CsvFileDescription
{
SeparatorChar = ',',
IgnoreMissingColumns = true,
IgnoreUnknownColumns = true,
UseOutputFormatForParsingCsvValue = true,

UseFieldIndexForReadingData = true,
Expand Down Expand Up @@ -331,5 +331,41 @@ two newlines

AssertRead(testInput, fileDescription_namesUs, expected);
}

/// <summary>
/// Reads a CSV text whose header has more columns than <c>Person</c> declares and
/// checks that, with <c>IgnoreUnknownColumns</c> switched on, only the declared
/// columns end up in the resulting objects.
/// </summary>
[TestMethod]
public void FileWithUnknownColumns_ShouldDiscardColumns()
{
    // Input carries 5 columns: Id | Name | Last Name | Age | City.
    // Person declares only Name, Last Name and Age, so Id and City are the extra ones.
    string csvText =
@"Id,Name,Last Name,Age,City
1,John,Doe,15,Washington
2,Jane,Doe,20,New York
";

    var fileDescription = new CsvFileDescription();
    fileDescription.SeparatorChar = ',';
    fileDescription.FirstLineHasColumnNames = true;
    fileDescription.IgnoreUnknownColumns = true;

    var john = new Person { Name = "John", LastName = "Doe", Age = 15 };
    var jane = new Person { Name = "Jane", LastName = "Doe", Age = 20 };
    Person[] expectedPeople = { john, jane };

    AssertRead(csvText, fileDescription, expectedPeople);
}
}
}
1 change: 1 addition & 0 deletions LINQtoCSV.Tests/LINQtoCSV.Tests.csproj
Expand Up @@ -52,6 +52,7 @@
<Compile Include="CsvContextWriteTests.cs" />
<Compile Include="CsvContextReadTests.cs" />
<Compile Include="IAssertable.cs" />
<Compile Include="Person.cs" />
<Compile Include="ProductDataSpecificFieldIndex.cs" />
<Compile Include="ProductData.cs" />
<Compile Include="ProductData_DuplicateIndices.cs" />
Expand Down
23 changes: 23 additions & 0 deletions LINQtoCSV.Tests/Person.cs
@@ -0,0 +1,23 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace LINQtoCSV.Tests
{
/// <summary>
/// Test data class with three CSV-mapped properties. It implements
/// <c>IAssertable&lt;Person&gt;</c> so two instances can be compared property by property.
/// </summary>
public class Person : IAssertable<Person>
{
    /// <summary>Value tagged with the CSV column name "Name".</summary>
    [CsvColumn(Name = "Name")]
    public string Name { get; set; }

    /// <summary>Value tagged with the CSV column name "Last Name".</summary>
    [CsvColumn(Name = "Last Name")]
    public string LastName { get; set; }

    /// <summary>Value tagged with the CSV column name "Age".</summary>
    [CsvColumn(Name = "Age")]
    public int Age { get; set; }

    /// <summary>
    /// Asserts that every property of this instance equals the matching property of
    /// <paramref name="other"/>. Checks run in the order Name, LastName, Age.
    /// </summary>
    /// <param name="other">The instance to compare against.</param>
    public void AssertEqual(Person other)
    {
        Assert.AreEqual(other.Name, this.Name);
        Assert.AreEqual(other.LastName, this.LastName);
        Assert.AreEqual(other.Age, this.Age);
    }
}
}
7 changes: 6 additions & 1 deletion LINQtoCSV/CsvFileDescription.cs
Expand Up @@ -87,10 +87,14 @@ public int MaximumNbrExceptions
public Encoding TextEncoding { get; set; }
public bool DetectEncodingFromByteOrderMarks { get; set; }

public bool IgnoreMissingColumns { get; set; }
public bool UseFieldIndexForReadingData { get; set; }
public bool UseOutputFormatForParsingCsvValue { get; set; }
public bool IgnoreTrailingSeparatorChar { get; set; }

/// <summary>
/// If set to true, will read only the fields specified as attributes, and will discard other fields in the CSV file
/// </summary>
public bool IgnoreUnknownColumns { get; set; }

// ---------------

Expand All @@ -107,6 +111,7 @@ public CsvFileDescription()
NoSeparatorChar = false;
UseFieldIndexForReadingData = false;
UseOutputFormatForParsingCsvValue = false;
IgnoreUnknownColumns = false;
}
}
}
81 changes: 60 additions & 21 deletions LINQtoCSV/FieldMapper.cs
Expand Up @@ -39,6 +39,10 @@ public int CompareTo(TypeFieldInfo other)
{
return index.CompareTo(other.index);
}

/// <summary>
/// Builds a short text showing this entry's index and name, intended as a debugging aid.
/// </summary>
/// <returns>A string of the form "Index: {index}, Name: {name}".</returns>
public override string ToString()
{
    const string template = "Index: {0}, Name: {1}";
    return string.Format(template, index, name);
}
}

// -----------------------------
Expand All @@ -47,6 +51,11 @@ public int CompareTo(TypeFieldInfo other)
// to its TypeFieldInfo.
protected TypeFieldInfo[] m_IndexToInfo = null;

/// <summary>
/// Contains a mapping between the CSV column indexes that will be read and the property indexes in the business object.
/// </summary>
protected IDictionary<int, int> _mappingIndexes = new Dictionary<int, int>();

// Used to build IndexToInfo
protected Dictionary<string, TypeFieldInfo> m_NameToInfo = null;

Expand Down Expand Up @@ -191,6 +200,8 @@ public int CompareTo(TypeFieldInfo other)

int nbrTypeFields = m_NameToInfo.Keys.Count;
m_IndexToInfo = new TypeFieldInfo[nbrTypeFields];

_mappingIndexes = new Dictionary<int, int>();

int i=0;
foreach (KeyValuePair<string, TypeFieldInfo> kvp in m_NameToInfo)
Expand Down Expand Up @@ -371,6 +382,7 @@ internal class FieldMapper_Reading<T> : FieldMapper<T> where T : new()
bool writingFile)
: base(fileDescription, fileName, writingFile)
{

}


Expand All @@ -392,25 +404,39 @@ public void ReadNames(IDataRow row)
// the FieldIndex fields.

// If there are more names in the file then fields in the type,
// one of the names will not be found, causing an exception.
// and IgnoreUnknownColumns is set to `false` one of the names will
// not be found, causing an exception.

int currentNameIndex = 0;
for (int i = 0; i < row.Count; i++) {
if (!m_NameToInfo.ContainsKey(row[i].Value)) {
//If we have to ignore this column
if (m_fileDescription.IgnoreUnknownColumns) {
continue;
}

for (int i = 0; i < row.Count; i++)
{
if (!m_NameToInfo.ContainsKey(row[i].Value))
{
// name not found
throw new NameNotInTypeException(typeof(T).ToString(), row[i].Value, m_fileName);
throw new NameNotInTypeException(typeof (T).ToString(), row[i].Value, m_fileName);
}

// ----

m_IndexToInfo[i] = m_NameToInfo[row[i].Value];
//Map the column index in the CSV file with the column index of the business object.
_mappingIndexes.Add(i, currentNameIndex);
currentNameIndex++;
}

if (m_fileDescription.EnforceCsvColumnAttribute &&
(!m_IndexToInfo[i].hasColumnAttribute))
{
//Loop through the header row again and fill m_IndexToInfo for every column that has a mapping
for (int i = 0; i < row.Count; i++) {
if (!_mappingIndexes.ContainsKey(i)) {
continue;
}

m_IndexToInfo[_mappingIndexes[i]] = m_NameToInfo[row[i].Value];

if (m_fileDescription.EnforceCsvColumnAttribute && (!m_IndexToInfo[i].hasColumnAttribute)) {
// enforcing column attr, but this field/prop has no column attr.
throw new MissingCsvColumnAttributeException(typeof(T).ToString(), row[i].Value, m_fileName);
throw new MissingCsvColumnAttributeException(typeof (T).ToString(), row[i].Value, m_fileName);
}
}
}
Expand All @@ -433,24 +459,37 @@ public List<int> GetCharLengths()
/// <param name="row"></param>
/// <param name="firstRow"></param>
/// <returns></returns>
public T ReadObject(IDataRow row, AggregatedException ae)
{
if (!m_fileDescription.IgnoreMissingColumns && row.Count > m_IndexToInfo.Length)
public T ReadObject(IDataRow row, AggregatedException ae) {
//If there are more columns than the required
if (row.Count > m_IndexToInfo.Length)
{
// Too many fields
throw new TooManyDataFieldsException(typeof(T).ToString(), row[0].LineNbr, m_fileName);
//Are we ignoring unknown columns?
if (!m_fileDescription.IgnoreUnknownColumns) {
// Too many fields
throw new TooManyDataFieldsException(typeof (T).ToString(), row[0].LineNbr, m_fileName);
}
}

// -----

T obj = new T();

int maxRowCount = Math.Min(row.Count, m_IndexToInfo.Length);
//If we will be using the mappings, we just iterate through all the cells in this row
int maxRowCount = _mappingIndexes.Count > 0 ? row.Count : Math.Min(row.Count, m_IndexToInfo.Length);

for (int i = 0; i < maxRowCount; i++) {
TypeFieldInfo tfi;
//If there is some index mapping generated and IgnoreUnknownColumns is `true`
if (m_fileDescription.IgnoreUnknownColumns && _mappingIndexes.Count > 0) {
if (!_mappingIndexes.ContainsKey(i)) {
continue;
}
tfi = m_IndexToInfo[_mappingIndexes[i]];
}
else {
tfi = m_IndexToInfo[i];
}

for (int i = 0; i < maxRowCount; i++)
{
TypeFieldInfo tfi = m_IndexToInfo[i];

if (m_fileDescription.EnforceCsvColumnAttribute &&
(!tfi.hasColumnAttribute))
{
Expand Down
77 changes: 76 additions & 1 deletion article.htm
Expand Up @@ -465,7 +465,10 @@ <h2>CsvFileDescription<a id="CsvFileDescription"></a></h2>
<li><a href="#NoSeparatorChar"><code>NoSeparatorChar</code></a> </li>

<li><a href="#UseFieldIndexForReadingData"><code>UseFieldIndexForReadingData</code></a> </li>
<li><a href="#IgnoreTrailingSeparatorChar"><code>IgnoreTrailingSeparatorChar</code></a> </li>

<li><a href="#IgnoreTrailingSeparatorChar"><code>IgnoreTrailingSeparatorChar</code></a> </li>

<li><a href="#IgnoreUnknownColumns"><code>IgnoreUnknownColumns</code></a> </li>
</ul>

<h3><a id="SeparatorChar">SeparatorChar</a></h3>
Expand Down Expand Up @@ -838,6 +841,78 @@ <h4>Example:</h4>

<p>Though it's not a canonical representation of CSV file, <code>IgnoreTrailingSeparatorChar</code> property tells <code>Read</code> to ignore separator character at the end of the line.</p>

<!-----------IgnoreUnknownColumns----------->
<h3><a id="IgnoreUnknownColumns"></a>IgnoreUnknownColumns</h3>

<table cellpadding="3">
<tbody>
<tr>
<td><strong>Type:</strong></td>

<td><code lang="cs">bool</code></td>
</tr>

<tr>
<td><strong>Default:</strong></td>

<td>false</td>
</tr>

<tr>
<td><strong>Applies to:</strong></td>

<td>Reading only</td>
</tr>
</tbody>
</table>

<h4>Example:</h4>

There are cases where you don't need to read all the columns, but only a subset of them. Consider the following example of a CSV file containing a list of people:<br><br>

<table cellspacing="0" cellpadding="5" border="1">
<tbody>
<tr>
<th>Id</th>
<th>Name</th>
<th>Last Name</th>
<th>Age</th>
<th>City</th>
</tr>
<tr>
<td>1</td>
<td>John</td>
<td>Doe</td>
<td>15</td>
<td>Washington</td>
</tr>
<tr>
<td>2</td>
<td>Jane</td>
<td>Doe</td>
<td>20</td>
<td>New York</td>
</tr>
</tbody>
</table>
<br>
Suppose you have the following class:
<pre lang="cs">
class Person {
[CsvColumn(Name = "Name")]
public string Name { get; set; }
[CsvColumn(Name = "Last Name")]
public string LastName { get; set; }
[CsvColumn(Name = "Age")]
public int Age { get; set; }
}
</pre>
If you set <pre lang="cs">fd.IgnoreUnknownColumns = true;</pre>

then the fields <code>Id</code> and <code>City</code> will be ignored.

<!---------------------------------------------->

<h2>CsvColumn Attribute<a id="CsvColumn_Attribute"></a></h2>

<p>As shown in the <a href="#How_to_use">Reading from a file</a> and <a href="#writing_to_a_file">Writing to a file</a> examples, you can decorate the public fields and properties of your data class with the <code>CsvColumn</code> attribute to specify such things as the output format for date and number fields.</p>
Expand Down

0 comments on commit cc281ce

Please sign in to comment.