New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GSOC]DatasetMapper & Imputer #694
Changes from 1 commit
87c05a5
2e4b1a8
631e59e
391006e
6a1fb81
b0c5224
de35241
2d38604
bb045b8
1295f4b
5a517c2
94b7a5c
ebed68f
db78f39
7c60b97
da4e409
d8618ec
3b8ffd0
90a5cd2
32c8a73
e09d9bc
87d8d46
de0b2db
bc187ca
a340f69
21d94c0
a92afaa
bace8b2
896a018
1a908c2
2edbc40
d881cb7
a881831
63268a3
2eb6754
6d43aa3
fedc5e0
787fd82
a0b7d59
9a6dce7
c3aeba1
028c217
03e19a4
ef4536b
6e2c1ff
5eb9abd
d043235
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,11 +23,11 @@ template <typename T> | |
class ListwiseDeletion | ||
{ | ||
public: | ||
void Apply(const arma::Mat<T>& input, | ||
arma::Mat<T>& output, | ||
const T& mappedValue, | ||
const size_t dimension, | ||
const bool transpose = true) | ||
void Impute(const arma::Mat<T>& input, | ||
arma::Mat<T>& output, | ||
const T& mappedValue, | ||
const size_t dimension, | ||
const bool transpose = true) | ||
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think we have to reimplement the function; we could reuse the function from above: There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Now Impute(input, input, mappedValue, ...) is implemented by Impute(input, mappedValue, ...). The benefit of Impute(input, mappedValue, ...) is that users can reuse input, saving the cost of allocating new memory. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see the reason to provide the interface, but we can reuse the implementation inside of the simplified function. |
||
// initiate output | ||
output = input; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,7 +60,7 @@ class Imputer | |
const size_t dimension) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that many times the user will want to impute without copying the matrix; this will especially make a difference when the matrix is large. Should we also provide an overload with just one matrix that will be modified? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||
{ | ||
T mappedValue = static_cast<T>(mapper.UnmapValue(missingValue, dimension)); | ||
strategy.Apply(input, output, mappedValue, dimension, transpose); | ||
strategy.Impute(input, output, mappedValue, dimension, transpose); | ||
} | ||
|
||
/** | ||
|
@@ -74,12 +74,8 @@ class Imputer | |
const size_t dimension) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This overload seems like it is necessary specifically for the Certainly in the examples for the imputer, we should document how someone can use the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is a good idea. I'll update this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||
{ | ||
T mappedValue = static_cast<T>(mapper.UnmapValue(missingValue, dimension)); | ||
strategy.Apply(input, | ||
output, | ||
mappedValue, | ||
customValue, | ||
dimension, | ||
transpose); | ||
strategy.Impute(input, output, mappedValue, customValue, dimension, | ||
transpose); | ||
} | ||
|
||
//! Get the strategy | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,10 +41,10 @@ class MissingPolicy | |
|
||
|
||
template <typename MapType> | ||
mapped_type MapString(MapType& maps, | ||
std::vector<Datatype>& types, | ||
const std::string& string, | ||
const size_t dimension) | ||
mapped_type MapString(const std::string& string, | ||
const size_t dimension, | ||
MapType maps, | ||
std::vector<Datatype>& types) | ||
{ | ||
// If this condition is true, either we have no mapping for the given string | ||
// or we have no mappings for the given dimension at all. In either case, | ||
|
@@ -84,8 +84,8 @@ class MissingPolicy | |
token>>matrix.at(row, i); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Discover potential problems.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That's brilliant, thanks! |
||
if (token.fail()) // if not number, map it to datasetmapper | ||
{ | ||
const eT val = static_cast<eT>(this->MapString(maps, types, tokens[i], | ||
row)); | ||
const eT val = static_cast<eT>(this->MapString(tokens[i], row, maps, | ||
types)); | ||
matrix.at(row, i) = val; | ||
} | ||
token.clear(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -99,7 +99,7 @@ BOOST_AUTO_TEST_CASE(CustomImputationTest) | |
CustomImputation<double> imputer; | ||
|
||
// transposed | ||
imputer.Apply(input, outputT, mappedValue, customValue, 0/*dimension*/, true); | ||
imputer.Impute(input, outputT, mappedValue, customValue, 0/*dimension*/, true); | ||
|
||
BOOST_REQUIRE_CLOSE(outputT(0, 0), 3.0, 1e-5); | ||
BOOST_REQUIRE_CLOSE(outputT(0, 1), 99.0, 1e-5); | ||
|
@@ -115,7 +115,7 @@ BOOST_AUTO_TEST_CASE(CustomImputationTest) | |
BOOST_REQUIRE_CLOSE(outputT(2, 3), 8.0, 1e-5); | ||
|
||
// not transposed | ||
imputer.Apply(input, output, mappedValue, customValue, 1, false); | ||
imputer.Impute(input, output, mappedValue, customValue, 1, false); | ||
|
||
BOOST_REQUIRE_CLOSE(output(0, 0), 3.0, 1e-5); | ||
BOOST_REQUIRE_CLOSE(output(0, 1), 99.0, 1e-5); | ||
|
@@ -146,7 +146,7 @@ BOOST_AUTO_TEST_CASE(MeanImputationTest) | |
MeanImputation<double> imputer; | ||
|
||
// transposed | ||
imputer.Apply(input, outputT, mappedValue, 0, true); | ||
imputer.Impute(input, outputT, mappedValue, 0, true); | ||
|
||
BOOST_REQUIRE_CLOSE(outputT(0, 0), 3.0, 1e-5); | ||
BOOST_REQUIRE_CLOSE(outputT(0, 1), 2.5, 1e-5); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No matter how I calculate, the mean value should be 5.66666667. Maybe it is because you forgot to initialize sum? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The current implementation applies only to the given dimension (0 in this case). |
||
|
@@ -162,7 +162,7 @@ BOOST_AUTO_TEST_CASE(MeanImputationTest) | |
BOOST_REQUIRE_CLOSE(outputT(2, 3), 8.0, 1e-5); | ||
|
||
// not transposed | ||
imputer.Apply(input, output, mappedValue, 1, false); | ||
imputer.Impute(input, output, mappedValue, 1, false); | ||
|
||
BOOST_REQUIRE_CLOSE(output(0, 0), 3.0, 1e-5); | ||
BOOST_REQUIRE_CLOSE(output(0, 1), 7.0, 1e-5); | ||
|
@@ -193,7 +193,7 @@ BOOST_AUTO_TEST_CASE(MedianImputationTest) | |
MedianImputation<double> imputer; | ||
|
||
// transposed | ||
imputer.Apply(input, outputT, mappedValue, 1, true); | ||
imputer.Impute(input, outputT, mappedValue, 1, true); | ||
|
||
BOOST_REQUIRE_CLOSE(outputT(0, 0), 3.0, 1e-5); | ||
BOOST_REQUIRE_CLOSE(outputT(0, 1), 0.0, 1e-5); | ||
|
@@ -209,7 +209,7 @@ BOOST_AUTO_TEST_CASE(MedianImputationTest) | |
BOOST_REQUIRE_CLOSE(outputT(2, 3), 8.0, 1e-5); | ||
|
||
// not transposed | ||
imputer.Apply(input, output, mappedValue, 1, false); | ||
imputer.Impute(input, output, mappedValue, 1, false); | ||
|
||
BOOST_REQUIRE_CLOSE(output(0, 0), 3.0, 1e-5); | ||
BOOST_REQUIRE_CLOSE(output(0, 1), 6.0, 1e-5); | ||
|
@@ -240,7 +240,7 @@ BOOST_AUTO_TEST_CASE(ListwiseDeletionTest) | |
ListwiseDeletion<double> imputer; | ||
|
||
// transposed | ||
imputer.Apply(input, outputT, mappedValue, 0, true); // transposed | ||
imputer.Impute(input, outputT, mappedValue, 0, true); // transposed | ||
|
||
BOOST_REQUIRE_CLOSE(outputT(0, 0), 3.0, 1e-5); | ||
BOOST_REQUIRE_CLOSE(outputT(0, 1), 2.0, 1e-5); | ||
|
@@ -250,7 +250,7 @@ BOOST_AUTO_TEST_CASE(ListwiseDeletionTest) | |
BOOST_REQUIRE_CLOSE(outputT(2, 1), 4.0, 1e-5); | ||
|
||
// not transposed | ||
imputer.Apply(input, output, mappedValue, 1, false); // not transposed | ||
imputer.Impute(input, output, mappedValue, 1, false); // not transposed | ||
|
||
BOOST_REQUIRE_CLOSE(output(0, 0), 5.0, 1e-5); | ||
BOOST_REQUIRE_CLOSE(output(0, 1), 6.0, 1e-5); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you can provide some comments on what this class is and what it does (also for the
Impute()
function), I think it would be really helpful for users. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated