forked from mlpack/mlpack
/
load.hpp
185 lines (175 loc) · 7.66 KB
/
load.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
/**
* @file load.hpp
* @author Ryan Curtin
*
* Load an Armadillo matrix from file. This is necessary because Armadillo does
* not transpose matrices on input, and it allows us to give better error
* output.
*/
#ifndef MLPACK_CORE_DATA_LOAD_HPP
#define MLPACK_CORE_DATA_LOAD_HPP
#include <mlpack/core/util/log.hpp>
#include <mlpack/core/arma_extend/arma_extend.hpp> // Includes Armadillo.
#include <string>
#include "format.hpp"
#include "dataset_info.hpp"
namespace mlpack {
namespace data /** Functions to load and save matrices and models. */ {
/**
* Loads a matrix from file, guessing the filetype from the extension. This
* will transpose the matrix at load time (unless the transpose parameter is set
* to false). If the filetype cannot be determined, an error will be given.
*
* The supported types of files are the same as found in Armadillo:
*
* - CSV (csv_ascii), denoted by .csv, or optionally .txt
* - TSV (raw_ascii), denoted by .tsv, .csv, or .txt
* - ASCII (raw_ascii), denoted by .txt
* - Armadillo ASCII (arma_ascii), also denoted by .txt
* - PGM (pgm_binary), denoted by .pgm
* - PPM (ppm_binary), denoted by .ppm
* - Raw binary (raw_binary), denoted by .bin
* - Armadillo binary (arma_binary), denoted by .bin
* - HDF5, denoted by .hdf, .hdf5, .h5, or .he5
*
* If the file extension is not one of those types, an error will be given.
* This is preferable to Armadillo's default behavior of loading an unknown
* filetype as raw_binary, which can have very confusing effects.
*
* If the parameter 'fatal' is set to true, a std::runtime_error exception will
* be thrown if the matrix does not load successfully. The parameter
* 'transpose' controls whether or not the matrix is transposed after loading.
* In most cases, because data is generally stored in a row-major format and
* mlpack requires column-major matrices, this should be left at its default
* value of 'true'.
*
* @param filename Name of file to load.
* @param matrix Matrix to load contents of file into.
* @param fatal If an error should be reported as fatal (default false).
* @param transpose If true, transpose the matrix after loading.
* @return Boolean value indicating success or failure of load.
*/
template<typename eT>
bool Load(const std::string& filename,
arma::Mat<eT>& matrix,
const bool fatal = false,
const bool transpose = true);
/**
* Loads a matrix from a file, guessing the filetype from the extension and
* mapping categorical features with a DatasetMapper object. This will
* transpose the matrix (unless the transpose parameter is set to false).
* This particular overload of Load() can only load text-based formats, such as
* those given below:
*
* - CSV (csv_ascii), denoted by .csv, or optionally .txt
* - TSV (raw_ascii), denoted by .tsv, .csv, or .txt
* - ASCII (raw_ascii), denoted by .txt
*
* If the file extension is not one of those types, an error will be given.
* This is preferable to Armadillo's default behavior of loading an unknown
* filetype as raw_binary, which can have very confusing effects.
*
* If the parameter 'fatal' is set to true, a std::runtime_error exception will
* be thrown if the matrix does not load successfully. The parameter
* 'transpose' controls whether or not the matrix is transposed after loading.
* In most cases, because data is generally stored in a row-major format and
* mlpack requires column-major matrices, this should be left at its default
* value of 'true'.
*
* The DatasetMapper object passed to this function will be re-created, so any
* mappings from previous loads will be lost.
*
* @param filename Name of file to load.
* @param matrix Matrix to load contents of file into.
* @param info DatasetMapper object to populate with mappings and data types.
* @param fatal If an error should be reported as fatal (default false).
* @param transpose If true, transpose the matrix after loading.
* @return Boolean value indicating success or failure of load.
*/
template<typename eT, typename PolicyType>
bool Load(const std::string& filename,
arma::Mat<eT>& matrix,
DatasetMapper<PolicyType>& info,
const bool fatal = false,
const bool transpose = true)
{
PolicyType policy;
return Load(filename, matrix, info, policy, fatal, transpose);
}
/**
* Loads a matrix from a file, guessing the filetype from the extension and
* mapping categorical features with a DatasetMapper object. This will
* transpose the matrix (unless the transpose parameter is set to false).
* This particular overload of Load() can only load text-based formats, such as
* those given below:
*
* - CSV (csv_ascii), denoted by .csv, or optionally .txt
* - TSV (raw_ascii), denoted by .tsv, .csv, or .txt
* - ASCII (raw_ascii), denoted by .txt
*
* If the file extension is not one of those types, an error will be given.
* This is preferable to Armadillo's default behavior of loading an unknown
* filetype as raw_binary, which can have very confusing effects.
*
* If the parameter 'fatal' is set to true, a std::runtime_error exception will
* be thrown if the matrix does not load successfully. The parameter
* 'transpose' controls whether or not the matrix is transposed after loading.
* In most cases, because data is generally stored in a row-major format and
* mlpack requires column-major matrices, this should be left at its default
* value of 'true'.
*
* The DatasetMapper object passed to this function will be re-created, so any
* mappings from previous loads will be lost. policy is passed to the
* constructor of DatasetMapper to create a new instance.
*
* @param filename Name of file to load.
* @param matrix Matrix to load contents of file into.
* @param info DatasetMapper object to populate with mappings and data types.
* @param policy Policy class that decides how the DatasetMapper should map.
* @param fatal If an error should be reported as fatal (default false).
* @param transpose If true, transpose the matrix after loading.
* @return Boolean value indicating success or failure of load.
*/
template<typename eT, typename PolicyType>
bool Load(const std::string& filename,
arma::Mat<eT>& matrix,
DatasetMapper<PolicyType>& info,
PolicyType& policy,
const bool fatal = false,
const bool transpose = true);
/**
* Load a model from a file, guessing the filetype from the extension, or,
* optionally, loading the specified format. If automatic extension detection
* is used and the filetype cannot be determined, an error will be given.
*
* The supported types of files are the same as what is supported by the
* boost::serialization library:
*
* - text, denoted by .txt
* - xml, denoted by .xml
* - binary, denoted by .bin
*
* The format parameter can take any of the values in the 'format' enum:
* 'format::autodetect', 'format::text', 'format::xml', and 'format::binary'.
* The autodetect functionality operates on the file extension (so, "file.txt"
* would be autodetected as text).
*
* The name parameter should be specified to indicate the name of the structure
* to be loaded. This should be the same as the name that was used to save the
* structure (otherwise, the loading procedure will fail).
*
* If the parameter 'fatal' is set to true, then an exception will be thrown in
* the event of load failure. Otherwise, the method will return false and the
* relevant error information will be printed to Log::Warn.
*/
template<typename T>
bool Load(const std::string& filename,
const std::string& name,
T& t,
const bool fatal = false,
format f = format::autodetect);
} // namespace data
} // namespace mlpack
// Include implementation.
#include "load_impl.hpp"
#endif