-
-
Notifications
You must be signed in to change notification settings - Fork 2.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add new ADS table #8190
base: master
Are you sure you want to change the base?
Add new ADS table #8190
Changes from 6 commits
4f3eec5
ff85549
33dce52
35061e2
7efb23e
0486fb7
f03eac9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
/** | ||
* Copyright (c) 2014-present, The osquery authors | ||
* | ||
* This source code is licensed as defined by the LICENSE file found in the | ||
* root directory of this source tree. | ||
* | ||
* SPDX-License-Identifier: (Apache-2.0 OR GPL-2.0-only) | ||
*/ | ||
|
||
#include <boost/algorithm/string/predicate.hpp> | ||
#include <boost/algorithm/string/trim.hpp> | ||
#include <boost/filesystem.hpp> | ||
#include <string> | ||
#include <windows.h> | ||
|
||
#include <osquery/core/tables.h> | ||
#include <osquery/filesystem/filesystem.h> | ||
#include <osquery/logger/logger.h> | ||
#include <osquery/utils/base64.h> | ||
#include <osquery/utils/chars.h> | ||
#include <osquery/utils/conversions/split.h> | ||
#include <osquery/utils/conversions/windows/strings.h> | ||
#include <osquery/utils/scope_guard.h> | ||
|
||
namespace fs = boost::filesystem; | ||
|
||
namespace osquery { | ||
namespace tables { | ||
|
||
// Name of the NTFS stream Windows attaches to downloaded files to record
// zone/origin metadata; its contents are key=value lines parsed separately.
const std::string kZoneIdentifierKey = "Zone.Identifier";
|
||
// Append one result row describing a single stream key/value pair.
// Values containing non-printable bytes are base64-encoded, and the
// "base64" column flags that so consumers know to decode the value.
void setRow(QueryData& results,
            const std::string& path,
            const std::string& key,
            const std::string& value) {
  Row r;
  r["path"] = path;
  r["directory"] = boost::filesystem::path(path).parent_path().string();
  r["key"] = key;

  const bool printable = isPrintable(value);
  r["value"] = printable ? value : base64::encode(value);
  r["base64"] = printable ? INTEGER(0) : INTEGER(1);

  results.push_back(std::move(r));
}
|
||
// Parse a Zone.Identifier stream, which uses an INI-like "key=value" line
// format, and emit one row per key/value pair.
//
// @param results    output rows, one appended per parsed pair
// @param path       path of the file that owns the stream
// @param streamData raw stream contents
void parseZoneIdentifier(QueryData& results,
                         const std::string& path,
                         const std::string& streamData) {
  for (const auto& line : split(streamData, "\n")) {
    // Lines without a separator (e.g. the "[ZoneTransfer]" section header)
    // carry no key/value data.
    auto sep = line.find('=');
    if (sep == std::string::npos) {
      continue;
    }

    auto key = line.substr(0, sep);
    auto value = line.substr(sep + 1);
    // Zone.Identifier streams use CRLF line endings; splitting on "\n"
    // alone can leave a trailing '\r' on the value, so strip it.
    if (!value.empty() && value.back() == '\r') {
      value.pop_back();
    }

    setRow(results, path, key, value);
  }
}
|
||
// Process a file and extract all stream names and data. | ||
void enumerateStreams(QueryData& results, const std::string& path) { | ||
WIN32_FIND_STREAM_DATA findStreamData; | ||
HANDLE hFind = FindFirstStreamW(stringToWstring(path).c_str(), | ||
FindStreamInfoStandard, | ||
&findStreamData, | ||
0); | ||
|
||
auto fd_guard = scope_guard::create([&] { FindClose(hFind); }); | ||
|
||
if (hFind != INVALID_HANDLE_VALUE) { | ||
do { | ||
std::string stream(wstringToString(findStreamData.cStreamName)); | ||
|
||
// Split the stream string into a name and a type, format is | ||
// ":streamname:$streamtype" | ||
auto streamFullName = split(stream, ":"); | ||
|
||
if (streamFullName.size() != 2) { | ||
LOG(WARNING) << "Invalid stream name found: '" << stream | ||
<< "'. Skipping this entry"; | ||
continue; | ||
} | ||
std::string streamName = streamFullName[0]; | ||
|
||
// Skip unnamed stream since it represents the file content | ||
if (streamName == "") { | ||
continue; | ||
} | ||
|
||
std::string path_copy = path; | ||
// Remove any potential trailing / from path string | ||
if (boost::algorithm::ends_with(path_copy, "\\")) { | ||
path_copy.pop_back(); | ||
} | ||
std::stringstream streamPath; | ||
streamPath << path_copy << ":" << streamName; | ||
std::string streamData; | ||
|
||
if (!readFile(streamPath.str(), streamData).ok()) { | ||
LOG(INFO) << "Couldn't read stream data: " << streamPath.str(); | ||
continue; | ||
} | ||
|
||
if (streamName == kZoneIdentifierKey) { | ||
parseZoneIdentifier(results, path, streamData); | ||
} else { | ||
// Remove trailing newlines | ||
boost::trim_right(streamData); | ||
setRow(results, path, streamName, streamData); | ||
} | ||
} while (FindNextStreamW(hFind, &findStreamData)); | ||
} else { | ||
auto error_code = GetLastError(); | ||
if (error_code != ERROR_HANDLE_EOF) { | ||
LOG(INFO) << "Error occurred while searching for streams in " << path | ||
<< ". Error code: " << error_code; | ||
} | ||
} | ||
} | ||
|
||
// Table generator for the ads table. Resolves the "path" and "directory"
// constraints (EQUALS and LIKE) into concrete filesystem entries and
// enumerates the alternate data streams of each.
QueryData genAds(QueryContext& context) {
  QueryData results;

  // Resolve file paths for EQUALS and LIKE operations on the "path" column.
  auto paths = context.constraints["path"].getAll(EQUALS);
  context.expandConstraints(
      "path",
      LIKE,
      paths,
      ([&](const std::string& pattern, std::set<std::string>& out) {
        std::vector<std::string> expanded;
        auto status =
            resolveFilePattern(pattern, expanded, GLOB_ALL | GLOB_NO_CANON);
        if (status.ok()) {
          out.insert(expanded.begin(), expanded.end());
        }
        return status;
      }));

  for (const auto& path_string : paths) {
    boost::filesystem::path path = path_string;
    boost::system::error_code ec;
    // Folders can have ADS streams too, so accept both kinds of entry.
    if (boost::filesystem::is_regular_file(path, ec) ||
        boost::filesystem::is_directory(path, ec)) {
      enumerateStreams(results, path.string());
    }
  }

  // Resolve directories for EQUALS and LIKE operations on "directory".
  auto directories = context.constraints["directory"].getAll(EQUALS);
  context.expandConstraints(
      "directory",
      LIKE,
      directories,
      ([&](const std::string& pattern, std::set<std::string>& out) {
        std::vector<std::string> expanded;
        auto status = resolveFilePattern(
            pattern, expanded, GLOB_FOLDERS | GLOB_NO_CANON);
        if (status.ok()) {
          out.insert(expanded.begin(), expanded.end());
        }
        return status;
      }));

  // Enumerate the streams of every file inside each constrained directory.
  for (const auto& directory_string : directories) {
    if (!isReadable(directory_string) || !isDirectory(directory_string)) {
      continue;
    }

    std::vector<std::string> files;
    if (listFilesInDirectory(directory_string, files).ok()) {
      for (const auto& file : files) {
        enumerateStreams(results, file);
      }
    }
  }

  return results;
}
} // namespace tables | ||
} // namespace osquery |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Table spec for the Windows-only ads table, backed by genAds in
# osquery/tables/system/windows/ads.cpp. Queries must constrain either
# "path" or "directory" (both are marked required).
table_name("ads")
description("Returns the stream names and values for files using NTFS Alternate Data Streams (ADS).")
schema([
    Column("path", TEXT, "Absolute file path", required=True, index=True),
    Column("directory", TEXT, "Directory of file(s)", required=True),
    Column("key", TEXT, "Name of the value generated from the stream"),
    Column("value", TEXT, "The parsed information from the attribute"),
    # Non-printable stream contents are base64-encoded before being returned.
    Column("base64", INTEGER, "1 if the value is base64 encoded else 0"),
])
implementation("system/windows/ads@genAds")
examples([
  "select * from ads where path = 'C:\\Users\\admin\\Downloads\\test.exe'"
])
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/** | ||
* Copyright (c) 2014-present, The osquery authors | ||
* | ||
* This source code is licensed as defined by the LICENSE file found in the | ||
* root directory of this source tree. | ||
* | ||
* SPDX-License-Identifier: (Apache-2.0 OR GPL-2.0-only) | ||
*/ | ||
|
||
// Sanity check integration test for ads | ||
// Spec file: specs/windows/ads.table | ||
|
||
#include <osquery/filesystem/filesystem.h> | ||
#include <osquery/tests/integration/tables/helper.h> | ||
#include <string> | ||
|
||
namespace osquery { | ||
namespace table_tests { | ||
|
||
// Fixture data: the test file name, the alternate stream name appended as
// "<file>:<stream>", and the payload written into that stream.
const std::string fileName = "test.txt";
const std::string streamName = "teststream";
const std::string streamContents = "This is some data in an alternate stream";
|
||
class ads : public testing::Test { | ||
public: | ||
boost::filesystem::path directory; | ||
|
||
void SetUp() override { | ||
setUpEnvironment(); | ||
|
||
directory = | ||
boost::filesystem::temp_directory_path() / | ||
boost::filesystem::unique_path("test-integration-file-table.%%%%-%%%%"); | ||
|
||
ASSERT_TRUE(boost::filesystem::create_directory(directory)); | ||
|
||
auto filepath = directory / boost::filesystem::path(fileName); | ||
|
||
// Create a file | ||
std::ofstream file(filepath.native()); | ||
file << "This is the main file data"; | ||
file.close(); | ||
|
||
// Add data to alternate stream | ||
std::string fullStreamPath = filepath.string() + ":" + streamName; | ||
std::ofstream streamFile(fullStreamPath); | ||
streamFile << streamContents; | ||
streamFile.close(); | ||
} | ||
|
||
virtual void TearDown() { | ||
boost::filesystem::remove_all(directory); | ||
} | ||
}; | ||
|
||
// Query the ads table for the fixture file and verify the stream the
// fixture created is returned with the expected metadata.
TEST_F(ads, test_sanity) {
  auto expected_path = directory.string();
  expected_path += "\\";
  expected_path += fileName;

  QueryData data =
      execute_query("select * from ads where path = '" + expected_path + "'");

  // Fail with a clear assertion rather than letting data.at(0) throw when
  // the table returns no rows.
  ASSERT_FALSE(data.empty());

  auto& row = data.at(0);
  ASSERT_EQ(row.at("key"), streamName);
  ASSERT_EQ(row.at("value"), streamContents);
  // streamContents is printable ASCII, so the value is returned verbatim
  // and the base64 flag must be "0". Row values are strings; the previous
  // assertion compared against `true`, which both used the wrong type and
  // contradicted the value check above.
  ASSERT_EQ(row.at("base64"), "0");
  ASSERT_EQ(row.at("path"), expected_path);
  ASSERT_EQ(row.at("directory"), directory.string());

  ValidationMap row_map = {
      {"path", FileOnDisk},
      {"directory", DirectoryOnDisk},
      {"key", NormalType},
      {"value", NormalType},
      {"base64", IntType},
  };

  validate_rows(data, row_map);
}
|
||
} // namespace table_tests | ||
} // namespace osquery |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the case of a directory, would this erroneously return the parent directory? I'm thinking that in that case `path` and `directory` should be the same?
should be the same?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, if a directory is specified the table will return something like:
Which is a similar behaviour to the `extended_attributes` table. I'm happy to change it, though, if it's better to keep `path` and `directory` the same.