Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new ADS table #8190

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions osquery/tables/system/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ function(generateOsqueryTablesSystemSystemtable)

elseif(DEFINED PLATFORM_WINDOWS)
list(APPEND source_files
windows/ads.cpp
windows/appcompat_shims.cpp
windows/authenticode.cpp
windows/autoexec.cpp
Expand Down
193 changes: 193 additions & 0 deletions osquery/tables/system/windows/ads.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
/**
* Copyright (c) 2014-present, The osquery authors
*
* This source code is licensed as defined by the LICENSE file found in the
* root directory of this source tree.
*
* SPDX-License-Identifier: (Apache-2.0 OR GPL-2.0-only)
*/

#include <boost/algorithm/string/predicate.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <boost/filesystem.hpp>
#include <string>
#include <windows.h>

#include <osquery/core/tables.h>
#include <osquery/filesystem/filesystem.h>
#include <osquery/logger/logger.h>
#include <osquery/utils/base64.h>
#include <osquery/utils/chars.h>
#include <osquery/utils/conversions/split.h>
#include <osquery/utils/conversions/windows/strings.h>
#include <osquery/utils/scope_guard.h>

namespace fs = boost::filesystem;

namespace osquery {
namespace tables {

// Name of the stream whose contents are split into one row per "key=value"
// line instead of being reported as a single value.
const std::string kZoneIdentifierKey = "Zone.Identifier";

void setRow(QueryData& results,
const std::string& path,
const std::string& key,
const std::string& value) {
Row r;
r["path"] = path;
r["directory"] = boost::filesystem::path(path).parent_path().string();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case of a directory would this erroneously return the parent directory? I'm thinking in that case path and directory should be the same?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, if a directory is specified the table will return something like:

> select * from ads where path = 'C:\Users\ignacior\Downloads\subdir';
+------------------------------------+-----------------------------+----------+-----------------+--------+
| path                               | directory                   | key      | value           | base64 |
+------------------------------------+-----------------------------+----------+-----------------+--------+
| C:\Users\ignacior\Downloads\subdir | C:\Users\ignacior\Downloads | hide.txt | secret          | 0      |
+------------------------------------+-----------------------------+----------+-----------------+--------+

Which is a similar behaviour to the extended_attributes table:

Using a virtual database. Need help, type '.help'
osquery> select version from osquery_info;
+---------+
| version |
+---------+
| 5.4.0   |
+---------+
osquery> select * from extended_attributes where path = '/Users/ignacior/Downloads';
+---------------------------+-----------------+----------------+--------------------------------------+--------+
| path                      | directory       | key            | value                                | base64 |
+---------------------------+-----------------+----------------+--------------------------------------+--------+
| /Users/ignacior/Downloads | /Users/ignacior | com.apple.macl | BAAoRInjLHdMmrS8U0MEzCBvBADshbS1+VFBm| 1      |
+---------------------------+-----------------+----------------+--------------------------------------+--------+

I'm happy to change it though if it's better to keep path and directory the same.

r["key"] = key;
if (isPrintable(value)) {
r["value"] = value;
r["base64"] = INTEGER(0);
} else {
r["value"] = base64::encode(value);
r["base64"] = INTEGER(1);
}
results.push_back(r);
}

// Turn the contents of a Zone.Identifier stream into result rows.
//
// The stream data is cut into lines; each line that contains '=' produces one
// row whose key is the text before the first '=' and whose value is everything
// after it. Lines with no '=' are ignored.
//
// results:    rows are appended here (via setRow).
// path:       path of the file or directory that owns the stream.
// streamData: raw contents read from the stream.
void parseZoneIdentifier(QueryData& results,
                         const std::string& path,
                         const std::string& streamData) {
  for (const auto& entry : split(streamData, "\n")) {
    const auto separator = entry.find('=');
    if (separator != std::string::npos) {
      // Only the first '=' separates key from value, so a value may itself
      // contain '=' characters.
      setRow(results,
             path,
             entry.substr(0, separator),
             entry.substr(separator + 1));
    }
  }
}

// Enumerate the named alternate data streams attached to `path` and append
// their contents to `results`.
//
// Each stream reported by FindFirstStreamW/FindNextStreamW has the form
// ":streamname:$streamtype". The unnamed stream ("::$DATA") is the regular
// file content and is skipped without logging. A Zone.Identifier stream is
// expanded into one row per "key=value" line; any other stream yields a single
// row holding its content with trailing whitespace removed.
//
// results: rows are appended here.
// path:    file or directory to inspect.
//
// Failures are logged and otherwise ignored; the function never throws on its
// own account.
void enumerateStreams(QueryData& results, const std::string& path) {
  WIN32_FIND_STREAM_DATA findStreamData;
  HANDLE hFind = FindFirstStreamW(stringToWstring(path).c_str(),
                                  FindStreamInfoStandard,
                                  &findStreamData,
                                  0);

  if (hFind == INVALID_HANDLE_VALUE) {
    // Read the error immediately, before any other call can overwrite it.
    const auto error_code = GetLastError();
    // ERROR_HANDLE_EOF just means the object has no streams at all.
    if (error_code != ERROR_HANDLE_EOF) {
      LOG(INFO) << "Error occurred while searching for streams in " << path
                << ". Error code: " << error_code;
    }
    return;
  }

  // The guard is created only once the handle is known to be valid, so
  // FindClose is never called on INVALID_HANDLE_VALUE.
  auto fd_guard = scope_guard::create([&] { FindClose(hFind); });

  // Drop a trailing backslash so that "<path>:<stream>" is well formed. This
  // does not depend on the stream, so it is computed once.
  std::string basePath = path;
  if (boost::algorithm::ends_with(basePath, "\\")) {
    basePath.pop_back();
  }

  do {
    const std::string stream(wstringToString(findStreamData.cStreamName));

    // Locate the ':' that separates the stream name from its type. The name
    // is parsed by position so that an empty name ("::$DATA") is still
    // recognised as well formed.
    const auto typeSeparator = (!stream.empty() && stream.front() == ':')
                                   ? stream.find(':', 1)
                                   : std::string::npos;

    if (typeSeparator == std::string::npos ||
        typeSeparator + 1 >= stream.size() ||
        stream.find(':', typeSeparator + 1) != std::string::npos) {
      LOG(WARNING) << "Invalid stream name found: '" << stream
                   << "'. Skipping this entry";
      continue;
    }

    const std::string streamName = stream.substr(1, typeSeparator - 1);

    // Skip unnamed stream since it represents the file content
    if (streamName.empty()) {
      continue;
    }

    const std::string streamPath = basePath + ":" + streamName;
    std::string streamData;

    if (!readFile(streamPath, streamData).ok()) {
      LOG(INFO) << "Couldn't read stream data: " << streamPath;
      continue;
    }

    if (streamName == kZoneIdentifierKey) {
      parseZoneIdentifier(results, path, streamData);
    } else {
      // Remove trailing newlines
      boost::trim_right(streamData);
      setRow(results, path, streamName, streamData);
    }
  } while (FindNextStreamW(hFind, &findStreamData));

  // FindNextStreamW reports the normal end of the list as ERROR_HANDLE_EOF;
  // anything else means the enumeration stopped early.
  const auto next_error = GetLastError();
  if (next_error != ERROR_HANDLE_EOF) {
    LOG(INFO) << "Error occurred while enumerating streams in " << path
              << ". Error code: " << next_error;
  }
}

QueryData genAds(QueryContext& context) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a little unsure of how the path and directory expansion logic work.

I think the intent is that one can query a directory and get back all the files inside (similar to the file table). Or one can query a specific path. (And both probably support LIKE.) This makes sense to me.

But the implementation enumerates the path first, and then the directory. And I think that will, at least sometimes, result in the same work being done twice. I guess it's confusing.

If both are part of the query predicate, sqlite will filter to only return rows that match both. Eg select * from ads where directory = '/tmp/' AND path = '/var/tmp/foo' would result first in enumerating /var/tmp/foo, then all the files in /tmp, and would ultimately return nothing, because sqlite filtered them.

Though, to correct myself, that's not at all true: there's an OR in there.

I guess I'd suggest a reasonable pattern is to generate the list of things to enumerate, and then find the union of them. This would, at least, prevent duplicate enumeration.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic is a copy-paste from the extended_attributes table, since that is what I based my work on. I agree it is a bit confusing 😅

I can take a look into making it more efficient.

QueryData results;
// Resolve file paths for EQUALS and LIKE operations.
auto paths = context.constraints["path"].getAll(EQUALS);
context.expandConstraints(
"path",
LIKE,
paths,
([&](const std::string& pattern, std::set<std::string>& out) {
std::vector<std::string> patterns;
auto status =
resolveFilePattern(pattern, patterns, GLOB_ALL | GLOB_NO_CANON);
if (status.ok()) {
for (const auto& resolved : patterns) {
out.insert(resolved);
}
}
return status;
}));

for (const auto& path_string : paths) {
boost::filesystem::path path = path_string;
boost::system::error_code ec;
// Folders can have ADS streams too
if (!(boost::filesystem::is_regular_file(path, ec) ||
boost::filesystem::is_directory(path, ec))) {
continue;
}
enumerateStreams(results, path.string());
Comment on lines +151 to +156
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this miss directories that are included in the directory constraint?

}

// Resolve directories for EQUALS and LIKE operations.
auto directories = context.constraints["directory"].getAll(EQUALS);
context.expandConstraints(
"directory",
LIKE,
directories,
([&](const std::string& pattern, std::set<std::string>& out) {
std::vector<std::string> patterns;
auto status =
resolveFilePattern(pattern, patterns, GLOB_FOLDERS | GLOB_NO_CANON);
if (status.ok()) {
for (const auto& resolved : patterns) {
out.insert(resolved);
}
}
return status;
}));

// Now loop through constraints using the directory column constraint.
for (const auto& directory_string : directories) {
if (!isReadable(directory_string) || !isDirectory(directory_string)) {
continue;
}

std::vector<std::string> files;
if (listFilesInDirectory(directory_string, files).ok()) {
for (const auto& file : files) {
enumerateStreams(results, file);
}
}
}
return results;
}
} // namespace tables
} // namespace osquery
1 change: 1 addition & 0 deletions specs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ function(generateNativeTables)
"sleuthkit/device_hash.table:linux,macos,windows"
"sleuthkit/device_partitions.table:linux,macos,windows"
"user_groups.table:linux,macos,windows"
"windows/ads.table:windows"
"windows/background_activities_moderator.table:windows"
"windows/bitlocker_info.table:windows"
"windows/chassis_info.table:windows"
Expand Down
13 changes: 13 additions & 0 deletions specs/windows/ads.table
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
table_name("ads")
description("Returns the stream names and values for files using NTFS Alternate Data Streams (ADS).")
schema([
Column("path", TEXT, "Absolute file path", required=True, index=True),
Column("directory", TEXT, "Directory of file(s)", required=True),
Column("key", TEXT, "Name of the value generated from the stream"),
Column("value", TEXT, "The parsed information from the attribute"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What kind of data shows up here? Some internet things talk about malware smuggling entire file contents here. Will that be okay to push back in a column? (We don't generally push that much data through osquery, so it feels a little amiss)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

100% agree. I think the main value of this table is the content of the Zone.Identifier stream which can help during investigations to identify where the file was downloaded from.
In my PR description I left an open question about how to handle cases where a stream contains an entire file. Maybe we can set a hard limit on the length of the content and warn users if the content is too large to be displayed by osquery?
Happy to hear other thoughts.

I'm also not sure how the extended_attributes table handles this type of cases in nix systems.

Copy link
Member

@Smjert Smjert Dec 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm also not sure how the extended_attributes table handles this type of cases in nix systems.

The possible max value size is much more limited: https://en.wikipedia.org/wiki/Extended_file_attributes

The Linux kernel allows extended attribute to have names of up to 255 bytes and values of up to 64 KiB, as do XFS and ReiserFS, but ext2/3/4 and btrfs impose much smaller limits, requiring all the attributes (names and values) of one file to fit in one "filesystem block" (usually 4 KiB)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it probably depends on what we're concerned about.

If it's content, the most conservative approach would be to only fetch the value for keys on a compiled allowlist. (pushing people to use carves if they want the content)

If it's size, truncation and warnings probably make sense.

Column("base64", INTEGER, "1 if the value is base64 encoded else 0"),
])
implementation("system/windows/ads@genAds")
examples([
"select * from ads where path = 'C:\\Users\\admin\\Downloads\\test.exe'"
])
1 change: 1 addition & 0 deletions tests/integration/tables/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ function(generateTestsIntegrationTablesTestsTest)

elseif(DEFINED PLATFORM_WINDOWS)
set(platform_source_files
ads.cpp
appcompat_shims.cpp
arp_cache.cpp
authenticode.cpp
Expand Down
85 changes: 85 additions & 0 deletions tests/integration/tables/ads.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/**
* Copyright (c) 2014-present, The osquery authors
*
* This source code is licensed as defined by the LICENSE file found in the
* root directory of this source tree.
*
* SPDX-License-Identifier: (Apache-2.0 OR GPL-2.0-only)
*/

// Sanity check integration test for ads
// Spec file: specs/windows/ads.table

#include <fstream>
#include <string>

#include <boost/filesystem.hpp>

#include <osquery/filesystem/filesystem.h>
#include <osquery/tests/integration/tables/helper.h>

namespace osquery {
namespace table_tests {

// Name of the file created inside the fixture's temporary directory.
const std::string fileName = "test.txt";
// Name of the alternate data stream attached to that file.
const std::string streamName = "teststream";
// Text written to the alternate stream; the test expects it back in the
// "value" column.
const std::string streamContents = "This is some data in an alternate stream";

class ads : public testing::Test {
public:
boost::filesystem::path directory;

void SetUp() override {
setUpEnvironment();

directory =
boost::filesystem::temp_directory_path() /
boost::filesystem::unique_path("test-integration-file-table.%%%%-%%%%");

ASSERT_TRUE(boost::filesystem::create_directory(directory));

auto filepath = directory / boost::filesystem::path(fileName);

// Create a file
std::ofstream file(filepath.native());
file << "This is the main file data";
file.close();

// Add data to alternate stream
std::string fullStreamPath = filepath.string() + ":" + streamName;
std::ofstream streamFile(fullStreamPath);
streamFile << streamContents;
streamFile.close();
}

virtual void TearDown() {
boost::filesystem::remove_all(directory);
}
};

// Query the ads table for the fixture file and check that its single named
// stream is reported with the expected key, value, encoding flag and paths.
TEST_F(ads, test_sanity) {
  auto expected_path = directory.string();
  expected_path += "\\";
  expected_path += fileName;
  QueryData data =
      execute_query("select * from ads where path = \'" + expected_path + "\'");

  // The fixture file has exactly one named stream. Checking the size first
  // turns an empty result into a test failure instead of an exception from
  // at(0).
  ASSERT_EQ(data.size(), 1U);

  const auto& row = data.at(0);
  ASSERT_EQ(row.at("key"), streamName);
  ASSERT_EQ(row.at("value"), streamContents);
  // The stream content is printable text, so it is returned as-is and the
  // base64 column holds "0".
  ASSERT_EQ(row.at("base64"), "0");
  ASSERT_EQ(row.at("path"), expected_path);
  ASSERT_EQ(row.at("directory"), directory.string());

  ValidationMap row_map = {
      {"path", FileOnDisk},
      {"directory", DirectoryOnDisk},
      {"key", NormalType},
      {"value", NormalType},
      {"base64", IntType},
  };

  validate_rows(data, row_map);
}

} // namespace table_tests
} // namespace osquery