Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Send raylet error logs through the log monitor #5351

Merged
merged 6 commits into from Aug 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 6 additions & 2 deletions python/ray/log_monitor.py
Expand Up @@ -90,10 +90,12 @@ def close_all_files(self):

def update_log_filenames(self):
"""Update the list of log files to monitor."""
# we only monior worker log files
# output of user code is written here
log_file_paths = glob.glob("{}/worker*[.out|.err]".format(
self.logs_dir))
for file_path in log_file_paths:
# segfaults and other serious errors are logged here
raylet_err_paths = glob.glob("{}/raylet*.err".format(self.logs_dir))
for file_path in log_file_paths + raylet_err_paths:
if os.path.isfile(
file_path) and file_path not in self.log_filenames:
self.log_filenames.add(file_path)
Expand Down Expand Up @@ -195,6 +197,8 @@ def check_log_files_and_publish_updates(self):
file_info.worker_pid = int(
lines_to_publish[0].split(" ")[-1])
lines_to_publish = lines_to_publish[1:]
elif "/raylet" in file_info.filename:
file_info.worker_pid = "raylet"

# Record the current position in the file.
file_info.file_position = file_info.file_handle.tell()
Expand Down
11 changes: 9 additions & 2 deletions python/ray/worker.py
Expand Up @@ -1594,15 +1594,22 @@ def print_logs(redis_client, threads_stopped):
num_consecutive_messages_received += 1

data = json.loads(ray.utils.decode(msg["data"]))

def color_for(data):
if data["pid"] == "raylet":
return colorama.Fore.YELLOW
else:
return colorama.Fore.CYAN

if data["ip"] == localhost:
for line in data["lines"]:
print("{}{}(pid={}){} {}".format(
colorama.Style.DIM, colorama.Fore.CYAN, data["pid"],
colorama.Style.DIM, color_for(data), data["pid"],
colorama.Style.RESET_ALL, line))
else:
for line in data["lines"]:
print("{}{}(pid={}, ip={}){} {}".format(
colorama.Style.DIM, colorama.Fore.CYAN, data["pid"],
colorama.Style.DIM, color_for(data), data["pid"],
data["ip"], colorama.Style.RESET_ALL, line))

if (num_consecutive_messages_received % 100 == 0
Expand Down
24 changes: 23 additions & 1 deletion src/ray/util/logging.cc
Expand Up @@ -11,11 +11,28 @@
#include <iostream>

#ifdef RAY_USE_GLOG
#include <sys/stat.h>
#include "glog/logging.h"
#endif

namespace ray {

#ifdef RAY_USE_GLOG
struct StdoutLogger : public google::base::Logger {
virtual void Write(bool /* should flush */, time_t /* timestamp */, const char *message,
int length) {
// note: always flush otherwise it never shows up in raylet.out
std::cout << std::string(message, length) << std::flush;
}

virtual void Flush() { std::cout.flush(); }

virtual google::uint32 LogSize() { return 0; }
};

static StdoutLogger stdout_logger_singleton;
#endif

// This is the default implementation of ray log,
// which is independent of any libs.
class CerrLog {
Expand Down Expand Up @@ -122,7 +139,12 @@ void RayLog::StartRayLog(const std::string &app_name, RayLogLevel severity_thres
#ifdef RAY_USE_GLOG
google::InitGoogleLogging(app_name_.c_str());
int mapped_severity_threshold = GetMappedSeverity(severity_threshold_);
google::SetStderrLogging(mapped_severity_threshold);
google::SetStderrLogging(GetMappedSeverity(RayLogLevel::ERROR));
for (int i = static_cast<int>(severity_threshold_);
i <= static_cast<int>(RayLogLevel::FATAL); ++i) {
int level = GetMappedSeverity(static_cast<RayLogLevel>(i));
google::base::SetLogger(level, &stdout_logger_singleton);
}
// Enable log file if log_dir_ is not empty.
if (!log_dir_.empty()) {
auto dir_ends_with_slash = log_dir_;
Expand Down