Skip to content

Commit

Permalink
Expose bandwidth data for worker cache via InfluxDB
Browse files Browse the repository at this point in the history
  • Loading branch information
kraih committed Feb 24, 2022
1 parent 4c3e524 commit e3f50f2
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 17 deletions.
12 changes: 11 additions & 1 deletion lib/OpenQA/CacheService.pm
Original file line number Diff line number Diff line change
Expand Up @@ -220,5 +220,15 @@ DROP TABLE downloads;
-- 3 up
ALTER TABLE assets ADD COLUMN `pending` INTEGER DEFAULT 1;
--4 up
-- 4 up
CREATE INDEX IF NOT EXISTS assets_pending on assets (pending);
-- 5 up
CREATE TABLE IF NOT EXISTS metrics (
`name` TEXT NOT NULL UNIQUE,
`value` TEXT,
PRIMARY KEY(`name`)
);
-- 5 down
DROP TABLE metrics;
7 changes: 6 additions & 1 deletion lib/OpenQA/CacheService/Controller/Influxdb.pm
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ package OpenQA::CacheService::Controller::Influxdb;
use Mojo::Base 'Mojolicious::Controller', -signatures;

sub minion ($self) {
my $stats = $self->app->minion->stats;
my $app = $self->app;
my $stats = $app->minion->stats;
my $jobs = {
active => $stats->{active_jobs},
delayed => $stats->{delayed_jobs},
Expand All @@ -18,6 +19,10 @@ sub minion ($self) {
$text .= _output_measure($url, 'openqa_minion_jobs', $jobs);
$text .= _output_measure($url, 'openqa_minion_workers', $workers);

my $metrics = $app->cache->metrics;
my $bytes = $metrics->{download_rate} || 0;
$text .= "openqa_download_rate,url=$url bytes=${bytes}i\n";

$self->render(text => $text);
}

Expand Down
15 changes: 14 additions & 1 deletion lib/OpenQA/CacheService/Model/Cache.pm
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use Carp 'croak';
use Capture::Tiny 'capture_merged';
use Mojo::URL;
use OpenQA::Log qw(log_error);
use OpenQA::Utils qw(base_host human_readable_size check_df download_speed);
use OpenQA::Utils qw(base_host human_readable_size check_df download_rate download_speed);
use OpenQA::Downloader;
use Mojo::File 'path';
use Time::HiRes qw(gettimeofday);
Expand Down Expand Up @@ -140,6 +140,7 @@ sub get_asset ($self, $host, $job, $type, $asset) {
die qq{Updating the cache for "$asset" failed, this should never happen} unless $ok;
my $cache_size = human_readable_size($self->{cache_real_size});
my $speed = download_speed($start, $end, $size);
$self->_update_metric('download_rate', int(download_rate($start, $end, $size) // 0));
$log->info(qq{Download of "$asset" successful ($speed), new cache size is $cache_size});
},
on_failed => sub {
Expand Down Expand Up @@ -168,6 +169,18 @@ sub track_asset ($self, $asset) {
if (my $err = $@) { $self->log->error("Tracking asset failed: $err") }
}

# Return all rows of the "metrics" table as a hash reference mapping each
# row's "name" column to its "value" column.
sub metrics ($self) {
    my $rows = $self->sqlite->db->query("SELECT * FROM metrics")->hashes;

    my %value_of;
    for my $row ($rows->each) {
        $value_of{$row->{name}} = $row->{value};
    }
    return \%value_of;
}

# Insert a named metric into the "metrics" table, or overwrite its value if a
# row with the same name already exists.
#
#   $name  - metric name (the table's primary key)
#   $value - value to store for that name
#
# The statement runs inside an exclusive transaction that is committed before
# returning.
sub _update_metric ($self, $name, $value) {
    my $db = $self->sqlite->db;
    my $tx = $db->begin('exclusive');

    # Name the conflict target explicitly: SQLite releases before 3.35.0 reject
    # "ON CONFLICT DO UPDATE" without one. "excluded.value" refers to the value
    # of the row that failed to insert, so the parameter is not bound twice.
    my $sql = 'INSERT INTO metrics (name, value) VALUES ($1, $2)'
      . ' ON CONFLICT (name) DO UPDATE SET value = excluded.value';
    $db->query($sql, $name, $value);
    $tx->commit;
}

sub _update_asset_last_use ($self, $asset) {
my $db = $self->sqlite->db;
my $tx = $db->begin('exclusive');
Expand Down
11 changes: 8 additions & 3 deletions lib/OpenQA/Utils.pm
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ our @EXPORT = qw(
fix_top_level_help
looks_like_url_with_scheme
check_df
download_rate
download_speed
);

Expand Down Expand Up @@ -931,11 +932,15 @@ sub check_df ($dir) {
return ($available_bytes, $total_bytes);
}

sub download_speed ($start, $end, $bytes) {
sub download_rate ($start, $end, $bytes) {
my $interval = tv_interval($start, $end);
return '??/s' if $interval == 0;
return undef if $interval == 0;
return sprintf('%.2f', $bytes / $interval);
}

my $rate = sprintf('%.2f', $bytes / $interval);
sub download_speed ($start, $end, $bytes) {
my $rate = download_rate($start, $end, $bytes);
return '??/s' unless defined $rate;
my $human = human_readable_size($rate);
return "$human/s";
}
Expand Down
8 changes: 6 additions & 2 deletions t/16-utils.t
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ sub exit_code(&) {

use FindBin;
use lib "$FindBin::Bin/lib", "$FindBin::Bin/../external/os-autoinst-common/lib";
use OpenQA::Utils
qw(:DEFAULT prjdir sharedir resultdir assetdir imagesdir base_host random_string random_hex download_speed);
use OpenQA::Utils (qw(:DEFAULT prjdir sharedir resultdir assetdir imagesdir base_host random_string random_hex),
qw(download_rate download_speed));
use OpenQA::Task::SignalGuard;
use OpenQA::Test::Utils 'redirect_output';
use OpenQA::Test::TimeLimit '10';
Expand Down Expand Up @@ -79,6 +79,10 @@ subtest 'random number generator' => sub {
};

subtest 'download speed' => sub {
is download_rate([1638459407, 528237], [1638459408, 628237], 1024), '930.91';
is download_rate([1638459407, 528237], [1638459408, 628237], 1024 * 1024 * 1024), '976128930.91';
is download_rate([1638459407, 528237], [1638459407, 528237], 1024), undef;

is download_speed([1638459407, 528237], [1638459408, 628237], 1024), '930.91 Byte/s';
is download_speed([1638459407, 528237], [1638459408, 628237], 1024 * 1024), '931 KiB/s';
is download_speed([1638459407, 528237], [1638459408, 628237], 1024 * 1024 * 1024), '931 MiB/s';
Expand Down
20 changes: 14 additions & 6 deletions t/25-cache-service.t
Original file line number Diff line number Diff line change
Expand Up @@ -485,45 +485,53 @@ subtest 'Test Minion Sync task' => sub {
};

subtest 'Minion monitoring with InfluxDB' => sub {
my $app = OpenQA::CacheService->new;
my $rate = $app->cache->metrics->{download_rate};
ok $rate > 0, 'download rate is higher than 0 bytes per second';

my $url = $cache_client->url('/influxdb/minion');
my $ua = $cache_client->ua;
my $res = $ua->get($url)->result;
is $res->body, <<'EOF', 'three workers still running';
is $res->body, <<"EOF", 'three workers still running';
openqa_minion_jobs,url=http://127.0.0.1:9530 active=0i,delayed=0i,failed=0i,inactive=0i
openqa_minion_workers,url=http://127.0.0.1:9530 active=0i,inactive=2i
openqa_download_rate,url=http://127.0.0.1:9530 bytes=${rate}i
EOF

my $app = OpenQA::CacheService->new;
my $minion = $app->minion;
my $worker = $minion->repair->worker->register;
$res = $ua->get($url)->result;
is $res->body, <<'EOF', 'four workers running now';
is $res->body, <<"EOF", 'four workers running now';
openqa_minion_jobs,url=http://127.0.0.1:9530 active=0i,delayed=0i,failed=0i,inactive=0i
openqa_minion_workers,url=http://127.0.0.1:9530 active=0i,inactive=3i
openqa_download_rate,url=http://127.0.0.1:9530 bytes=${rate}i
EOF

$minion->add_task(test => sub { });
my $job_id = $minion->enqueue('test');
my $job_id2 = $minion->enqueue('test');
my $job = $worker->dequeue(0);
$res = $ua->get($url)->result;
is $res->body, <<'EOF', 'two jobs';
is $res->body, <<"EOF", 'two jobs';
openqa_minion_jobs,url=http://127.0.0.1:9530 active=1i,delayed=0i,failed=0i,inactive=1i
openqa_minion_workers,url=http://127.0.0.1:9530 active=1i,inactive=2i
openqa_download_rate,url=http://127.0.0.1:9530 bytes=${rate}i
EOF

$job->fail('test');
$res = $ua->get($url)->result;
is $res->body, <<'EOF', 'one job failed';
is $res->body, <<"EOF", 'one job failed';
openqa_minion_jobs,url=http://127.0.0.1:9530 active=0i,delayed=0i,failed=1i,inactive=1i
openqa_minion_workers,url=http://127.0.0.1:9530 active=0i,inactive=3i
openqa_download_rate,url=http://127.0.0.1:9530 bytes=${rate}i
EOF

$job->retry({delay => ONE_HOUR});
$res = $ua->get($url)->result;
is $res->body, <<'EOF', 'job is being retried';
is $res->body, <<"EOF", 'job is being retried';
openqa_minion_jobs,url=http://127.0.0.1:9530 active=0i,delayed=1i,failed=0i,inactive=2i
openqa_minion_workers,url=http://127.0.0.1:9530 active=0i,inactive=3i
openqa_download_rate,url=http://127.0.0.1:9530 bytes=${rate}i
EOF
};

Expand Down
6 changes: 3 additions & 3 deletions t/25-cache.t
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ END { stop_service($server_instance) }

my $app = OpenQA::CacheService->new(log => $log);
my $cache = $app->cache;
is $cache->sqlite->migrations->latest, 4, 'version 4 is the latest version';
is $cache->sqlite->migrations->active, 4, 'version 4 is the active version';
is $cache->sqlite->migrations->latest, 5, 'Current version is the latest version';
is $cache->sqlite->migrations->active, 5, 'Current version is the active version';
like $cache_log, qr/Creating cache directory tree for "$cachedir"/, 'Cache directory tree created';
like $cache_log, qr/Cache size of "$cachedir" is 0 Byte, with limit 50 GiB/, 'Cache limit is default (50GB)';
ok(-e $db_file, 'cache.sqlite is present');
Expand Down Expand Up @@ -107,7 +107,7 @@ $cache->sqlite->db->query(
$cache->downloader->sleep_time(0.01);
$cache->init;
$cache->limit(100);
is $cache->sqlite->migrations->active, 4, 'version 4 is still the active version';
is $cache->sqlite->migrations->active, 5, 'Current version is still the active version';
like $cache_log, qr/Cache size of "$cachedir" is 168 Byte, with limit 50 GiB/,
'Cache limit/size match the expected 100GB/168)';
unlike $cache_log, qr/Purging ".*[13].qcow2"/, 'Registered assets 1 and 3 were kept';
Expand Down

0 comments on commit e3f50f2

Please sign in to comment.