Skip to content

Commit

Permalink
Randomisations: Change p_rank calculations to retain all defined vals
Browse files Browse the repository at this point in the history
Now values within the 5-95 percentile range are kept
instead of being set to nodata.

Updates #856
  • Loading branch information
shawnlaffan committed Apr 3, 2023
1 parent 0b6fa9c commit 0950be0
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 50 deletions.
23 changes: 3 additions & 20 deletions lib/Biodiverse/Common.pm
Expand Up @@ -2633,17 +2633,6 @@ sub get_sig_rank_from_comp_results {

\my %results_list_ref = $args{results_list_ref} // {};

my ($sig_thresh_lo, $sig_thresh_hi);
# this is recalculated every call - cheap, but perhaps should be optimised or cached?
if ($args{threshold}) {
$sig_thresh_lo = $args{threshold};
$sig_thresh_hi = 1 - $$sig_thresh_lo;
}
else {
$sig_thresh_lo = 0.05;
$sig_thresh_hi = 0.95;
}

foreach my $c_key (grep {$_ =~ /^C_/} keys %comp_list_ref) {

my $index_name = substr $c_key, 2;
Expand All @@ -2654,23 +2643,17 @@ sub get_sig_rank_from_comp_results {
}

# proportion observed higher than random
my $p_key = 'P_' . $index_name;
my $p_high = $comp_list_ref{$p_key};

if ( $p_high > $sig_thresh_hi) {
$results_list_ref{$index_name} = $p_high;
if ($comp_list_ref{"P_${index_name}"} > 0.5) {
$results_list_ref{$index_name} = $comp_list_ref{"P_${index_name}"};
}
else {
my $t_key = 'T_' . $index_name;
my $q_key = 'Q_' . $index_name;

# proportion observed lower than random
my $p_low
$results_list_ref{$index_name}
= ($comp_list_ref{$c_key} + ($comp_list_ref{$t_key} // 0))
/ $comp_list_ref{$q_key};
$results_list_ref{$index_name}
= ($p_low < $sig_thresh_lo) ? $p_low : undef;
}
}
Expand Down
26 changes: 10 additions & 16 deletions t/11-BaseData-reintegrate-after-rand.t
Expand Up @@ -678,14 +678,11 @@ sub check_randomisation_lists_incremented_correctly_spatial {

foreach my $sig_list_name (sort @sig_lists) {
# we only care if they are in the valid set
my %l_args = (element => $group, list => $sig_list_name);
my $lr_integr = $sp_integr->get_list_ref (%l_args);
foreach my $key (keys %$lr_integr) {
my $value = $lr_integr->{$key};
if (defined $value) {
my $msg ="p-rank $value in valid interval ($key), $group, $sig_list_name";
$collated_sig_list_got{$msg} = ($value < 0.05 || $value > 0.95);
}
my $orig_list_name = $sig_list_name =~ s/>>p_rank//r;
my $lr_integr = $sp_integr->get_list_ref (element => $group, list => $sig_list_name);
my $rand_list = $sp_integr->get_list_ref (element => $group, list => $orig_list_name);
foreach my $key (sort keys %$lr_integr) {
rand_p_rank_is_valid ($lr_integr, $rand_list, $key, $group);
}
}
}
Expand Down Expand Up @@ -746,15 +743,12 @@ sub check_randomisation_lists_incremented_correctly_cluster {
foreach my $sig_list_name (sort @sig_lists) {
# We only care if they are in the valid set.
# Replication is handled under test_reintegrate.
my %l_args = (list => $sig_list_name);
my $lr_integr = $to_node->get_list_ref (%l_args);
my $orig_list_name = $sig_list_name =~ s/>>p_rank//r;
my $lr_integr = $to_node->get_list_ref (list => $sig_list_name);
my $rand_list = $to_node->get_list_ref (list => $orig_list_name);
# diag $orig_list_name;
foreach my $key (sort keys %$lr_integr) {
my $value = $lr_integr->{$key};
if (defined $value) {
ok ($value < 0.05 || $value > 0.95,
"p-rank $value in valid interval ($key), $node_name",
);
}
rand_p_rank_is_valid ($lr_integr, $rand_list, $key, $node_name);
}
}

Expand Down
21 changes: 7 additions & 14 deletions t/28-Randomisation.t
Expand Up @@ -21,7 +21,7 @@ local $| = 1;

use Data::Section::Simple qw(get_data_section);

use Biodiverse::TestHelpers qw /:cluster :element_properties :tree/;
use Biodiverse::TestHelpers qw /:cluster :element_properties :tree :utils/;
use Biodiverse::Cluster;

use Biodiverse::Randomise;
Expand Down Expand Up @@ -1768,11 +1768,8 @@ sub test_p_ranks {
);
# values are undef for non-sig, or the sig thresh passed (low is negated)
foreach my $key (sort keys %$sig_listref) {
my $value = $sig_listref->{$key};
if (defined $value) {
$defined_count++;
ok ($value < $bounds[0] || $value > $bounds[1], "$value in valid interval ($key), $gp");
}
$defined_count
= rand_p_rank_is_valid ($sig_listref, $rand_listref, $key, $gp);
}
}
ok ($defined_count, "At least some spatial sig values were defined (got $defined_count)");
Expand Down Expand Up @@ -1801,14 +1798,9 @@ sub test_p_ranks {
[sort keys %expected_keys],
"got expected keys for $node_name",
);
# values are undef for non-sig, or the sig thresh passed (low is negated)
foreach my $key (sort keys %$sig_listref) {
my $value = $sig_listref->{$key};
if (defined $value) {
$defined_count++;
# use eq, not ==, due to floating point issues with 0.1
ok ($value < $bounds[0] || $value > $bounds[1], "$value in valid interval ($key), $node_name");
}
$defined_count
= rand_p_rank_is_valid ($sig_listref, $rand_listref, $key, $node_name);
}
}
ok ($defined_count, "At least some cluster node sig values were defined (got $defined_count)");
Expand Down Expand Up @@ -1880,7 +1872,8 @@ sub test_p_rank_thresh_calcs {
#say Dumper $p_rank;
}

sub test_p_rank_calcs {
# not needed now
sub _xx_test_p_rank_calcs {
my $bd = Biodiverse::BaseData->new(NAME => 'test_p_ranks', CELL_SIZES => [1,1]);

# set things up in one go for clarity, then subdivide
Expand Down
32 changes: 32 additions & 0 deletions t/lib/Biodiverse/TestHelpers.pm
Expand Up @@ -52,6 +52,7 @@ use Exporter::Easy (
verify_set_contents
write_data_to_temp_file
is_numeric_within_tolerance_or_exact_text
rand_p_rank_is_valid
),
],
basedata => [
Expand Down Expand Up @@ -131,6 +132,37 @@ use Exporter::Easy (
],
);


sub rand_p_rank_is_valid {
my ($sig_listref, $rand_listref, $key, $gp) = @_;
my $rand_value = $sig_listref->{$key};
my $orig_value = $rand_listref->{"P_${key}"};

ok exists $rand_listref->{"P_${key}"}, "rand list contains key P_${key}"
or return;

my $defined_count;
$gp //= '';

if (defined $orig_value) {
$defined_count++;
if ($orig_value > 0.5) {
is $rand_value, $orig_value, "expected p-rank, value > 0.5 ($key, $gp)";
}
else {
is $rand_value,
($rand_listref->{"C_${key}"} + ($rand_listref->{"T_${key}"} // 0))
/ $rand_listref->{"Q_${key}"},
"expected p-rank, value <= 0.5 ($key, $gp, $orig_value)";
}
}
else {
is $rand_value, $orig_value, "undef p-rank for undef input ($key, $gp)";
}
return $defined_count;
}
#=item isnt_deeply
#
#Same as is_deeply except it returns false if the two structurees are the same.
Expand Down

0 comments on commit 0950be0

Please sign in to comment.