Skip to content

Commit

Permalink
Add confidence intervals for ACE
Browse files Browse the repository at this point in the history
Needs more testing to shake out the various permutations.

Updates issue #420
  • Loading branch information
shawnlaffan committed Apr 27, 2016
1 parent 34bdfd2 commit 54370fc
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 19 deletions.
80 changes: 72 additions & 8 deletions lib/Biodiverse/Indices/RichnessEstimation.pm
Expand Up @@ -438,8 +438,10 @@ sub calc_ace {
my $f1 = 0;
my $n_rare = 0;
my $richness = scalar keys %$label_hash;
my %all_freqs; # all taxa

foreach my $freq (values %$label_hash) {
$all_freqs{$freq}++;
if ($freq <= 10) {
$f_rare{$freq} ++;
$n_rare += $freq;
Expand Down Expand Up @@ -512,7 +514,7 @@ sub calc_ace {
my $cv = sqrt $gamma_rare_hat_square;

my $variance = $self->_get_ace_variance (
freq_counts => \%f_rare,
freq_counts => \%all_freqs,
f_rare => \%f_rare,
cv => $cv,
n_rare => $n_rare,
Expand All @@ -521,14 +523,22 @@ sub calc_ace {
s_estimate => $S_ace,
);
my $se = sqrt $variance;
my $ci_vals = $self->_calc_ace_confidence_intervals (
freq_counts => \%all_freqs,
variance => $variance,
s_estimate => $S_ace,
richness => $richness,
label_hash => $label_hash,
);

my %results = (
ACE_SCORE => $S_ace,
ACE_SE => $se,
ACE_UNDETECTED => $S_ace - $richness,
ACE_VARIANCE => $variance,
ACE_CI_LOWER => undef,
ACE_CI_UPPER => undef,
ACE_CI_LOWER => $ci_vals->{ci_lower},
ACE_CI_UPPER => $ci_vals->{ci_upper},
ACE_INFREQUENT_COUNT => $S_rare,
);

Expand Down Expand Up @@ -584,14 +594,24 @@ sub _get_ace_variance {

my $freq_counts = $args{freq_counts};

# precalculate the differentials and covariances
my (%diff, %cov);
foreach my $i (sort {$a <=> $b} keys %$freq_counts) {
$diff{$i} = $self->_get_ace_differential (%args, f => $i);
#say "diff $i = $diff{$i}";
foreach my $j (sort {$a <=> $b} keys %$freq_counts) {
$cov{$i}{$j} = $cov{$j}{$i}
// $self->_get_ace_ice_cov (%args, i => $i, j => $j);
#say "cov $i,$j = $cov{$i}{$j}";
}
}

my $var_ace = 0;
foreach my $i (keys %$freq_counts) {
foreach my $j (keys %$freq_counts) {
my $partial
= $self->_get_ace_differential (%args, f => $i)
* $self->_get_ace_differential (%args, f => $j)
* $self->_get_ace_ice_cov (%args, i => $i, j => $j);
$var_ace += $partial;
= $diff{$i} * $diff{$j} * $cov{$i}{$j};
$var_ace += $partial;
}
}

Expand Down Expand Up @@ -627,7 +647,7 @@ sub _get_ace_ice_cov {
my $self = shift;
my %args = @_;
my ($i, $j, $s_ice) = @args{qw/i j s_estimate/};
my $Q = $args{f_rare};
my $Q = $args{freq_counts};

my $covf;
if ($i == $j) {
Expand Down Expand Up @@ -846,6 +866,50 @@ sub _get_ace_differential {
return $d;
}


sub _calc_ace_confidence_intervals {

my $self = shift;
my %args = @_;

my $estimate = $args{s_estimate};
my $richness = $args{richness};
my $variance = $args{variance};
my $label_hash = $args{label_hash};
my $freq_counts = $args{freq_counts};

# and now the confidence interval
my ($lower, $upper);

# SpadeR treats values this close to zero as zero
if (($estimate - $richness) >= 0.00001) {
my $T = $estimate - $richness;
my $K = exp ($z_for_ci * sqrt (log (1 + $variance / $T**2)));
$lower = $richness + $T / $K;
$upper = $richness + $T * $K;
}
else {
my ($part1, $part2, $P) = (0, 0, 0);
foreach my $f (keys %$freq_counts) {
$part1 += $freq_counts->{$f} * (exp(-$f) - exp(-2*$f));
$part2 += $f * exp (-$f) * $freq_counts->{$f};
$P += $freq_counts->{$f} * exp (-$f) / $richness;
}
my $n = sum values %$label_hash;
my $var_obs = $part1 - $part2**2 / $n; # should be passed as an arg?
#say "var_obs is the same as the variance argument\n" if $var_obs == $variance;
#say "$n $richness $P $var_obs $part1 $part2";
$lower = max($richness, $richness / (1 - $P) - $z_for_ci * sqrt($var_obs) / (1 - $P));
$upper = $richness / (1 - $P) + $z_for_ci * sqrt($var_obs) / (1 - $P);
}

my %results = (
ci_lower => $lower,
ci_upper => $upper,
);

return wantarray ? %results : \%results;
}
1;

__END__
Expand Down
15 changes: 4 additions & 11 deletions t/24-Indices_richness_estimators.t
Expand Up @@ -384,19 +384,12 @@ sub test_chao2_no_Q1_no_Q2 {
sub test_ACE {
my $bd = shift->clone;

## need to ensure there are no doubles
#foreach my $label ($bd->get_labels) {
# if ($bd->get_label_sample_count(element => $label) == 2) {
# $bd->add_element (group => $focal_gp, label => $label, count => 20);
# }
#}

my $results2 = {
ACE_SCORE => 28.459957,
ACE_SE => 2.457,
ACE_VARIANCE => 6.036849,
ACE_CI_LOWER => 26.482,
ACE_CI_UPPER => 38.562,
ACE_SE => 2.457292,
ACE_VARIANCE => 6.03828211669945,
ACE_CI_LOWER => 26.4817368287846,
ACE_CI_UPPER => 38.561607,
ACE_UNDETECTED => 2.459957,
ACE_INFREQUENT_COUNT => 19,
};
Expand Down

0 comments on commit 54370fc

Please sign in to comment.