In [None]:
# default_exp att_stats

# AttStats

> Class representing attribute statistics.


Attribute statistics consist of the average confidence for the attribute (the mean, over all instances, of the confidence of the most probable value) and the average confidence for each of its values.

**Example:**

"@attribute outlook {sunny, overcast, rainy}
<br> @attribute temperature {hot, mild, cool}
<br>
<br> @data
<br> sunny[0.5];rainy[0.3];overcast[0.2],hot
<br> sunny[0.1];rainy[0.4];overcast[0.5],mild
<br> sunny[0.7];rainy[0.1];overcast[0.2],cool"

Statistics for "outlook" attribute are as follows:
 * average confidence: ( max(0.5, 0.3, 0.2) + max(0.1, 0.4, 0.5) + max(0.7, 0.1, 0.2) ) / 3 = 0.57
 * statistics: [(0.5 + 0.1 + 0.7) / 3, (0.3 + 0.4 + 0.1) / 3, (0.2 + 0.5 + 0.2) / 3] = [0.43, 0.27, 0.3]

In [None]:
# export
from typing import List

from pyuid3.value import Value
from pyuid3.attribute import Attribute
# from pyuid3.data import Data   # may cause problems

In [None]:
# export
class AttStats:
    """Confidence statistics of a single attribute.

    `statistics` is a list of `Value` objects; when built by `get_statistics`
    each one carries the average confidence of one attribute value over all
    instances. `avg_confidence` is the average, over all instances, of the
    confidence of the most probable value.
    """

    # NOTE: annotations that name project classes are written as strings (like
    # the already quoted 'Data'), so they are not evaluated when the class body
    # is executed.

    def __init__(self, statistics: 'List[Value]', avg_confidence: float):
        self.statistics = statistics
        self.avg_confidence = avg_confidence

    @staticmethod
    def get_statistics(att: 'Attribute', data: 'Data') -> 'AttStats':    # TODO: rename to get_stats
        """Compute the statistics of attribute `att` over the instances of `data`.

        For every value of the attribute's domain, the confidences found in the
        readings of all instances are summed and divided by the number of
        instances. The average confidence is the mean of the confidence of the
        most probable value of each reading.

        When `data` has no instances, every domain value gets confidence 0 and
        the average confidence is 0.

        The order of the returned statistics is the order in which the values
        were last encountered in the readings (an updated value is moved to the
        end of the list); values never encountered stay in front.

        Raises:
            ValueError: (from `list.index`) when a reading holds a value that
                does not match any value of the attribute's domain.
        """
        totals = [Value(val_name, 0) for val_name in att.get_domain()]

        instances = data.get_instances()
        if not instances:
            return AttStats(totals, 0)

        conf_total = 0
        for instance in instances:
            reading = instance.get_reading_for_attribute(att.get_name())
            for v in reading.get_values():
                # `list.index` matches through `Value.__eq__` -- presumably a
                # comparison by name; confirm in pyuid3.value.
                old = totals.pop(totals.index(v))
                # Re-append instead of updating in place: this keeps the
                # "last encountered" ordering that callers may rely on.
                totals.append(Value(v.get_name(), old.get_confidence() + v.get_confidence()))
            conf_total += reading.get_most_probable().get_confidence()

        size = len(instances)
        stats = [Value(total.get_name(), total.get_confidence() / size) for total in totals]
        return AttStats(stats, conf_total / size)

    def het_statistics(self) -> 'List[Value]':   # TODO: rename to get_statistics
        """Return the list of per-value statistics."""
        return self.statistics

    def get_avg_confidence(self) -> float:
        """Return the average confidence of the attribute."""
        return self.avg_confidence

    def get_stat_for_value(self, value_name: str) -> float:
        """Return the confidence stored for `value_name`, or 0 when there is no such value."""
        for v in self.statistics:
            if v.get_name() == value_name:
                return v.get_confidence()
        return 0

    def get_most_probable(self) -> 'Value':
        """Return the value with the highest confidence (the first one on ties).

        Raises:
            ValueError: when there are no statistics to choose from.
        """
        if not self.statistics:
            # Same exception type that `max` raised before, with a clearer message.
            raise ValueError('cannot determine the most probable value: statistics are empty')
        return max(self.statistics, key=lambda value: value.get_confidence())

    def __str__(self) -> str:
        """Return the statistics as '{v1,v2,...}'; '{}' when there are none."""
        # `join` puts commas only between items, so nothing has to be stripped
        # afterwards (stripping used to eat the opening brace of an empty list).
        return '{' + ','.join(str(value) for value in self.statistics) + '}'

## Tests

In [None]:
import math
from pyuid3.data import Data

# Compute the statistics of the 'outlook' attribute on the sample uncertain ARFF file.
data = Data.parse_uarff("../resources/weather.nominal.uncertain.arff")
attribute = Attribute('outlook', {'sunny', 'rainy', 'overcast'})
att_stats = AttStats.get_statistics(attribute, data)
# The average is a floating-point result: compare with a tolerance,
# consistently with the per-value checks below.
assert math.isclose(att_stats.get_avg_confidence(), 0.9428571428571428)

# The expected values are checked by position in the returned list.
vals = att_stats.het_statistics()
assert math.isclose(vals[0].get_confidence(), 0.38571428571428573)
assert math.isclose(vals[1].get_confidence(), 0.37142857142857144)
assert math.isclose(vals[2].get_confidence(), 0.24285714285714285)