In [1]:
# default_exp attributes

# Attributes

> Statistics computed from a collection of sequences

Attributes are the multisequence equivalent of the statistics methods in the core module. A multisequence attribute can be represented either as a single value or as a collection of values, each of which is the summary statistic of a single sequence. The pysan library's `attributes` module also contains a sequence frequency method, which can be used to create sequence frequency plots.

In [4]:
#export
from pysan.statistics import is_recurrent
def are_recurrent(sequences):
    "Returns True if at least one sequence in the given collection is recurrent, and False otherwise (including when the collection is empty)."
    # any() stops at the first recurrent sequence, so later sequences are not checked
    return any(is_recurrent(candidate) for candidate in sequences)

In [5]:
# Example: four sequences over the states 1-4; the final expression reports
# whether any one of them is recurrent.
s1 = [1,1,1,2,2,3,2,4,4,3,2,1,2,3,3,3,2,2,1,1,1]
s2 = [1,1,2,2,3,2,4,4,3,2,1,2,3,2,2,2,3,3,2,4,4]
s3 = [1,1,1,2,2,3,2,4,4,3,2,1,2,3,3,3,4,4,4,3,3]
s4 = [1,1,1,1,2,3,2,3,3,3,3,1,2,2,3,3,3,4,4,4,4]
sequences = [s1,s2,s3,s4]
are_recurrent(sequences)

True

In [6]:
#export
def get_summary_statistic(sequences, function):
    "Applies a summary statistic function (e.g. entropy, complexity, or turbulence) to every sequence in a collection and returns the per-sequence results as a list, in the same order as the input."
    results = []
    for sequence in sequences:
        # function is called with the sequence as its only argument
        results.append(function(sequence))
    return results

In [7]:
# Example: pass ps.statistics.get_entropy as the statistic to obtain one
# entropy value per sequence.
import pysan as ps
s1 = [1,1,1,2,2,3,2,4,4,3,2,1,2,3,3,3,2,2,1,1,1]
s2 = [1,1,2,2,3,2,4,4,3,2,1,2,3,2,2,2,3,3,2,4,4]
s3 = [1,1,1,2,2,3,2,4,4,3,2,1,2,3,3,3,4,4,4,3,3]
s4 = [1,1,1,1,2,3,2,3,3,3,3,1,2,2,3,3,3,4,4,4,4]
sequences = [s1,s2,s3,s4]
get_summary_statistic(sequences, ps.statistics.get_entropy)

[0.9363346784550166, 0.936781259256497, 0.98495001131136, 0.9673577477123131]

In [11]:
#export
from pysan.statistics import get_routine
def get_routine_scores(sequences, duration):
    "Returns a list with one routine score per sequence in a collection, each computed by :meth:`get_routine() <pysan.core.get_routine>` with the given duration."
    scores = []
    for sequence in sequences:
        # the same duration is used for every sequence
        scores.append(get_routine(sequence, duration))
    return scores

In [12]:
# Example: routine score of each of the four sequences, using a duration of 3.
s1 = [1,1,1,2,2,3,2,4,4,3,2,1,2,3,3,3,2,2,1,1,1]
s2 = [1,1,2,2,3,2,4,4,3,2,1,2,3,2,2,2,3,3,2,4,4]
s3 = [1,1,1,2,2,3,2,4,4,3,2,1,2,3,3,3,4,4,4,3,3]
s4 = [1,1,1,1,2,3,2,3,3,3,3,1,2,2,3,3,3,4,4,4,4]
sequences = [s1,s2,s3,s4]
get_routine_scores(sequences, 3)

[0.0, 0.0, 0.0, 0.0]

In [14]:
#export
def get_synchrony(sequences):
    """Computes the normalised synchrony between two or more sequences.

    Synchrony here refers to positions with identical elements, e.g. two identical
    sequences have a synchrony of 1, two completely different sequences have a
    synchrony of 0. Only the positions present in every sequence are compared
    (i.e. up to the length of the shortest sequence), and the value is normalised
    by dividing by the number of positions compared. This computation is defined
    in Cornwell's 2015 book on social sequence analysis, page 230.

    Parameters:
        sequences: a re-iterable collection of sequences supporting ``len()`` and
            integer indexing.

    Returns:
        A float between 0 and 1: the share of compared positions at which every
        sequence holds the same element.

    Raises:
        ValueError: if the collection is empty, or if any sequence is empty, as
            there are then no positions to compare and synchrony is undefined.
    """

    lengths = [len(sequence) for sequence in sequences]
    if not lengths:
        raise ValueError("get_synchrony() requires at least one sequence")

    shortest_sequence = min(lengths)
    if shortest_sequence == 0:
        # without this guard the normalisation below would divide by zero
        raise ValueError("get_synchrony() cannot compare sequences when one of them is empty")

    synchronous_positions = 0
    for position in range(shortest_sequence):
        elements_at_this_position = [sequence[position] for sequence in sequences]

        # a position is synchronous when every element equals the first one
        if elements_at_this_position.count(elements_at_this_position[0]) == len(elements_at_this_position):
            synchronous_positions += 1

    return synchronous_positions / shortest_sequence

In [19]:
# Example: only the first two positions hold the same element in all four
# sequences, so the synchrony is 2 out of 21 compared positions.
s1 = [1,1,1,2,2,3,2,4,4,3,2,1,2,3,3,3,2,2,1,1,1]
s2 = [1,1,2,2,3,2,4,4,3,2,1,2,3,2,2,2,3,3,2,4,4]
s3 = [1,1,1,2,2,3,2,4,4,3,2,1,2,3,3,3,4,4,4,3,3]
s4 = [1,1,1,1,2,3,2,3,3,3,3,1,2,2,3,3,3,4,4,4,4]
sequences = [s1,s2,s3,s4]
get_synchrony(sequences)

0.09523809523809523

In [16]:
#export
def get_sequence_frequencies(sequences):
    """Computes the frequencies of different sequences in a collection.

    Returns a dictionary mapping the string representation of each distinct
    sequence to the number of times it occurs. Entries are ordered from most to
    least frequent; sequences with equal counts keep the order in which they
    first appear in the input, so the result is reproducible between runs.
    An empty collection yields an empty dictionary.
    """
    # function-scope import keeps this exported cell self-contained
    from collections import Counter

    # converting to strings makes the sequences hashable and easy to compare
    sequence_counts = Counter(str(sequence) for sequence in sequences)

    # most_common() sorts by count, descending, leaving ties in first-seen order
    return dict(sequence_counts.most_common())

In [21]:
# Example: s1/s3 and s2/s4 are identical pairs, so each of the two distinct
# sequences is counted twice.
s1 = [1,1,1,2,2,3,2]
s2 = [1,1,2,2,3,2,4]
s3 = [1,1,1,2,2,3,2]
s4 = [1,1,2,2,3,2,4]
sequences = [s1,s2,s3,s4]
get_sequence_frequencies(sequences)

{'[1, 1, 1, 2, 2, 3, 2]': 2, '[1, 1, 2, 2, 3, 2, 4]': 2}

## Plotting

TODO

Unlike the core module's statistics methods, many of the above methods return multiple values which can be used to create meaningful visualisations.