In [1]:
import gtsam

In [2]:
# Label sets for the discrete variables used in the cells that follow:
# `roles` are the possible roles of a tracked agent, `obs` the possible vest observations.
# Their lengths are passed to gtsam as the number of states of each variable.
roles = ["unknown","pedestrian","teammate"]
obs = ["no_vest","vest"]
# NOTE(review): `id` shadows the Python builtin `id()` for the rest of the kernel session.
id = 113
# Not referenced in the visible cells — presumably a running observation counter; TODO confirm.
n_obs = 0
# Key of this agent's role variable: character 'r' combined with the agent id.
symbol = gtsam.Symbol('r',id)
# Stand-alone distribution over the role variable, built from the spec ".7/0/0".
# NOTE(review): not referenced in the visible cells; the spec puts weight only on roles[0] — confirm intended.
role = gtsam.DiscreteDistribution((symbol.key(),len(roles)),".7/0/0")

# Empty gtsam.Values container; nothing is inserted into it in the visible cells.
values = gtsam.Values()

In [11]:
# Build a discrete Bayes net over the role variable, convert it to a factor graph,
# then add evidence factors on the observation variables and print the result.
# Relies on `symbol`, `roles` and `obs` defined in an earlier cell.
role_dbn = gtsam.DiscreteBayesNet()

# NOTE(review): the recorded output of this cell shows this prior as 1/0/0 after
# normalization, i.e. all mass on roles[0] — confirm that is the intended prior.
role_dbn.add((symbol.key(),len(roles)),".7/0/0") # Prior estimate

# Simulate adding a measurement
# Adds a conditional on the role variable with observation variable v1 as its parent.
# NOTE(review): this is P(r | v1), not P(v1 | r); confirm the direction is intended,
# since the net then holds several conditionals on the same variable.
obs_sym1 = gtsam.Symbol('v',1)
role_dbn.add( (symbol.key(),len(roles)), [(obs_sym1.key(),len(obs))], ".5/1./0. .5/0./1.")

# simulate adding a second measurement
# Same table as the first measurement, with parent v2.
obs_sym2 = gtsam.Symbol('v',2)
role_dbn.add( (symbol.key(),len(roles)), [(obs_sym2.key(),len(obs))], ".5/1./0. .5/0./1.")

# Convert to graph, solve, and output values
# NOTE(review): the return value of optimize() is discarded here, so nothing is "output" by it.
role_graph = gtsam.DiscreteFactorGraph(role_dbn)
role_graph.optimize()

# Prints the three factors converted from the Bayes net (before any evidence is added).
print(role_graph)


### NOW ADD EVIDENCE
# Unary factors on the two observation variables, with one weight per entry of `obs`.
role_graph.add((obs_sym1.key(),len(obs)), ".45 .55")
role_graph.add((obs_sym2.key(),len(obs)), ".4 .6")
# NOTE(review): result discarded again; assign it to a name if the solution is needed.
role_graph.optimize()

# NOTE(review): API-discovery debug print; remove once the needed method is known.
print(dir(role_graph))

# Prints the result of sum-product elimination on the graph including the evidence factors.
print(role_graph.sumProduct())




size: 3
factor 0:  P( r113 ):
 Choice(r113) 
 0 Leaf    1
 1 Leaf    0
 2 Leaf    0

factor 1:  P( r113 | v1 ):
 Choice(v1) 
 0 Choice(r113) 
 0 0 Leaf 0.33333333
 0 1 Leaf 0.66666667
 0 2 Leaf    0
 1 Choice(r113) 
 1 0 Leaf 0.33333333
 1 1 Leaf    0
 1 2 Leaf 0.66666667

factor 2:  P( r113 | v2 ):
 Choice(v2) 
 0 Choice(r113) 
 0 0 Leaf 0.33333333
 0 1 Leaf 0.66666667
 0 2 Leaf    0
 1 Choice(r113) 
 1 0 Leaf 0.33333333
 1 1 Leaf    0
 1 2 Leaf 0.66666667


['__call__', '__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '_repr_html_', '_repr_markdown_', 'add', 'at', 'dot', 'eliminateMultifrontal', 'eliminatePartialMultifrontal', 'eliminatePartialSequential', 'eliminateSequential', 'empty', 'equals', 'keys', 'maxProduct', 

# Audio dev

In [19]:
import numpy as np
import sentencepiece as spm
import torch
import torchaudio
import torchvision
from torchaudio.io import StreamReader
import torch.multiprocessing as mp

In [73]:
def stream(q, format, src, segment_length, sample_rate, option=None, num_channels=6):
    """Read audio from a source and push each chunk onto a queue.

    Intended to run as the target of a separate process: it builds a
    ``torchaudio.io.StreamReader``, registers one audio output stream and then
    forwards every chunk it produces to ``q`` until the stream ends.

    Args:
        q: Queue-like object; every audio chunk is delivered via ``q.put(chunk)``.
        format: Input format / device type handed to ``StreamReader``.
        src: Source identifier handed to ``StreamReader``.
        segment_length (int): Number of frames per emitted chunk.
        sample_rate (int): Sample rate requested for the output stream.
        option (dict, optional): Extra source options forwarded to
            ``StreamReader``. Defaults to ``None`` (no extra options).
        num_channels (int): Number of channels requested for the output
            stream. Defaults to 6, the value that used to be hard-coded.
    """
    print("Building StreamReader...")
    streamer = torchaudio.io.StreamReader(src=src, format=format, option=option)
    streamer.add_basic_audio_stream(
        frames_per_chunk=segment_length,
        sample_rate=sample_rate,
        format="s16p",
        num_channels=num_channels,
    )

    print(streamer.get_src_stream_info(0))
    print("Streaming...")
    print()
    # Each iteration yields a tuple with one entry per registered output stream.
    # Exactly one stream is registered above, so unpack the 1-tuple (note the
    # trailing comma) and enqueue the chunk itself, which is what the consumer
    # indexes with ``chunk[:, 0]``.
    for (chunk_a,) in streamer.stream(timeout=-1, backoff=1.0):
        q.put(chunk_a)


class ContextCacher:
    """Cache the end of input data and prepend the next input data with it.

    Args:
        segment_length (int): The size of main segment.
            If the incoming segment is shorter, then the segment is padded.
        context_length (int): The size of the context, cached and prepended
            to the next segment. May be 0, in which case nothing is carried over.
    """

    def __init__(self, segment_length: int, context_length: int):
        self.segment_length = segment_length
        self.context_length = context_length
        # Before the first segment arrives the context is all zeros.
        self.context = torch.zeros([context_length])

    def __call__(self, chunk: torch.Tensor):
        """Return ``chunk`` (zero-padded to ``segment_length``) prefixed with the cached context.

        The last ``context_length`` samples of the padded chunk become the
        context for the next call.
        """
        if chunk.size(0) < self.segment_length:
            chunk = torch.nn.functional.pad(chunk, (0, self.segment_length - chunk.size(0)))
        chunk_with_context = torch.cat((self.context, chunk))
        # Use an explicit start index rather than ``chunk[-context_length:]``:
        # with context_length == 0 the negative slice becomes ``chunk[0:]`` and
        # would cache the whole chunk. Clamping at 0 keeps the old behaviour
        # when the context is longer than the chunk.
        tail_start = max(chunk.size(0) - self.context_length, 0)
        self.context = chunk[tail_start:]
        return chunk_with_context

class Pipeline:
    """Streaming speech-recognition pipeline assembled from an RNNTBundle.

    The decoder state and the current hypotheses are kept on the instance so
    that consecutive calls to :meth:`infer` continue the same utterance.

    Args:
        bundle (torchaudio.pipelines.RNNTBundle): Bundle object
        beam_width (int): Beam size of beam search decoder.
    """

    def __init__(self, bundle: torchaudio.pipelines.RNNTBundle, beam_width: int = 10):
        self.bundle = bundle
        self.beam_width = beam_width

        # Components provided by the bundle.
        self.feature_extractor = bundle.get_streaming_feature_extractor()
        self.decoder = bundle.get_decoder()
        self.token_processor = bundle.get_token_processor()

        # Nothing has been decoded yet.
        self.state = None
        self.hypotheses = None

    def infer(self, segment: torch.Tensor) -> str:
        """Decode one segment and return the transcript of the best hypothesis so far."""
        feats, feat_length = self.feature_extractor(segment)
        decoded = self.decoder.infer(
            feats,
            feat_length,
            self.beam_width,
            state=self.state,
            hypothesis=self.hypotheses,
        )
        # Carry the decoder output over to the next call.
        self.hypotheses, self.state = decoded
        best_tokens = self.hypotheses[0][0]
        return self.token_processor(best_tokens, lstrip=False)

In [75]:
def main():
    """Transcribe live audio from an ALSA device with the Emformer RNN-T bundle.

    A child process running ``stream`` reads audio and pushes chunks onto a
    queue. This process takes each chunk, keeps channel 0, prepends the cached
    context and prints the running transcript on one line. The loop runs until
    it is interrupted or the child process exits.

    Raises:
        RuntimeError: If the streaming process is no longer alive while the
            loop is waiting for the next chunk.
    """
    import queue  # local import: only needed for the ``queue.Empty`` timeout signal

    # Parameters
    device = "alsa"
    src = "hw:4"

    # Model info
    bundle = torchaudio.pipelines.EMFORMER_RNNT_BASE_LIBRISPEECH
    sample_rate = bundle.sample_rate
    # The bundle's lengths are multiplied by hop_length before being used as
    # chunk/context sizes for the audio stream.
    segment_length = bundle.segment_length * bundle.hop_length
    context_length = bundle.right_context_length * bundle.hop_length
    pipeline = Pipeline(bundle)

    # Cache stream
    cacher = ContextCacher(segment_length, context_length)

    # NOTE(review): with the "spawn" start method the child looks up the target
    # function by name, so `stream` must be importable there. When it only
    # exists in a notebook cell the child fails with
    # "Can't get attribute 'stream' on <module '__main__'>"; move `stream`
    # into a .py module and import it to avoid that.
    ctx = mp.get_context("spawn")
    q = ctx.Queue()
    p = ctx.Process(target=stream, args=(q, device, src, segment_length, sample_rate))

    # Inference
    @torch.inference_mode()
    def infer():
        while True:
            try:
                # Wake up periodically so a dead producer is noticed instead of
                # blocking on the queue forever.
                chunk = q.get(timeout=1.0)
            except queue.Empty:
                if not p.is_alive():
                    raise RuntimeError(
                        "audio streaming process exited (exit code %s)" % p.exitcode
                    ) from None
                continue
            # Only the first channel of the chunk is transcribed.
            segment = cacher(chunk[:, 0])
            transcript = pipeline.infer(segment)
            print(transcript, end="\r", flush=True)

    p.start()
    try:
        infer()
    finally:
        # infer() only leaves via an exception (e.g. a kernel interrupt), so
        # always stop and reap the child instead of leaving it running.
        if p.is_alive():
            p.terminate()
        p.join()

In [None]:
# Start live transcription. main() loops over incoming audio and does not
# return on its own; interrupt the kernel to stop it.
main()

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/home/jd/anaconda3/envs/marmot/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/home/jd/anaconda3/envs/marmot/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
AttributeError: Can't get attribute 'stream' on <module '__main__' (built-in)>


In [66]:
# Debug cell: print the bundle-derived stream parameters and the StreamReader object.
# NOTE(review): `sample_rate`, `bundle`, `segment_length`, `context_length` and
# `streamer` are only defined as locals of main()/stream() in the visible cells,
# so this cell depends on leftover kernel state and will raise NameError after
# Restart & Run All unless they are also defined at top level elsewhere.
print(sample_rate)
print("hop length: %s " % bundle.hop_length)
print("segment length: %s " % bundle.segment_length)
print("context length: %s " %bundle.right_context_length)
# These two are the bundle lengths multiplied by hop_length in main().
print(segment_length)
print(context_length)

print(streamer)

16000
hop length: 160 
segment length: 16 
context length: 4 
2560
640
<torchaudio.io._stream_reader.StreamReader object at 0x77951405ba60>


In [None]:
# Get stream


# Visualize stream?


# form beam at location (az/el/dist)


# visualize separated beam


# extract noise/common spectrum from all channels