In [3]:
import io
import os
import RPi.GPIO as GPIO
import time
import subprocess
import picamera
import numpy as np
import pyaudio

from google.cloud import vision
from google.cloud.vision import types

# Ultrasonic sensor pins; main() selects GPIO.BOARD numbering before using them.
TRIG = 11       # output pin that fires the trigger pulse
ECHO = 12       # input pin on which the echo pulse is read
# Amplitude factors applied to the generated tone (valid range is [0.0, 1.0]).
MIN_V = 0.45    # used for the longest accepted echo, or when no reading is available
MAX_V = 0.8     # used for the shortest accepted echo
# Accepted echo pulse widths in seconds; duration() rejects anything outside.
# Presumably the round-trip times for about 2 cm and 4 m at 340 m/s -- TODO confirm.
MIN_T = 0.00011764705
MAX_T = 0.02352941176
TMP_FILE = 'resources/tmp.jpg'  # scratch JPEG that is uploaded for label detection
# NOTE(review): color_to_freq() can return up to 4095 for 8-bit colors, but a
# sampling rate of FS can only represent tones below FS / 2 -- higher
# frequencies will alias. Confirm whether that is intended.
FS = 5000       # sampling rate, Hz, must be integer
SENSE_DUR = 1   # tone length in seconds: FS * SENSE_DUR samples are generated; may be float

def set_volume(percent):
    """Set the ALSA 'PCM' mixer control to ``percent`` percent via ``amixer``."""
    level = '{}%'.format(percent)
    command = ['amixer', 'set', 'PCM', level]
    subprocess.call(command)

def speak(string):
    """Say ``string`` aloud by running the external ``espeak`` program.

    The call blocks until ``espeak`` exits.
    """
    command = ['espeak', string]
    subprocess.call(command)
    
# Part II: Regarding distance
def dur_to_vol(t):
    """Map an echo duration to a tone amplitude by linear interpolation.

    The shortest accepted echo (``MIN_T``) maps to ``MAX_V`` and the longest
    (``MAX_T``) maps to ``MIN_V``, so a shorter echo gives a louder tone.

    Args:
        t: Echo pulse width in seconds, expected within ``[MIN_T, MAX_T]``.

    Returns:
        The amplitude factor for ``t``. Values of ``t`` outside the expected
        range are extrapolated, not clamped.
    """
    slope = (MAX_V - MIN_V) / (MAX_T - MIN_T)
    # Measure from MIN_T so the endpoints land exactly on MAX_V and MIN_V;
    # scaling raw ``t`` shifted the whole line down by slope * MIN_T.
    return MAX_V - slope * (t - MIN_T)

def duration(timeout=0.1):
    """Fire the ultrasonic sensor once and measure the echo pulse width.

    Args:
        timeout: Maximum number of seconds to wait for the echo pulse to
            start and finish. Without it, a missing or stuck echo signal
            would make the polling loops spin forever.

    Returns:
        The echo pulse width in seconds if it lies strictly between
        ``MIN_T`` and ``MAX_T``; otherwise ``None`` (out of range, or the
        echo did not complete within ``timeout``).
    """
    # Trigger pulse: settle low, hold high for 10 microseconds, back to low.
    GPIO.output(TRIG, 0)
    time.sleep(0.000002)
    GPIO.output(TRIG, 1)
    time.sleep(0.00001)
    GPIO.output(TRIG, 0)

    # A monotonic clock keeps the interval immune to system clock adjustments.
    deadline = time.monotonic() + timeout

    # Wait for the echo line to go high (pulse start).
    while GPIO.input(ECHO) == 0:
        if time.monotonic() > deadline:
            return None
    pulse_start = time.monotonic()

    # Wait for the echo line to drop again (pulse end).
    while GPIO.input(ECHO) == 1:
        if time.monotonic() > deadline:
            return None
    pulse_end = time.monotonic()

    t = pulse_end - pulse_start
    if MIN_T < t < MAX_T:
        return t
    return None

# Part III: Regarding color
def avg_color(rgb_array):
    """Average an image over its first two axes and truncate to integers.

    For a (rows, columns, channels) array this yields one integer per
    channel.
    """
    channel_means = np.asarray(rgb_array).mean(axis=(0, 1))
    return channel_means.astype(int)

def color_to_freq(c):
    """Pack the first three components of ``c`` into a single number.

    Each component loses its low 4 bits; the results are then combined as
    ``red * 256 + green * 16 + blue``. For 8-bit inputs the value is in
    ``0..4095``.
    """
    red = c[0] >> 4
    green = c[1] >> 4
    blue = c[2] >> 4
    packed = (red << 8) + (green << 4) + blue
    return packed

def _announce_labels(camera, client):
    """Photograph the scene and speak the first two labels Google Vision returns."""
    # Capture the image to file...
    camera.capture(TMP_FILE)
    # ... just to load it back to memory
    with io.open(TMP_FILE, 'rb') as image_file:
        content = image_file.read()
    jpg = types.Image(content=content)

    # Process the image on the cloud
    response = client.label_detection(image=jpg)
    for label in response.label_annotations[:2]:
        speak(label.description)
    set_volume(100)


def _play_tone(p, samples):
    """Play ``samples`` (floats in [-1.0, 1.0]) as mono audio at ``FS`` Hz."""
    # The stream is opened per tone rather than kept open across iterations,
    # as in the original loop, so the audio device is not held while espeak runs.
    stream = p.open(format=pyaudio.paFloat32,
                    channels=1,
                    rate=FS,
                    output=True)
    try:
        # Stream.write() derives the frame count from len(frames) divided by
        # the sample width, so it must receive raw bytes: handing it the
        # ndarray makes len() count samples and only a quarter gets played.
        stream.write(samples.astype(np.float32).tobytes())
        stream.stop_stream()
    finally:
        # Release the stream even if the write fails.
        stream.close()


def main():
    """Run the sensing loop until interrupted.

    Nine out of every ten iterations read the ultrasonic sensor and the
    camera's average color and play a tone whose amplitude follows the echo
    duration and whose frequency follows the color. Every tenth iteration
    photographs the scene instead and speaks the labels Google Vision
    returns for it.

    Any ``Exception`` is printed and ends the loop; the GPIO pins and the
    PyAudio instance are released on the way out.
    """
    with picamera.PiCamera() as camera:
        camera.resolution = (320, 240)  # Scale down resolution
        # Created inside the try block; the finally clause must cope with
        # set-up failing before the audio system exists.
        p = None
        try:
            # Google Vision setup
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "api-key.json"
            client = vision.ImageAnnotatorClient()  # Instantiates a client

            # Sonic setup
            GPIO.setmode(GPIO.BOARD)
            GPIO.setup(TRIG, GPIO.OUT)
            GPIO.setup(ECHO, GPIO.IN)

            # Color setup: (rows, columns, RGB) buffer matching the resolution
            rgb_array = np.empty((240, 320, 3), dtype=np.uint8)

            # Turn on the sound
            set_volume(100)  # Loudness is controlled through the sample amplitude from here on
            p = pyaudio.PyAudio()

            # Sample indices of one tone; identical for every iteration.
            sample_index = np.arange(FS * SENSE_DUR)

            counter = 0  # To keep count for Google Vision
            while True:
                counter += 1

                # Image processing (rare)
                if counter == 10:
                    counter = 0
                    _announce_labels(camera, client)
                    continue

                # Audio sensing (common)
                # Sonic part
                t = duration()  # Gather distance
                if t is not None:
                    volume = dur_to_vol(t)
                    print("{:.2f}".format(volume), end=' ')
                else:
                    volume = MIN_V

                # Color:
                camera.capture(rgb_array, 'rgb')
                freq = color_to_freq(avg_color(rgb_array))

                # Generate a sine wave at freq and play it, scaled by volume
                samples = np.sin(2 * np.pi * sample_index * freq / FS)
                _play_tone(p, volume * samples)

        except Exception as e:
            print(e)
        finally:
            GPIO.cleanup()
            if p is not None:
                p.terminate()
            
# Start the sensing loop; it runs until an exception or interrupt stops it.
main()

0.79 0.80 0.79 0.79 0.79 0.79 0.78 0.74 0.78 0.78 0.59 0.59 0.59 0.59 0.69 0.69 0.73 0.59 0.59 0.59 0.72 0.73 

KeyboardInterrupt: 