# Interaction with the World Homework (#3)
Python Computing for Data Science (c) J Bloom, UC Berkeley 2018

Due Tuesday 2pm, Feb 20, 2018

# 1) Monty: The Python Siri

Let's make a Siri-like program (call it Monty!) with the following properties:
   - record your voice command
   - use a webservice to parse that sound file into text
   - based on what the text says, take one of three different types of actions:
       - send an email to yourself
       - do some math
       - tell a joke

So for example, if you say "Monty: email me with subject hello and body goodbye", it will email you with the appropriate subject and body. If you say "Monty: tell me a joke" then it will go to the web and find a joke and print it for you. If you say, "Monty: calculate two times three" it should respond by printing the number 6.

Hint: you can use speech-to-text apps like Houndify (or, e.g., Google Speech https://cloud.google.com/speech/) to return the text (but not do the actions). You'll need to sign up for a free API and then follow documentation instructions for using the service within Python.

In [1]:
# import from the credentials file to get the
# -- email address (creds.ADDR)
# -- email password (creds.PWD)
# -- houndify id (creds.HOUNDIFY_ID)
# -- houndify key (creds.HOUNDIFY_KEY)
# -- WolframAlpha key (creds.WOLFRAM_ID)
import creds

# imports for the audio and speech recognition
import pyaudio
import wave
import speech_recognition as sr

# imports for the email portion
import smtplib
# from email.MIMEMultipart import MIMEMultipart
# from email.MIMEBase import MIMEBase
# from email.MIMEText import MIMEText
# from email import Encoders
# from email.Utils import COMMASPACE, formatdate
from email.mime.text import MIMEText

# imports for the web parsing
from bs4 import BeautifulSoup
from urllib.request import urlopen,Request
import random

# imports for the calculations
import wolframalpha

class AudioParams(object):
    """ Container for the settings of a single audio recording. """
    def __init__(self,output_name='default.wav'):
        """
        Fill in the recording settings; only the file name is configurable.

        :output_name: (str) the file name (ending in .wav) to store the recording at
        """
        # where the finished recording is written
        self.output_name = output_name
        # length of the recording, in seconds
        self.record_seconds = 10
        # this is an input (recording) stream
        self.input = True
        # sampling rate in Hz and the number of audio channels
        self.rate = 44100
        self.channels = 1
        # sample format constant taken from pyaudio
        self.format = pyaudio.paInt32
        # number of frames read from the stream at a time
        self.chunk = 1024
        
def monty():
    """
    Runs one round of Monty: records the user's spoken command, turns it
    into text, and then either sends an email, tells a joke or performs
    a calculation depending on the words that were recognized.

    Notes:
    -- some of the processing assumes every command starts with 'Monty'
    -- the word 'email' is expected to be followed by the words 'subject'
        and 'body'
    -- the word 'joke' means a joke is fetched and printed
    -- when neither 'email' nor 'joke' was said, everything after the first
        word is sent to WolframAlpha as a calculation

    -- along the way a few messages are printed: a prompt to start speaking,
        the text that was recognized, a note that processing has started and
        finally either a confirmation that the email was sent, the joke, or
        the result of the calculation.
    """
    params = AudioParams('monty.wav')
    print('I am Monty. Please speak your command now in the next ten seconds.')
    record_audio(params)

    spoken = text_from_audio(params)
    print('Monty thinks you said: {}'.format(spoken))
    print('Processing')
    tokens = spoken.split(' ')

    # an email request takes priority over everything else
    if 'email' in tokens:
        email_parser(spoken)
        print('The email has been sent.')
        return

    # next in line is a request for a joke
    if 'joke' in tokens:
        print(find_joke())
        return

    # anything else is treated as a calculation
    answer = calculate(spoken)
    print('The result of the calculation is {}'.format(answer))
    
def record_audio(pars):
    """
    Records audio from the input device and saves it to a file
    as per the parameters in the pars object.

    The stream and the PyAudio instance are always released, even if
    opening the stream or reading from it fails part way through.

    :pars: (AudioParams object) the parameters for this recording
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pars.format,
                        channels=pars.channels,
                        rate=pars.rate,
                        input=pars.input,
                        frames_per_buffer=pars.chunk)
        try:
            # number of chunk-sized reads needed to cover record_seconds
            n_chunks = int(pars.rate/pars.chunk*pars.record_seconds)
            data = [stream.read(pars.chunk) for _ in range(n_chunks)]
        finally:
            # close the stream no matter how the reads went
            stream.close()
        sample_size = p.get_sample_size(pars.format)
    finally:
        # release the PyAudio instance on every path
        p.terminate()
    save_file(data,pars,sample_size)
    
def save_file(data,pars,sample_size):
    """
    Saves recorded audio to a .wav file as per the parameters
    in the pars object.

    :data: (iterable of bytes) the recorded chunks, in recording order
    :pars: (AudioParams object) the parameters for the file; the
            attributes output_name, channels and rate are used
    :sample_size: (int) the width of one sample in bytes
            (the result of pyaudio's get_sample_size)
    """
    wave_string = b''.join(data)
    # the context manager closes the file even when one of the setters or
    # the write raises, so the file handle is never leaked
    with wave.open(pars.output_name,'wb') as wf:
        wf.setnchannels(pars.channels)
        wf.setsampwidth(sample_size)
        wf.setframerate(pars.rate)
        wf.writeframes(wave_string)
    
def text_from_audio(pars):
    """
    Runs the saved recording through the Houndify speech recognizer
    and hands back what it recognized.

    The Houndify client id and key are read from the creds module.

    :pars: (AudioParams object) the parameters of the recording;
            only output_name (the file to read) is used

    :returns: the result of recognize_houndify for the recording
            (expected to be the recognized text as a str; confirm
            against the installed speech_recognition version)
    """
    recognizer = sr.Recognizer()
    # load the whole file into an audio object
    with sr.AudioFile(pars.output_name) as wav_source:
        recording = recognizer.record(wav_source)
    return recognizer.recognize_houndify(
        recording,
        client_id=creds.HOUNDIFY_ID,
        client_key=creds.HOUNDIFY_KEY,
    )
    
def email_parser(text):
    """
    Parses the input text and sends the email that it describes.

    The text is expected to have the form
    '<anything> subject <subject words> [and] body <body words>'.
    Everything after the first ' subject ' up to the first following
    ' body ' is the subject; everything after that is the body.

    :text: (str) the input text string that should contain the
            words 'subject' and 'body'

    :raises ValueError: if 'subject' or 'body' cannot be found in the
            text, or if nothing follows the word 'body'
    """
    # partition on the FIRST occurrence only, so the words 'subject' and
    # 'body' may still appear later on inside the message itself
    _, subject_marker, post_subject = text.partition(' subject ')
    if not subject_marker:
        raise ValueError("Unable to find the word 'subject' in: {}".format(text))
    subject, body_marker, body = post_subject.partition(' body ')
    if not body_marker or not body:
        raise ValueError("Unable to find a 'body' with content in: {}".format(text))

    # additional formatting for the subject to capitalize each word
    # and not end in an 'and' that would have separated
    # the subject from the body
    # (word[:1] instead of word[0] so repeated spaces don't crash)
    subject_words = [word[:1].upper()+word[1:] for word in subject.split(' ')]
    if subject_words[-1]=='And':
        subject_words.pop()
    subject = ' '.join(subject_words)

    # additional formatting for the body to make the first letter
    # uppercase and end in a period if it doesn't end in a . ! or ?
    body = body[0].upper() + body[1:]
    if not body.endswith(('.','!','?')):
        body = ''.join([body,'.'])

    # actually send the email
    email(creds.ADDR,creds.PWD,subject,body)

def email(sender,pwd,subject,text):
    """
    Sends a plain-text email from the sender to the sender's own address
    through Gmail's SMTP server.

    Note: this is pretty close to a copy of the notebook email_example.py
    from class without the file functionality. Although I had to change
    some things to get it to actually work.

    :sender: (str) the sender's email address; also used as the recipient
    :pwd: (str) the password for the sender's email address
    :subject: (str) the subject of the email
    :text: (str) the body of the email
    """
    msg = MIMEText(text)
    msg['Subject'] = subject
    msg['From'] = sender
    msg['To'] = sender
    # Note that we need to designate the remote SMTP server we want to use.
    # The context manager ends the session and closes the connection even
    # when starttls, login or sendmail raises, so the socket is not leaked.
    with smtplib.SMTP("smtp.gmail.com", 587) as mail_server:
        mail_server.starttls()
        mail_server.login(sender, pwd)
        mail_server.sendmail(sender, sender, msg.as_string())
    
def find_joke():
    """
    Finds a joke on the internet and returns it. Note that this could obviously
    get a speedup if the monty function is to be called multiple times by just
    parsing the webpages once for jokes and then saving the results, but I'm
    assuming you wanted the work done on each call.
    Also note that I just pulled jokes from one site, but it should generate
    a pretty large list of jokes, which seems sufficient. If you wanted a greater
    variety or expected this function to be called many times, we would just
    parse more sites (and keep track of used jokes if no repeats is desired).

    :returns: (str) the joke that was found, picked at random from the page

    :raises RuntimeError: if no joke could be extracted from the page
    """
    url = 'http://www.laughfactory.com/jokes/clean-jokes'
    # the with-statement closes the connection even if the read fails
    with urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html,'lxml')

    jokes = []
    for joke_container in soup.find_all('div',{'class':'jokes'}):
        paragraph = joke_container.find('p')
        # skip containers that don't hold a joke paragraph
        if paragraph is None:
            continue
        joke = paragraph.get_text()
        # skip paragraphs with no visible text
        if not joke.strip():
            continue
        jokes.append(remove_space(joke))

    # fail with a clear message instead of random.choice's IndexError
    if not jokes:
        raise RuntimeError('Unable to find any jokes at: {}'.format(url))
    return random.choice(jokes)

def remove_space(text):
    """
    Removes leading and trailing white space from the input string.

    Only spaces, newlines and tabs count as white space here; other
    characters (e.g. carriage returns) are left in place. Empty and
    all-white-space inputs give back the empty string.

    :text: (str) the input string

    :returns: (str) the input string without leading or trailing white
            space
    """
    # str.strip with an explicit character set handles the empty and
    # all-white-space cases that an index-walking loop would run off
    # the end of
    return text.strip(' \n\t')

def calculate(text):
    """
    Uses WolframAlpha to try to calculate whatever the query was.

    The first word of the text (expected to be 'Monty') is dropped and
    the rest is sent to WolframAlpha as the query. The WolframAlpha key
    is read from the creds module.

    :text: (str) the input text

    :returns: the text of the first result that WolframAlpha gave back

    :raises KeyError: if no result text could be read from the response
    """
    # getting rid of the Monty word at the beginning
    calculation = ' '.join(text.split(' ')[1:])

    client = wolframalpha.Client(creds.WOLFRAM_ID)
    res = client.query(calculation)
    try:
        return list(res.results)[0].text
    except Exception as err:
        # 'except Exception' rather than a bare except, so KeyboardInterrupt
        # and SystemExit still get through; the original error is chained
        # so the real cause stays visible in the traceback
        raise KeyError('Unable to query Wolfram with: {}'.format(calculation)) from err

# 2) Write a program that identifies musical notes from sound (AIFF) files. 

  - Run it on the supplied sound files (12) and report your program’s results. 
  - Use the labeled sounds (4) to make sure it works correctly. The provided sound files contain 1-3 simultaneous notes from different organs.
  - Save copies of any example plots to illustrate how your program works.
  
  https://piazza.com/berkeley/spring2018/ay250class13410/resources -> Homeworks -> hw3_sound_files.zip

Hints: You’ll want to decompose the sound into a frequency power spectrum. Use a Fast Fourier Transform. Be careful about “unpacking” the string hexcode into Python data structures. The sound files use 32-bit data. Play around with what happens when you convert the string data to other integer sizes, or signed vs unsigned integers. Also, beware of harmonics.