In [14]:
from __future__ import print_function
from functools import reduce

# Python Exercises

The purpose of this notebook is to practice the Python concepts that are especially useful for writing Spark applications. Here we will work through them in the safe, controlled environment of a plain Python shell, where debugging is simpler -- later on we will use these same constructs in the Spark framework. 

## 1. `map`

Use the Python `map` function to convert the first and last letter of each word in the string `test_string` to uppercase.

*hint*: use the standard string method `split` to create a list of words; then use a `map` to convert the appropriate letters of each word

*hint \#2:* Use `Edit -> Split Cell` to create easily-executable code chunks that you can debug. When they all run individually, you can merge them back together.

In [4]:
# Sample sentence, quoted from Sun Tzu's "The Art of War"
test_string = 'The supreme art of war is to subdue the enemy without fighting.'

# Break the sentence on whitespace; punctuation stays attached to its word
words = str.split(test_string)

def first_last_capitalize(word):
    """Return `word` with its first and last characters converted to uppercase.

    A one-character word has that single character uppercased, and an empty
    string is returned unchanged (instead of raising IndexError).
    """
    # Nothing to capitalize -- indexing an empty list below would fail
    if not word:
        return ""

    # Strings are immutable, so edit a list of characters instead
    chars = list(word)

    # Uppercase both ends; for a one-character word these are the same slot
    chars[0] = chars[0].upper()
    chars[-1] = chars[-1].upper()

    # Reassemble the characters into a single string
    return "".join(chars)

# Run every word of the sentence through the helper (the `map` pattern)
upper_lower = map(first_last_capitalize, test_string.split())

# Glue the transformed words back together with single spaces
result = str.join(" ", upper_lower)
print(result)
assert result == 'ThE SupremE ArT OF WaR IS TO SubduE ThE EnemY WithouT Fighting.'

ThE SupremE ArT OF WaR IS TO SubduE ThE EnemY WithouT Fighting.


## 2. List comprehension and tuples

Use a list comprehension to convert the list of words into a list of tuples, where the first element of the tuple is the word and the second element is the word length.


*hint:* use the python built-in len() function to get the string length

In [5]:
# Pair each word with its character count
word_length = [
    (token, len(token))
    for token in words
]

In [6]:
print(word_length)

# Reference answer: every word paired with its character count
expected_word_length = [
    ('The', 3), ('supreme', 7), ('art', 3), ('of', 2),
    ('war', 3), ('is', 2), ('to', 2), ('subdue', 6),
    ('the', 3), ('enemy', 5), ('without', 7), ('fighting.', 9),
]
assert word_length == expected_word_length

[('The', 3), ('supreme', 7), ('art', 3), ('of', 2), ('war', 3), ('is', 2), ('to', 2), ('subdue', 6), ('the', 3), ('enemy', 5), ('without', 7), ('fighting.', 9)]


## 3. `reduce`

Compute the average word length in the sentence by first mapping the `word_length` list to contain just the word lengths, then using `reduce` to sum up the lengths, and finally dividing by the total number of words: 

In [7]:
# Keep only the length from each (word, length) tuple.
# Tuple-unpacking lambda parameters (`lambda (word, count): ...`) are a
# SyntaxError on Python 3, so index into the pair instead. Wrapping the map in
# list() keeps the result reusable on Python 3, where `map` returns a
# one-shot iterator.
word_counts = list(map(lambda pair: pair[1], word_length))

In [8]:
# Fold the individual word lengths into a single running total
total_chars = reduce(lambda running_total, length: running_total + length, word_counts)

In [9]:
import numpy as np

# Mean word length = total characters / number of words
average_length = float(total_chars) / len(words)
print(average_length)
assert np.allclose(average_length, 4.33333333333)

4.33333333333


## 4. Generators

Write a generator `even_words` that yields the words with an even character count. There are at least two possible solutions! 

In [10]:
def even_words(words):
    """Lazily yield, in order, each item of `words` whose length is even."""
    for candidate in words:
        # A non-zero remainder means an odd length: skip it
        if len(candidate) % 2:
            continue
        yield candidate

In [11]:
# Drain the generator into a concrete list so it can be compared later
result = []
result.extend(even_words(words))

In [12]:
# Second solution: the same filter written as a generator expression
even_word_gen = (w for w in words if len(w) % 2 == 0)
result2 = list(even_word_gen)

In [13]:
# Both solutions must produce the same even-length words, in sentence order
expected_even_words = ['of', 'is', 'to', 'subdue']
assert result == expected_even_words
assert result2 == expected_even_words