In [4]:
import pandas as pd
import numpy as np

# Quick Quiz

1. What does enumerate() do?
2. What does zip() do?
3. What is the difference between the following two statements:

        [i+11 for i in range(10)] 

and 

        (i+11 for i in range(10))

4. Which of the following are immutable data types: list, tuple, frozenset, dict?

5. What is the difference between the following two declarations?

        d1 = defaultdict(int)

and

        d2 = dict()

# Functions and Arguments

Functions can accept <b>parameters</b>. In the function signature and body we are talking about <b>arguments</b>.

The below is a function that accepts one parameter. We can pass a single parameter to it.

In [2]:
def func(a):
    return a

In [3]:
func('s')

's'

The below is a function that accepts two paramaters, one of which is set to have a default value.

In [7]:
def func2(a, s="string"):##default set to string, can call with 1 argument or can over-write it
    if s == "string":
        x = 1
    else:
        x = 0
    
    return x

In [8]:
func2(1)    # note that I can skip passing the 2nd paramater if I accept the default

1

In [9]:
func2(1, "more")

0

#### It is important to note that the default argument will be evaluated only once, which matters for mutable objects!

In [17]:
# This matters for when we over-write the default argument

def overwrite_default(S=''): ##strings are immutable
    S = S+'.'
    
    return S

In [18]:
print(overwrite_default())
print(overwrite_default()) ##only appends the dot once, doesn't remember the changed strings

.
.


In [19]:
def overwrite_default(S=[]): ##lists are mutable so remembers the modified string
    S.append('.')
    
    return S

In [20]:
print(overwrite_default())
print(overwrite_default()) ##don't overwite parameters, make copies of them

['.']
['.', '.']


### Python allows to work with keyword arguments like this:

num is a positional argument
color and size are keyword arguments (and their order is not important)

In [22]:
def order_shirt(num, color='blue', size='M'):
    print ("You are ordering {0} shirt/s of color {1} and size {2}".format(num, color, size))

In [23]:
order_shirt(1)

You are ordering 1 shirt/s of color blue and size M


In [25]:
order_shirt(10, color="white")

You are ordering 10 shirt/s of color white and size M


In [26]:
order_shirt(10, size="L")

You are ordering 10 shirt/s of color blue and size L


In [27]:
order_shirt(20, size='S', color='black')   # Note that order is not important

You are ordering 20 shirt/s of color black and size S


### It is possible to collect argumets with * and ** for keyword arguments

For example, my function can accept several parameters, and it can vary. The * does the unpacking.

In [32]:
def varied_arguments(*args):
    
    i = sum([1 for arg in args])
    
    return i

In [34]:
print (varied_arguments(1,2,3))
print (varied_arguments('one'))

3
1


When combining positional, argument and key-word arguments with unpacking, it is important to follow order:

In [36]:
def mix_arguments(pos, *args, **kwargs):
    
    i = sum([1 for arg in args])
    j = sum([1 for kwarg in kwargs])
    
    return 1+i+j

In [37]:
print (mix_arguments('pos1', 1,2,4, key='k1', key2='k2'))

6


In [38]:
print (mix_arguments('pos1', key='k1', key2='k2'))

3


In [40]:
# but this will result in an error - why?
print (mix_arguments(1,2,4, key='k1', key2='k2', 'pos'))

# Lambdas

In Python, lambdas are anonimous functions. That is, these are functions without names and are defined, essentially, 'in-line'.

In [1]:
# example: use when the function is too simple to code fully

def add_two_numbers(x,y):
    return x+y

add_two_numbers_lambda = lambda x, y: x+y #takes in x,y returns x+y, replicates the function above, no gain in speed, try not to use

In [2]:
add_two_numbers(5,5)

10

In [3]:
add_two_numbers_lambda(5,5)

10

What we have done above is simply used lambda to define an anonimous function that adds two numbers.
However, this is not very useful.

The best place to use lambdas is in maps, filters and apply-es.

Let's look at the map example. For example, we have a list of values, which we need to take sqrt if value is positive and square if it is negative.

This task can be easily accomplished with a method or a lambda, because it would have otherwise resulted in a short method and we don't plan on calling it again - just use lambdas.

In [12]:
 l = np.random.randint(-10,10,10)

In [15]:
list(map(lambda x: np.sqrt(x) if x >0 else x*x, l)) #no variable assignment, can't be recalled as not stored

[2.23606797749979,
 1.7320508075688772,
 1.7320508075688772,
 1,
 2.23606797749979,
 1.4142135623730951,
 16,
 3.0,
 1.0,
 81]

So, if disect the above line of code we have:

1. map (func, operate_on)

2. operate_on is a list of 10 random integers

3. func is a lambda function, that accepts one argument and returns srt() of it, if it is greater than zero, or it returns its square otherwise

In [20]:
# Let's write a map that converts each letter to its opposite case

''.join(list(map(lambda s: s.upper() if s.islower() else s.lower(), "A sentEncE")))

'a SENTeNCe'

In [21]:
# Let's write a map that converts a list of values to three groups of values
# if value is less than 1, convert to 'small', if between 1 and 5, convert to 'med', else convert to 'large'

In [24]:
 l = np.random.randint(-20,20,100)

In [25]:
l

array([  2,   2,   8,  -9,  10,   0,   3,  -2, -13, -11,   1, -11,  -5,
        18, -16, -11, -16,  -9,   0, -20,  17,   3,   1,  14,   0,  11,
        15,  19,  12, -14,  -3, -12,  16, -17,  11, -16,  -6, -18,  -6,
       -15, -11,   2,  15,  17,  14,   4, -10,  -8,   1, -12,   6, -11,
        16,   8,   1,   8,  -7,   4, -20, -18,  -8, -11, -12,   6, -20,
        -5, -18,  15,   3,  -7,   2, -17, -14,  14,  19,   7, -14,  12,
       -19, -18, -11,   9,  -3,  -2,  -7,  -4,  13,  11, -16, -11,  12,
        15,  -1,  -6,  13,  19,  -4,   9,  18, -13])

In [27]:
list(map(lambda x: 'small' if x<1 else ('large' if x > 5 else 'med'), l))

['med',
 'med',
 'large',
 'small',
 'large',
 'small',
 'med',
 'small',
 'small',
 'small',
 'med',
 'small',
 'small',
 'large',
 'small',
 'small',
 'small',
 'small',
 'small',
 'small',
 'large',
 'med',
 'med',
 'large',
 'small',
 'large',
 'large',
 'large',
 'large',
 'small',
 'small',
 'small',
 'large',
 'small',
 'large',
 'small',
 'small',
 'small',
 'small',
 'small',
 'small',
 'med',
 'large',
 'large',
 'large',
 'med',
 'small',
 'small',
 'med',
 'small',
 'large',
 'small',
 'large',
 'large',
 'med',
 'large',
 'small',
 'med',
 'small',
 'small',
 'small',
 'small',
 'small',
 'large',
 'small',
 'small',
 'small',
 'large',
 'med',
 'small',
 'med',
 'small',
 'small',
 'large',
 'large',
 'large',
 'small',
 'large',
 'small',
 'small',
 'small',
 'large',
 'small',
 'small',
 'small',
 'small',
 'large',
 'large',
 'small',
 'small',
 'large',
 'large',
 'small',
 'small',
 'large',
 'large',
 'small',
 'large',
 'large',
 'small']

# Collections

Collections module is a very useful module for super-fast built-in container objects. We have already used defaultdict from collections. Take a look at the full module on-line: https://docs.python.org/2/library/collections.html


We will look at Counters, which is a very usefull library.

In [28]:
from collections import Counter

#### Counting characters in a string:

In [29]:
char_count = Counter("Count my characters")

In [30]:
char_count

Counter({'C': 1,
         'o': 1,
         'u': 1,
         'n': 1,
         't': 2,
         ' ': 2,
         'm': 1,
         'y': 1,
         'c': 2,
         'h': 1,
         'a': 2,
         'r': 2,
         'e': 1,
         's': 1})

#### Count unique elements in a list

In [33]:
list_count = Counter(["one", "two", "two", "five", "three", "five"])

In [34]:
list_count

Counter({'one': 1, 'two': 2, 'five': 2, 'three': 1})

In [41]:
#I can ask for the most common element in a counter
list_count.most_common(1)

[('two', 2)]

#### Count words in a sentence

In [44]:
word_sent_count = Counter("A word once, a word once more, and finally another word".split(" "))

In [45]:
word_sent_count

Counter({'A': 1,
         'word': 3,
         'once,': 1,
         'a': 1,
         'once': 1,
         'more,': 1,
         'and': 1,
         'finally': 1,
         'another': 1})

# Home Work


Write a function that accepts a sentence, and returns either count of words in that sentence or count of characters.
It should default to count of words. If counting characters, then, allow to count punctuation in the sentence as well. But default to not counting punctuation.

The function signature should be like this:


In [9]:
from string import punctuation
from collections import Counter

def custom_counter(sentence, as_words=True, punc=False):
    count = 0
    no_punc = ''.join(c for c in sentence if c not in punctuation)
    if punc:
        sentence = no_punc + ' '.join(c for c in sentence if c in punctuation)
    else:
        sentence = no_punc
    
    if as_words:
        count = Counter(sentence.split(" "))
    else:
        count = Counter(sentence)
    
    return count

Test your function on the following sentence:
"If You Are Jumping Up and Down in Muddy Puddles, You Must Wear Boots! George, You have Done it All Wrong! I Do Not Snort! I am Peppa Pig!"

Test your function with last two parameters taking variable values.

In [11]:
test_sentence = "If You Are Jumping Up and Down in Muddy Puddles, You Must Wear Boots! George, You have Done it All Wrong! I Do Not Snort! I am Peppa Pig!"

In [12]:
custom_counter(test_sentence, as_words=True, punc=True)

Counter({'!': 4,
         ',': 1,
         'All': 1,
         'Are': 1,
         'Boots': 1,
         'Do': 1,
         'Done': 1,
         'Down': 1,
         'George': 1,
         'I': 2,
         'If': 1,
         'Jumping': 1,
         'Muddy': 1,
         'Must': 1,
         'Not': 1,
         'Peppa': 1,
         'Pig,': 1,
         'Puddles': 1,
         'Snort': 1,
         'Up': 1,
         'Wear': 1,
         'Wrong': 1,
         'You': 3,
         'am': 1,
         'and': 1,
         'have': 1,
         'in': 1,
         'it': 1})

In [13]:
custom_counter(test_sentence, as_words=True, punc=False)

Counter({'All': 1,
         'Are': 1,
         'Boots': 1,
         'Do': 1,
         'Done': 1,
         'Down': 1,
         'George': 1,
         'I': 2,
         'If': 1,
         'Jumping': 1,
         'Muddy': 1,
         'Must': 1,
         'Not': 1,
         'Peppa': 1,
         'Pig': 1,
         'Puddles': 1,
         'Snort': 1,
         'Up': 1,
         'Wear': 1,
         'Wrong': 1,
         'You': 3,
         'am': 1,
         'and': 1,
         'have': 1,
         'in': 1,
         'it': 1})

In [14]:
custom_counter(test_sentence, as_words=False, punc=False)

Counter({' ': 28,
         'A': 2,
         'B': 1,
         'D': 3,
         'G': 1,
         'I': 3,
         'J': 1,
         'M': 2,
         'N': 1,
         'P': 3,
         'S': 1,
         'U': 1,
         'W': 2,
         'Y': 3,
         'a': 5,
         'd': 5,
         'e': 8,
         'f': 1,
         'g': 4,
         'h': 1,
         'i': 4,
         'l': 3,
         'm': 2,
         'n': 7,
         'o': 12,
         'p': 4,
         'r': 5,
         's': 3,
         't': 5,
         'u': 7,
         'v': 1,
         'w': 1,
         'y': 1})

In [15]:
custom_counter(test_sentence, as_words=False, punc=True)

Counter({' ': 33,
         '!': 4,
         ',': 2,
         'A': 2,
         'B': 1,
         'D': 3,
         'G': 1,
         'I': 3,
         'J': 1,
         'M': 2,
         'N': 1,
         'P': 3,
         'S': 1,
         'U': 1,
         'W': 2,
         'Y': 3,
         'a': 5,
         'd': 5,
         'e': 8,
         'f': 1,
         'g': 4,
         'h': 1,
         'i': 4,
         'l': 3,
         'm': 2,
         'n': 7,
         'o': 12,
         'p': 4,
         'r': 5,
         's': 3,
         't': 5,
         'u': 7,
         'v': 1,
         'w': 1,
         'y': 1})