## Sorting lists

word tally: winnowing, supple

In [1]:
x = ["duck", "aardvark", "crocodile", "emu", "bee"]

In [2]:
sorted(x)

['aardvark', 'bee', 'crocodile', 'duck', 'emu']

In [3]:
x.sort()

In [4]:
x

['aardvark', 'bee', 'crocodile', 'duck', 'emu']

In [5]:
#goal: sort by second letter, i.e. ['aardvark', 'bee', 'emu', 'crocodile', 'duck'] -- reverse=True below only reverses the alphabetical order
sorted(x, reverse=True)

['emu', 'duck', 'crocodile', 'bee', 'aardvark']

In [6]:
#sadly does not work: sorted(x, by_second_letter=True)

In [7]:
#sorted(x, key=???)

In [8]:
def get_second_letter(s):
    """Return the item at index 1 of `s` (the second letter of a string).

    Raises IndexError when `s` has fewer than two items.
    """
    second = s[1]
    return second

In [9]:
get_second_letter("cheese")

'h'

In [10]:
sorted(x, key=get_second_letter)

['aardvark', 'bee', 'emu', 'crocodile', 'duck']

In [11]:
type("hello")

str

In [12]:
type(get_second_letter)

function

## lambda functions
writing a function on a single line

In [13]:
# normal function
def get_second_letter(s):
    """Give back the second element of `s`, i.e. the one at index 1."""
    letter_at_index_one = s[1]
    return letter_at_index_one

In [14]:
# The same one-argument function as the `def` version, written as a lambda
# expression and bound to the same name.
get_second_letter = lambda s: s[1]

In [15]:
#functions written with the lambda keyword fit on one line
#def name(parameters):
#    return expr
#AKA name = lambda parameters: expr

In [16]:
get_second_letter("hello")

'e'

In [17]:
type(lambda s: s[1])

function

In [18]:
x

['aardvark', 'bee', 'crocodile', 'duck', 'emu']

In [19]:
sorted(x, key=lambda s: s[1])

['aardvark', 'bee', 'emu', 'crocodile', 'duck']

In [20]:
#a list of planet names sorted by not the name but by the number of moons
[p['name'] for p in sorted(planets, key=lambda x: x['moons'])]

NameError: name 'planets' is not defined

In [None]:
def get_moon_count(d):
    """Look up and return the value stored under the 'moons' key of `d`."""
    moon_count = d['moons']
    return moon_count
# Names ordered by the 'moons' value, smallest first.
[planet['name'] for planet in sorted(planets, key=get_moon_count)]
# Names ordered by the 'diameter' value, largest first.
[planet['name'] for planet in sorted(planets, key=lambda d: d['diameter'], reverse=True)]

# Same largest-first ordering, keeping only entries whose 'diameter' exceeds 4.
# Only this last expression is displayed as the cell's output.
[
    planet['name']
    for planet in sorted(planets, key=lambda d: d['diameter'], reverse=True)
    if planet['diameter'] > 4
]

## tuple (not "toople", "tuple" rhymes with "supple")
tuple is kind of like a strict list

In [21]:
t = (5, 10, 15)

In [22]:
type(t)

tuple

In [23]:
t[0]

5

In [24]:
for item in t:
    print(item * item)

25
100
225


In [25]:
t.append(30)

AttributeError: 'tuple' object has no attribute 'append'

In [26]:
carefree_list = [5, 10, 15, 20, 25]

In [27]:
carefree_list.append(30)

In [28]:
carefree_list

[5, 10, 15, 20, 25, 30]

In [29]:
carefree_list[1] = "Boris"

In [30]:
carefree_list

[5, 'Boris', 15, 20, 25, 30]

In [31]:
t[1] = "Boris"

TypeError: 'tuple' object does not support item assignment

In [39]:
#tuple is kind of like a list but can't be changed after you create it.
#can't be changed, it's an 'immutable' data type
#one benefit is exactly that: it CAN'T be changed.
#other benefit is that tuples are memory-efficient
#a list is a notebook, a tuple is a stone tablet

In [36]:
hello = [1, 2, 3]

In [35]:
foo = (1, 2, 3)

In [37]:
import sys
sys.getsizeof(hello)

48

In [38]:
sys.getsizeof(foo)

40

## back to regular expressions for a sec
### grouping with multiple matches in the same string

In [40]:
import re
test = "one 1 two 2 three 3 four 4 five 5"
re.findall(r"\w+ \d", test)

['one 1', 'two 2', 'three 3', 'four 4', 'five 5']

In [41]:
# Split each "word digit" match into its two halves and print them on
# separate lines. The pattern contains exactly one space per match, so the
# split always yields two parts. Unpacking into fresh names avoids
# overwriting the notebook-level list `x` used by the sorting cells.
for item in re.findall(r"\w+ \d", test):
    word, digit = item.split(" ")
    print(word)
    print(digit)

one
1
two
2
three
3
four
4
five
5


In [42]:
test = "one 1 two 2 three 3 four 4 five 5"
re.findall(r"(\w+) (\d)", test)

[('one', '1'), ('two', '2'), ('three', '3'), ('four', '4'), ('five', '5')]

In [43]:
# Read the whole subject-line file into one string; the `with` block closes
# the file handle as soon as the read finishes (or fails).
with open("enronsubjects.txt") as subjects_file:
    all_subjects = subjects_file.read()

FileNotFoundError: [Errno 2] No such file or directory: 'enronsubjects.txt'

### monetary amounts in the subject lines
Match amounts like `$10` followed by a magnitude suffix: `k` (thousand), `m` (million), or `b` (billion).

In [None]:
re.findall(r"\$(\d+) ?(\w+)", all_subjects)

In [None]:
# Add up the dollar amounts found in the subject lines, scaling each one by
# its k / m / b suffix (thousand / million / billion).
# Every match is a (digits, following_word) tuple, e.g. ('10', 'm').
# NOTE(review): the pattern requires a word character after the digits, so
# "$100." backtracks to ('10', '0') -- amounts with no following word are
# under-counted.
vals = []
for item in re.findall(r"\$(\d+) ?(\w+)", all_subjects):
    multiplier = item[1].lower()
    number_val = int(item[0])
    if multiplier == 'k':
        number_val *= 1000
    elif multiplier == 'm':
        number_val *= 1000000
    elif multiplier == 'b':
        number_val *= 1000000000
    # any other following word leaves the amount unscaled
    vals.append(number_val)
sum(vals)

### substitution with regular expressions

In [44]:
message = "this is a test, this is only a test"

In [46]:
message.replace("this", "that").replace("test", "walrus")

'that is a walrus, that is only a walrus'

In [None]:
# Find phone-number-like strings (ddd-ddd-dddd) in the subject lines.
re.findall(r"\d{3}-\d{3}-\d{4}", all_subjects)

In [47]:
message = "This is a test; this is only a test."
re.sub(r"[Tt]his", "that", message)

'that is a test; that is only a test.'

In [48]:
message

'This is a test; this is only a test.'

In [49]:
re.sub(r"\b\w+\b", "WALRUS", message)

'WALRUS WALRUS WALRUS WALRUS; WALRUS WALRUS WALRUS WALRUS WALRUS.'

In [None]:
# Mask the last four digits of every ddd-ddd-dddd string in the subject
# lines. The first two digit groups are captured and written back via \1 so
# only the final group becomes XXXX.
anon = re.sub(r"(\d{3}-\d{3})-\d{4}", r"\1-XXXX", all_subjects)
#"hack-y"

In [None]:
# Pull every ddd-ddd-XXXX string out of `anon`, together with up to 20
# characters (newlines excluded) of context on either side.
re.compile(r".{,20}\d{3}-\d{3}-X{4}.{,20}").findall(anon)