# Agenda

1. Comprehensions
2. Sorting (and key functions)
3. Modules

In [1]:
# The integers 0 through 9, whose squares we want to collect
numbers = range(10)

# Traditional approach: start with an empty list and grow it inside a loop
output = []

for one_number in numbers:
    output.append(one_number ** 2)
    
# A bare name on the last line of a cell displays its value
output

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [3]:
output = [one_number ** 2 for one_number in numbers]

In [4]:
output

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [5]:
[one_number ** 2             # expression -- SELECT
 for one_number in numbers]  # iteration  -- FROM 

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [6]:
mylist = ['abcd', 'efghi', 'jk']

'*'.join(mylist)

'abcd*efghi*jk'

In [8]:
mylist = [10, 20, 30]

'*'.join(mylist)  # can't run str.join on a list of ints, just a list of strings

TypeError: sequence item 0: expected str instance, int found

In [9]:
mylist = [10, 20, 30]

'*'.join([str(one_item)
          for one_item in mylist])

'10*20*30'

# Exercises: Comprehensions

1. Ask the user to enter a sentence. Using a list comprehension and `str.capitalize`, print the input with every word capitalized. (Just like `str.title` would do, but without using it.)

2. Ask the user to enter numbers, separated by spaces. (`input` returns them as a single string.) Use a list comprehension to convert the pieces to integers, then add them up with the builtin `sum` function, which accepts any iterable of numbers, such as a list of integers.

In [10]:
s = 'this is a test sentence'
s.capitalize()

'This is a test sentence'

In [11]:
s.title()

'This Is A Test Sentence'

In [12]:
s = input('Enter a sentence: ').strip()

print(s.title())

Enter a sentence: this is a test
This Is A Test


In [16]:
' '.join([one_word.capitalize()
          for one_word in s.split()])

'This Is A Test'

In [19]:
s = input('Enter some numbers: ').strip()

sum([int(one_item)
     for one_item in s.split()])

Enter some numbers: 10 20 30


60

In [24]:
# Collect the username (the first colon-separated field) from every line
# of the password file that does not start with '#'.
# Opening the file in a `with` block guarantees that it is closed as soon
# as the comprehension has finished reading it.
with open('/etc/passwd') as passwd_file:
    usernames = [one_line.split(':')[0]              # expression  -- SELECT
                 for one_line in passwd_file         # iteration   -- FROM
                 if not one_line.startswith('#')]    # condition   -- WHERE

# The comprehension now lives inside a `with` block, so name the result
# and put it on the last line to display it
usernames

['nobody',
 'root',
 'daemon',
 '_uucp',
 '_taskgated',
 '_networkd',
 '_installassistant',
 '_lp',
 '_postfix',
 '_scsd',
 '_ces',
 '_appstore',
 '_mcxalr',
 '_appleevents',
 '_geod',
 '_devdocs',
 '_sandbox',
 '_mdnsresponder',
 '_ard',
 '_www',
 '_eppc',
 '_cvs',
 '_svn',
 '_mysql',
 '_sshd',
 '_qtss',
 '_cyrus',
 '_mailman',
 '_appserver',
 '_clamav',
 '_amavisd',
 '_jabber',
 '_appowner',
 '_windowserver',
 '_spotlight',
 '_tokend',
 '_securityagent',
 '_calendar',
 '_teamsserver',
 '_update_sharing',
 '_installer',
 '_atsserver',
 '_ftp',
 '_unknown',
 '_softwareupdate',
 '_coreaudiod',
 '_screensaver',
 '_locationd',
 '_trustevaluationagent',
 '_timezone',
 '_lda',
 '_cvmsroot',
 '_usbmuxd',
 '_dovecot',
 '_dpaudio',
 '_postgres',
 '_krbtgt',
 '_kadmin_admin',
 '_kadmin_changepw',
 '_devicemgr',
 '_webauthserver',
 '_netbios',
 '_warmd',
 '_dovenull',
 '_netstatistics',
 '_avbdeviced',
 '_krb_krbtgt',
 '_krb_kadmin',
 '_krb_changepw',
 '_krb_kerberos',
 '_krb_anonymous',
 '_asse

In [25]:
!ls *.txt

linux-etc-passwd.txt  myconfig.txt  myfile2.txt  outfile.txt	wcfile.txt
mini-access-log.txt   myfile.txt    nums.txt	 shoe-data.txt


In [26]:
!cat nums.txt

5
	10     
	20
  	3
		   	20        

 25


# Exercise: Sum numbers

Read the lines of `nums.txt` in a list comprehension that turns each line into an integer, and pass the resulting list to `sum` in order to total the numbers.

In [27]:
# Show the lines of nums.txt exactly as Python reads them, with their
# surrounding whitespace and trailing newlines still attached.
# The `with` block closes the file once the comprehension is done.
with open('nums.txt') as nums_file:
    raw_lines = [one_line
                 for one_line in nums_file]

raw_lines

['5\n',
 '\t10     \n',
 '\t20\n',
 '  \t3\n',
 '\t\t   \t20        \n',
 '\n',
 ' 25\n']

In [28]:
# Same lines as before, but with leading/trailing whitespace (including
# the newline) removed from each one; a blank line becomes ''.
# The `with` block closes the file once the comprehension is done.
with open('nums.txt') as nums_file:
    stripped_lines = [one_line.strip()
                      for one_line in nums_file]

stripped_lines

['5', '10', '20', '3', '20', '', '25']

In [29]:
[int(one_line.strip())
for one_line in open('nums.txt')]

ValueError: invalid literal for int() with base 10: ''

In [30]:
int()

0

In [31]:
int('10')

10

In [32]:
int('     10      ')

10

In [33]:
int('')

ValueError: invalid literal for int() with base 10: ''

In [37]:
# Total the numbers in nums.txt, one number per line.
#  - The condition keeps only lines that consist of digits once the
#    surrounding whitespace is stripped, which filters out blank lines
#    (''.isdigit() is False).
#  - int() ignores surrounding whitespace on its own, so the expression
#    can convert the unstripped line directly.
# The `with` block closes the file as soon as the sum has been computed.
with open('nums.txt') as nums_file:
    nums_total = sum([int(one_line)
                      for one_line in nums_file
                      if one_line.strip().isdigit()])

nums_total

83

In [38]:
s = '123'
s.isdigit()

True

In [39]:
s = ' 123'
s.isdigit()

False

In [40]:
def count_vowels(one_word):
    """Return the number of vowels (a, e, i, o, u) in one_word.

    Upper- and lowercase vowels are both counted, so 'APPLE' and
    'apple' each give 2. An empty string gives 0.
    """
    total = 0

    for one_character in one_word:
        # Compare in lowercase so that 'A' counts just like 'a'
        if one_character.lower() in 'aeiou':
            total += 1

    return total

count_vowels('abcde')

2

In [41]:
count_vowels('hello')

2

In [42]:
def count_vowels(one_word):
    """Return the number of vowels (a, e, i, o, u) in one_word.

    Comprehension-based version: produce a 1 for every vowel and add
    them up. Upper- and lowercase vowels are both counted.
    """
    return sum([1
                for one_character in one_word
                # Compare in lowercase so that 'A' counts just like 'a'
                if one_character.lower() in 'aeiou'])

In [43]:
count_vowels('hello')

2

In [44]:
!head linux-etc-passwd.txt

# This is a comment
# You should ignore me
root:x:0:0:root:/root:/bin/bash
daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
bin:x:2:2:bin:/bin:/usr/sbin/nologin
sys:x:3:3:sys:/dev:/usr/sbin/nologin
sync:x:4:65534:sync:/bin:/bin/sync
games:x:5:60:games:/usr/games:/usr/sbin/nologin
man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin


# Exercise: Usernames + IDs

1. Read from `linux-etc-passwd.txt` in a list comprehension.
2. From each line (except for comments and blank lines), produce a list of two elements -- the username (index 0) and the user ID (index 2).
3. The result will be a list of lists.

In [47]:
# Keep only the lines that hold user records: skip lines that start
# with '#' and lines that are empty once whitespace is stripped.
# The `with` block closes the file once the comprehension is done.
with open('linux-etc-passwd.txt') as passwd_file:
    user_lines = [one_line
                  for one_line in passwd_file
                  if not one_line.startswith('#') and one_line.strip()]

user_lines

['root:x:0:0:root:/root:/bin/bash\n',
 'daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin\n',
 'bin:x:2:2:bin:/bin:/usr/sbin/nologin\n',
 'sys:x:3:3:sys:/dev:/usr/sbin/nologin\n',
 'sync:x:4:65534:sync:/bin:/bin/sync\n',
 'games:x:5:60:games:/usr/games:/usr/sbin/nologin\n',
 'man:x:6:12:man:/var/cache/man:/usr/sbin/nologin\n',
 'lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin\n',
 'mail:x:8:8:mail:/var/mail:/usr/sbin/nologin\n',
 'news:x:9:9:news:/var/spool/news:/usr/sbin/nologin\n',
 'uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin\n',
 'proxy:x:13:13:proxy:/bin:/usr/sbin/nologin\n',
 'www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin\n',
 'backup:x:34:34:backup:/var/backups:/usr/sbin/nologin\n',
 'list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin\n',
 'irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin\n',
 'gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin\n',
 'nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin\n',
 'syslog:x:101:

In [49]:
# Build a [username, user ID] pair (fields 0 and 2 of the colon-separated
# record) for every line that is neither a comment nor an empty line.
# str.startswith accepts a tuple, and matches if the line starts with
# any of the strings in it.
# The `with` block closes the file once the comprehension is done.
with open('linux-etc-passwd.txt') as passwd_file:
    names_and_ids = [[one_line.split(':')[0], one_line.split(':')[2]]
                     for one_line in passwd_file
                     if not one_line.startswith(('#', '\n'))]

names_and_ids

[['root', '0'],
 ['daemon', '1'],
 ['bin', '2'],
 ['sys', '3'],
 ['sync', '4'],
 ['games', '5'],
 ['man', '6'],
 ['lp', '7'],
 ['mail', '8'],
 ['news', '9'],
 ['uucp', '10'],
 ['proxy', '13'],
 ['www-data', '33'],
 ['backup', '34'],
 ['list', '38'],
 ['irc', '39'],
 ['gnats', '41'],
 ['nobody', '65534'],
 ['syslog', '101'],
 ['messagebus', '102'],
 ['landscape', '103'],
 ['jci', '955'],
 ['sshd', '104'],
 ['user', '1000'],
 ['reuven', '1001'],
 ['postfix', '105'],
 ['colord', '106'],
 ['postgres', '107'],
 ['dovecot', '108'],
 ['dovenull', '109'],
 ['postgrey', '110'],
 ['debian-spamd', '111'],
 ['memcache', '113'],
 ['genadi', '1002'],
 ['shira', '1003'],
 ['atara', '1004'],
 ['shikma', '1005'],
 ['amotz', '1006'],
 ['mysql', '114'],
 ['clamav', '115'],
 ['amavis', '116'],
 ['opendkim', '117'],
 ['gitlab-redis', '999'],
 ['gitlab-psql', '998'],
 ['git', '1007'],
 ['opendmarc', '118'],
 ['dkim-milter-python', '119'],
 ['deploy', '1008'],
 ['redis', '112']]

In [50]:
# Same [username, user ID] pairs, but splitting each line only once:
# the slice [0:3:2] starts at index 0, stops before index 3, and steps
# by 2, so it picks out fields 0 and 2.
# The `with` block closes the file once the comprehension is done.
with open('linux-etc-passwd.txt') as passwd_file:
    name_id_pairs = [one_line.split(':')[0:3:2]      # slice -- [start:end:step]
                     for one_line in passwd_file
                     if not one_line.startswith(('#', '\n'))]

name_id_pairs

[['root', '0'],
 ['daemon', '1'],
 ['bin', '2'],
 ['sys', '3'],
 ['sync', '4'],
 ['games', '5'],
 ['man', '6'],
 ['lp', '7'],
 ['mail', '8'],
 ['news', '9'],
 ['uucp', '10'],
 ['proxy', '13'],
 ['www-data', '33'],
 ['backup', '34'],
 ['list', '38'],
 ['irc', '39'],
 ['gnats', '41'],
 ['nobody', '65534'],
 ['syslog', '101'],
 ['messagebus', '102'],
 ['landscape', '103'],
 ['jci', '955'],
 ['sshd', '104'],
 ['user', '1000'],
 ['reuven', '1001'],
 ['postfix', '105'],
 ['colord', '106'],
 ['postgres', '107'],
 ['dovecot', '108'],
 ['dovenull', '109'],
 ['postgrey', '110'],
 ['debian-spamd', '111'],
 ['memcache', '113'],
 ['genadi', '1002'],
 ['shira', '1003'],
 ['atara', '1004'],
 ['shikma', '1005'],
 ['amotz', '1006'],
 ['mysql', '114'],
 ['clamav', '115'],
 ['amavis', '116'],
 ['opendkim', '117'],
 ['gitlab-redis', '999'],
 ['gitlab-psql', '998'],
 ['git', '1007'],
 ['opendmarc', '118'],
 ['dkim-milter-python', '119'],
 ['deploy', '1008'],
 ['redis', '112']]

In [51]:
# dict() accepts a list of two-element lists, and treats each one as a
# key-value pair -- here, username -> user ID (still a string).
# The `with` block closes the file once the comprehension is done.
with open('linux-etc-passwd.txt') as passwd_file:
    ids_by_username = dict([one_line.split(':')[0:3:2]
                            for one_line in passwd_file
                            if not one_line.startswith(('#', '\n'))])

ids_by_username

{'root': '0',
 'daemon': '1',
 'bin': '2',
 'sys': '3',
 'sync': '4',
 'games': '5',
 'man': '6',
 'lp': '7',
 'mail': '8',
 'news': '9',
 'uucp': '10',
 'proxy': '13',
 'www-data': '33',
 'backup': '34',
 'list': '38',
 'irc': '39',
 'gnats': '41',
 'nobody': '65534',
 'syslog': '101',
 'messagebus': '102',
 'landscape': '103',
 'jci': '955',
 'sshd': '104',
 'user': '1000',
 'reuven': '1001',
 'postfix': '105',
 'colord': '106',
 'postgres': '107',
 'dovecot': '108',
 'dovenull': '109',
 'postgrey': '110',
 'debian-spamd': '111',
 'memcache': '113',
 'genadi': '1002',
 'shira': '1003',
 'atara': '1004',
 'shikma': '1005',
 'amotz': '1006',
 'mysql': '114',
 'clamav': '115',
 'amavis': '116',
 'opendkim': '117',
 'gitlab-redis': '999',
 'gitlab-psql': '998',
 'git': '1007',
 'opendmarc': '118',
 'dkim-milter-python': '119',
 'deploy': '1008',
 'redis': '112'}

In [52]:
import random
random.randint(0, 100)

60

In [53]:
numbers = [random.randint(0, 100)
           for i in range(10)]

In [54]:
numbers

[58, 68, 1, 48, 94, 68, 30, 22, 77, 33]

In [55]:
# list.sort() rearranges the list in place and returns None, so the
# original order of `numbers` is lost after this line runs. The builtin
# sorted() returns a new list instead and leaves its argument alone.
numbers.sort()     # ideally, don't use this
numbers

[1, 22, 30, 33, 48, 58, 68, 68, 77, 94]

In [56]:
numbers = [58, 68, 1, 48, 94, 68, 30, 22, 77, 33]

In [58]:
sorted(numbers)  # sorted is a builtin function

[1, 22, 30, 33, 48, 58, 68, 68, 77, 94]

In [59]:
# sorted() works on any iterable, including a file: it returns a list of
# the file's lines. The lines are strings, so they are compared character
# by character -- leading whitespace counts, and '5\n' sorts after ' 25\n'.
# The `with` block closes the file once sorted() has read it.
with open('nums.txt') as nums_file:
    sorted_lines = sorted(nums_file)

sorted_lines

['\t\t   \t20        \n',
 '\t10     \n',
 '\t20\n',
 '\n',
 '  \t3\n',
 ' 25\n',
 '5\n']

In [60]:
# Print the length of every line in nums.txt (whitespace and the trailing
# newline included), all on one row separated by spaces.
# The `with` block closes the file when the loop is finished.
with open('nums.txt') as nums_file:
    for one_line in nums_file:
        print(len(one_line), end=' ')

2 9 4 5 17 1 4 

In [61]:
words = 'This is a bunch of words for my Python course at WDC'.split()
words

['This',
 'is',
 'a',
 'bunch',
 'of',
 'words',
 'for',
 'my',
 'Python',
 'course',
 'at',
 'WDC']

In [62]:
sorted(words)

['Python',
 'This',
 'WDC',
 'a',
 'at',
 'bunch',
 'course',
 'for',
 'is',
 'my',
 'of',
 'words']

In [63]:
ord('a')

97

In [66]:
ord('b')

98

In [67]:
ord('A')

65

In [68]:
ord('B')

66

In [69]:
'Zoo' < 'aaa'

True

In [70]:
sorted([one_word.lower()
         for one_word in words])

['a',
 'at',
 'bunch',
 'course',
 'for',
 'is',
 'my',
 'of',
 'python',
 'this',
 'wdc',
 'words']

In [71]:
# By default, sorted decides the order by asking: is A < B?

# With a key function f, it asks instead: is f(A) < f(B)?

# sorted - key function: str.lower is applied to each word for the
# comparison only, so the words in the result keep their original case

sorted(words, key=str.lower)

['a',
 'at',
 'bunch',
 'course',
 'for',
 'is',
 'my',
 'of',
 'Python',
 'This',
 'WDC',
 'words']

In [75]:
d1 = {'a':1, 'c':2}
d2 = {'a':10, 'b':3}

d1 < d2

TypeError: '<' not supported between instances of 'dict' and 'dict'

# Exercises: Sorting

1. Ask the user to enter a sentence. Sort the words in the sentence by the number of vowels each word contains.
2. Ask the user to enter a sentence. Sort the words as if each word were spelled *backwards* -- that is, compare the final letters first, then the second-to-last letters, and so on.

In [79]:
words = input('Enter a sentence: ').split()

def by_vowel_count(one_word):
    """Sort key: return the number of vowels (a, e, i, o, u) in one_word.

    Upper- and lowercase vowels are both counted, so a capitalized word
    such as 'Once' gets the same key as 'once'.
    """
    # Printed on every call, to show that sorted() invokes the key
    # function once for each element being sorted
    print(f'Now checking {one_word}')
    total = 0

    for one_character in one_word:
        # Compare in lowercase so that 'A' counts just like 'a'
        if one_character.lower() in 'aeiou':
            total += 1

    return total

sorted(words, key=by_vowel_count)

Enter a sentence: once upon a time it was fantastically interesting and wonderful
Now checking once
Now checking upon
Now checking a
Now checking time
Now checking it
Now checking was
Now checking fantastically
Now checking interesting
Now checking and
Now checking wonderful


['a',
 'it',
 'was',
 'and',
 'once',
 'upon',
 'time',
 'wonderful',
 'fantastically',
 'interesting']

In [80]:
words = input('Enter a sentence: ').split()

def by_backwards_word(one_word):
    """Sort key: return one_word with its characters in reverse order."""
    reversed_characters = reversed(one_word)
    return ''.join(reversed_characters)

sorted(words, key=by_backwards_word)

Enter a sentence: once upon a time it was fantastically interesting and wonderful


['a',
 'and',
 'once',
 'time',
 'interesting',
 'wonderful',
 'upon',
 'was',
 'it',
 'fantastically']

In [82]:
one_word = 'fantastically'

In [83]:
one_word[::-1]   # [START:END:STEPSIZE]   

'yllacitsatnaf'

In [84]:
one_word[8:3:-1]

'citsa'

In [88]:
one_word[-1::-1]

'yllacitsatnaf'

In [91]:
words = input('Enter a sentence: ').split()

def by_backwards_word(one_word):
    """Sort key: return one_word reversed, built one character at a time.

    Walks the indexes from the last one down to 0, appending the
    character found at each index to the result.
    """
    output = ''
    position = len(one_word) - 1
    while position >= 0:
        output += one_word[position]
        position -= 1
    return output

sorted(words, key=by_backwards_word)

Enter a sentence: once upon a time it was fantastically interesting and wonderful


['a',
 'and',
 'once',
 'time',
 'interesting',
 'wonderful',
 'upon',
 'was',
 'it',
 'fantastically']

In [90]:
def by_backwards_word(one_word):
    """Return one_word reversed, without using a slice.

    Visits the indexes of one_word from last to first and appends the
    character at each index to the result.
    """
    output = ''
    for index in reversed(range(len(one_word))):
        output += one_word[index]
    return output

by_backwards_word('abcde')

'edcba'

In [92]:
# Build four rows of five random integers each, every one between 0 and
# 10 (random.randint includes both endpoints). The inner comprehension
# makes one row; the outer comprehension repeats it once per row, which
# replaces four copy-pasted lines. Neither loop variable is used, so
# both are named `_`.
numbers = [[random.randint(0, 10) for _ in range(5)]
           for _ in range(4)]

In [93]:
numbers

[[7, 9, 6, 0, 6], [7, 1, 8, 1, 3], [9, 2, 8, 0, 1], [0, 3, 4, 10, 0]]

In [94]:
sorted(numbers)

[[0, 3, 4, 10, 0], [7, 1, 8, 1, 3], [7, 9, 6, 0, 6], [9, 2, 8, 0, 1]]

In [95]:
sorted(numbers, key=sum)

[[0, 3, 4, 10, 0], [7, 1, 8, 1, 3], [9, 2, 8, 0, 1], [7, 9, 6, 0, 6]]

In [96]:
sorted(numbers, key=sum, reverse=True)

[[7, 9, 6, 0, 6], [7, 1, 8, 1, 3], [9, 2, 8, 0, 1], [0, 3, 4, 10, 0]]

In [97]:
d1 = {'a':1, 'b':2, 'c':3}
d2 = {'b':20, 'c':30, 'd':1}
d3 = {'a':25, 'b':15, 'c':1}

all_dicts = [d1, d2, d3]

sorted(all_dicts)



TypeError: '<' not supported between instances of 'dict' and 'dict'

In [98]:
def by_b(one_dict):
    """Sort key: return the value stored under the key 'b' in one_dict.

    Raises KeyError if one_dict has no 'b' key.
    """
    b_value = one_dict['b']
    return b_value

sorted(all_dicts, key=by_b)

[{'a': 1, 'b': 2, 'c': 3},
 {'a': 25, 'b': 15, 'c': 1},
 {'b': 20, 'c': 30, 'd': 1}]

In [99]:
def by_value_sums(one_dict):
    """Sort key: return the total of all the values in one_dict.

    The keys are ignored; an empty dict gives 0.
    """
    total = 0
    for one_value in one_dict.values():
        total = total + one_value
    return total

sorted(all_dicts, key=by_value_sums)

[{'a': 1, 'b': 2, 'c': 3},
 {'a': 25, 'b': 15, 'c': 1},
 {'b': 20, 'c': 30, 'd': 1}]

# Modules

In [100]:
# DRY -- don't repeat yourself

import random

In [101]:
type(random)

module

In [102]:
random

<module 'random' from '/usr/local/Cellar/python@3.9/3.9.1_8/Frameworks/Python.framework/Versions/3.9/lib/python3.9/random.py'>

In [103]:
import sys

In [104]:
sys.path

['/Users/reuven/Courses/Current/wdc-2021-jan-intro-python',
 '/usr/local/Cellar/python@3.9/3.9.1_8/Frameworks/Python.framework/Versions/3.9/lib/python39.zip',
 '/usr/local/Cellar/python@3.9/3.9.1_8/Frameworks/Python.framework/Versions/3.9/lib/python3.9',
 '/usr/local/Cellar/python@3.9/3.9.1_8/Frameworks/Python.framework/Versions/3.9/lib/python3.9/lib-dynload',
 '',
 '/Users/reuven/Library/Python/3.9/lib/python/site-packages',
 '/usr/local/lib/python3.9/site-packages',
 '/usr/local/Cellar/protobuf/3.14.0/libexec/lib/python3.9/site-packages',
 '/usr/local/lib/python3.9/site-packages/IPython/extensions',
 '/Users/reuven/.ipython']

In [105]:
import asdfaf

ModuleNotFoundError: No module named 'asdfaf'

In [106]:
random.randint(0, 100)

28

In [107]:
dir(random)

['BPF',
 'LOG4',
 'NV_MAGICCONST',
 'RECIP_BPF',
 'Random',
 'SG_MAGICCONST',
 'SystemRandom',
 'TWOPI',
 '_Sequence',
 '_Set',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_accumulate',
 '_acos',
 '_bisect',
 '_ceil',
 '_cos',
 '_e',
 '_exp',
 '_floor',
 '_inst',
 '_log',
 '_os',
 '_pi',
 '_random',
 '_repeat',
 '_sha512',
 '_sin',
 '_sqrt',
 '_test',
 '_test_generator',
 '_urandom',
 '_warn',
 'betavariate',
 'choice',
 'choices',
 'expovariate',
 'gammavariate',
 'gauss',
 'getrandbits',
 'getstate',
 'lognormvariate',
 'normalvariate',
 'paretovariate',
 'randbytes',
 'randint',
 'random',
 'randrange',
 'sample',
 'seed',
 'setstate',
 'shuffle',
 'triangular',
 'uniform',
 'vonmisesvariate',
 'weibullvariate']

In [108]:
help(random)

Help on module random:

NAME
    random - Random variable generators.

MODULE REFERENCE
    https://docs.python.org/3.9/library/random
    
    The following documentation is automatically generated from the Python
    source files.  It may be incomplete, incorrect or include features that
    are considered implementation detail and may vary between Python
    implementations.  When in doubt, consult the module reference at the
    location listed above.

DESCRIPTION
        bytes
        -----
               uniform bytes (values between 0 and 255)
    
        integers
        --------
               uniform within range
    
        sequences
        ---------
               pick random element
               pick random sample
               pick weighted random sample
               generate random permutation
    
        distributions on the real line:
        ------------------------------
               uniform
               triangular
               normal (Gaussian)
      

In [109]:
random.randint(0, 100)

5

In [110]:
randint(0, 100)

NameError: name 'randint' is not defined

In [111]:
random['randint']

TypeError: 'module' object is not subscriptable

In [113]:
from random import randint 

In [114]:
'randint' in globals()

True

In [115]:
import mymod

In [116]:
dir(mymod)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__']

In [117]:
# dunder -- double underscore

In [118]:
mymod.__file__

'/Users/reuven/Courses/Current/wdc-2021-jan-intro-python/mymod.py'

In [119]:
mymod.__name__

'mymod'

In [120]:
import mymod as m

In [121]:
m

<module 'mymod' from '/Users/reuven/Courses/Current/wdc-2021-jan-intro-python/mymod.py'>

In [122]:
import mymod

In [123]:
dir(mymod)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__']

In [125]:
import importlib
importlib.reload(mymod)

<module 'mymod' from '/Users/reuven/Courses/Current/wdc-2021-jan-intro-python/mymod.py'>

In [126]:
dir(mymod)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'hello',
 'x',
 'y',
 'z']

In [127]:
mymod.x

100

In [128]:
mymod.y

[10, 20, 30]

In [129]:
mymod.z

{'a': 1, 'b': 2, 'c': 3}

In [130]:
mymod.hello('world')

'Hello, world, from mymod!'