# Agenda

1. Comprehensions
2. Sorting (and key functions)
3. Modules

In [1]:
# The "classic" way to build a new list from an existing sequence:
# start with an empty list, then append one transformed item per iteration.
numbers = range(10)
output = []

for one_number in numbers:
    # squaring by multiplication gives the same integers as `** 2`
    output.append(one_number * one_number)

output

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [3]:
# The same result as an accumulate-in-a-loop, written as a list comprehension:
# the expression (one_number ** 2) is evaluated once per item in `numbers`,
# and the results are collected into a new list.
output = [one_number ** 2 for one_number in numbers]

In [4]:
output

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [5]:
[one_number ** 2             # expression -- SELECT
 for one_number in numbers]  # iteration  -- FROM 

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [6]:
mylist = ['abcd', 'efghi', 'jk']

# str.join is called on the separator and glues the strings in the list
# together, putting the separator between each pair of neighbors.
'*'.join(mylist)

'abcd*efghi*jk'

In [8]:
mylist = [10, 20, 30]

# str.join only works on strings, so handing it a list of ints raises
# TypeError. The error is the point of this cell, so catch and display it
# rather than letting it abort a "Restart & Run All".
try:
    '*'.join(mylist)
except TypeError as error:
    print('TypeError:', error)

TypeError: sequence item 0: expected str instance, int found

In [9]:
mylist = [10, 20, 30]

# Convert every int to a string first; the comprehension produces a list
# of strings, which is exactly what str.join needs.
'*'.join([str(one_item)
          for one_item in mylist])

'10*20*30'

# Exercises: Comprehensions

1. Ask the user to enter a sentence. Using a list comprehension and `str.capitalize`, print the input with every word capitalized. (Just like `str.title` would do, but without using it.)

2. Ask the user to enter numbers, separated by spaces. (We will get a single string.) Use a comprehension to turn that string into a list of integers, and then add them up with the builtin `sum` function, which accepts a list (or any other iterable) of numbers.

In [10]:
s = 'this is a test sentence'
s.capitalize()

'This is a test sentence'

In [11]:
s.title()

'This Is A Test Sentence'

In [12]:
# Read the sentence and remove leading/trailing whitespace.
s = input('Enter a sentence: ').strip()

# Reference answer using str.title -- the exercise asks for the same
# result *without* calling it.
print(s.title())

Enter a sentence: this is a test
This Is A Test


In [16]:
# s.split() (no argument) breaks the sentence on any run of whitespace;
# each word is capitalized on its own, and the words are then rejoined
# with single spaces.
' '.join([one_word.capitalize()
          for one_word in s.split()])

'This Is A Test'

In [19]:
# input() always returns a single string, e.g. '10 20 30'.
s = input('Enter some numbers: ').strip()

# Split the string on whitespace and convert each piece to an int;
# sum() then adds up the resulting list of integers.
# Note: int() raises ValueError if any piece is not a valid integer.
sum([int(one_item)
     for one_item in s.split()])

Enter some numbers: 10 20 30


60

In [24]:
# Open the file in a `with` block so that it is closed as soon as the
# comprehension has finished reading it, instead of being left for the
# garbage collector.
with open('/etc/passwd') as passwd_file:
    usernames = [one_line.split(':')[0]            # expression  -- SELECT
                 for one_line in passwd_file       # iteration   -- FROM
                 if not one_line.startswith('#')]  # condition   -- WHERE

usernames

['nobody',
 'root',
 'daemon',
 '_uucp',
 '_taskgated',
 '_networkd',
 '_installassistant',
 '_lp',
 '_postfix',
 '_scsd',
 '_ces',
 '_appstore',
 '_mcxalr',
 '_appleevents',
 '_geod',
 '_devdocs',
 '_sandbox',
 '_mdnsresponder',
 '_ard',
 '_www',
 '_eppc',
 '_cvs',
 '_svn',
 '_mysql',
 '_sshd',
 '_qtss',
 '_cyrus',
 '_mailman',
 '_appserver',
 '_clamav',
 '_amavisd',
 '_jabber',
 '_appowner',
 '_windowserver',
 '_spotlight',
 '_tokend',
 '_securityagent',
 '_calendar',
 '_teamsserver',
 '_update_sharing',
 '_installer',
 '_atsserver',
 '_ftp',
 '_unknown',
 '_softwareupdate',
 '_coreaudiod',
 '_screensaver',
 '_locationd',
 '_trustevaluationagent',
 '_timezone',
 '_lda',
 '_cvmsroot',
 '_usbmuxd',
 '_dovecot',
 '_dpaudio',
 '_postgres',
 '_krbtgt',
 '_kadmin_admin',
 '_kadmin_changepw',
 '_devicemgr',
 '_webauthserver',
 '_netbios',
 '_warmd',
 '_dovenull',
 '_netstatistics',
 '_avbdeviced',
 '_krb_krbtgt',
 '_krb_kadmin',
 '_krb_changepw',
 '_krb_kerberos',
 '_krb_anonymous',
 '_asse

In [25]:
!ls *.txt

linux-etc-passwd.txt  myconfig.txt  myfile2.txt  outfile.txt	wcfile.txt
mini-access-log.txt   myfile.txt    nums.txt	 shoe-data.txt


In [26]:
!cat nums.txt

5
	10     
	20
  	3
		   	20        

 25


# Exercise: Sum numbers

Read the lines of `nums.txt` with a list comprehension, and pass the resulting list to `sum` in order to add up the numbers.

In [27]:
# Step 1: just look at the raw lines. Iterating over a file object yields
# one string per line, each still carrying its whitespace and trailing '\n'.
# NOTE: the file object is never explicitly closed here; that is tolerable
# for a quick look, but use `with open(...)` in real code.
[one_line
for one_line in open('nums.txt')]

['5\n',
 '\t10     \n',
 '\t20\n',
 '  \t3\n',
 '\t\t   \t20        \n',
 '\n',
 ' 25\n']

In [28]:
# Step 2: str.strip() removes the surrounding whitespace, including the
# trailing newline. The blank line turns into the empty string ''.
# NOTE: the file object is never explicitly closed here; use
# `with open(...)` in real code.
[one_line.strip()
for one_line in open('nums.txt')]

['5', '10', '20', '3', '20', '', '25']

In [29]:
# Step 3: convert every stripped line to an int. The blank line strips down
# to '', which int() cannot parse, so this raises ValueError. The error is
# the point of this cell, so catch and display it rather than letting it
# abort a "Restart & Run All". The `with` block makes sure the file is
# closed even though the comprehension fails part-way through.
try:
    with open('nums.txt') as nums_file:
        [int(one_line.strip())
         for one_line in nums_file]
except ValueError as error:
    print('ValueError:', error)

ValueError: invalid literal for int() with base 10: ''

In [30]:
int()

0

In [31]:
int('10')

10

In [32]:
int('     10      ')

10

In [33]:
# Unlike int() with no argument, int('') has no digits to parse and raises
# ValueError. Catch and display the error so that the notebook can still
# be run from top to bottom.
try:
    int('')
except ValueError as error:
    print('ValueError:', error)

ValueError: invalid literal for int() with base 10: ''

In [37]:
# int() tolerates surrounding whitespace, but str.isdigit() does not, so the
# filter tests the stripped line; the blank line is skipped because
# ''.isdigit() is False. The `with` block closes the file as soon as the
# comprehension has consumed it.
with open('nums.txt') as nums_file:
    total = sum([int(one_line)
                 for one_line in nums_file
                 if one_line.strip().isdigit()])

total

83

In [38]:
s = '123'
s.isdigit()

True

In [39]:
s = ' 123'
s.isdigit()

False

In [40]:
def count_vowels(one_word):
    """Return how many characters in one_word are lowercase vowels.

    Only 'a', 'e', 'i', 'o' and 'u' are counted; uppercase vowels are not.
    This is the explicit-loop version of the function.
    """
    vowels = 'aeiou'
    count = 0

    for letter in one_word:
        if letter not in vowels:
            continue      # not a vowel -- nothing to count
        count += 1

    return count

count_vowels('abcde')

2

In [41]:
count_vowels('hello')

2

In [42]:
def count_vowels(one_word):
    """Return how many characters in one_word are lowercase vowels.

    Comprehension-based version: keep only the vowels, then count them.
    Only 'a', 'e', 'i', 'o' and 'u' are counted; uppercase vowels are not.
    """
    return len([letter
                for letter in one_word
                if letter in 'aeiou'])

In [43]:
count_vowels('hello')

2

In [44]:
!head linux-etc-passwd.txt

# This is a comment
# You should ignore me
root:x:0:0:root:/root:/bin/bash
daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
bin:x:2:2:bin:/bin:/usr/sbin/nologin
sys:x:3:3:sys:/dev:/usr/sbin/nologin
sync:x:4:65534:sync:/bin:/bin/sync
games:x:5:60:games:/usr/games:/usr/sbin/nologin
man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin


# Exercise: Usernames + IDs

1. Read from `linux-etc-passwd.txt` in a list comprehension.
2. From each line (except for comments and blank lines), produce a list of two elements -- the username (index 0) and the user ID (index 2).
3. The result will be a list of lists.

In [47]:
# First step: keep only the "real" records.
#   - lines starting with '#' are comments
#   - one_line.strip() is '' (falsy) for blank or whitespace-only lines
# NOTE: the file object is never explicitly closed here; use
# `with open(...)` in real code.
[one_line
 for one_line in open('linux-etc-passwd.txt')
 if not one_line.startswith('#') and one_line.strip() ]

['root:x:0:0:root:/root:/bin/bash\n',
 'daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin\n',
 'bin:x:2:2:bin:/bin:/usr/sbin/nologin\n',
 'sys:x:3:3:sys:/dev:/usr/sbin/nologin\n',
 'sync:x:4:65534:sync:/bin:/bin/sync\n',
 'games:x:5:60:games:/usr/games:/usr/sbin/nologin\n',
 'man:x:6:12:man:/var/cache/man:/usr/sbin/nologin\n',
 'lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin\n',
 'mail:x:8:8:mail:/var/mail:/usr/sbin/nologin\n',
 'news:x:9:9:news:/var/spool/news:/usr/sbin/nologin\n',
 'uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin\n',
 'proxy:x:13:13:proxy:/bin:/usr/sbin/nologin\n',
 'www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin\n',
 'backup:x:34:34:backup:/var/backups:/usr/sbin/nologin\n',
 'list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin\n',
 'irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin\n',
 'gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin\n',
 'nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin\n',
 'syslog:x:101:

In [49]:
# Build [username, user ID] pairs (fields 0 and 2 of each record).
# The `with` block closes the file once the comprehension is done.
# Blank lines are detected with one_line.strip() rather than
# startswith('\n'), so that a line containing only spaces or tabs is
# skipped too instead of raising IndexError on the [2] lookup.
with open('linux-etc-passwd.txt') as passwd_file:
    user_ids = [[one_line.split(':')[0], one_line.split(':')[2]]
                for one_line in passwd_file
                if not one_line.startswith('#') and one_line.strip()]

user_ids

[['root', '0'],
 ['daemon', '1'],
 ['bin', '2'],
 ['sys', '3'],
 ['sync', '4'],
 ['games', '5'],
 ['man', '6'],
 ['lp', '7'],
 ['mail', '8'],
 ['news', '9'],
 ['uucp', '10'],
 ['proxy', '13'],
 ['www-data', '33'],
 ['backup', '34'],
 ['list', '38'],
 ['irc', '39'],
 ['gnats', '41'],
 ['nobody', '65534'],
 ['syslog', '101'],
 ['messagebus', '102'],
 ['landscape', '103'],
 ['jci', '955'],
 ['sshd', '104'],
 ['user', '1000'],
 ['reuven', '1001'],
 ['postfix', '105'],
 ['colord', '106'],
 ['postgres', '107'],
 ['dovecot', '108'],
 ['dovenull', '109'],
 ['postgrey', '110'],
 ['debian-spamd', '111'],
 ['memcache', '113'],
 ['genadi', '1002'],
 ['shira', '1003'],
 ['atara', '1004'],
 ['shikma', '1005'],
 ['amotz', '1006'],
 ['mysql', '114'],
 ['clamav', '115'],
 ['amavis', '116'],
 ['opendkim', '117'],
 ['gitlab-redis', '999'],
 ['gitlab-psql', '998'],
 ['git', '1007'],
 ['opendmarc', '118'],
 ['dkim-milter-python', '119'],
 ['deploy', '1008'],
 ['redis', '112']]

In [50]:
# Same result with a single split: the slice [0:3:2] picks indexes 0 and 2.
# The `with` block closes the file once the comprehension is done, and
# one_line.strip() also filters out lines that contain only whitespace.
with open('linux-etc-passwd.txt') as passwd_file:
    user_ids = [one_line.split(':')[0:3:2]      # slice -- [start:end:step]
                for one_line in passwd_file
                if not one_line.startswith('#') and one_line.strip()]

user_ids

[['root', '0'],
 ['daemon', '1'],
 ['bin', '2'],
 ['sys', '3'],
 ['sync', '4'],
 ['games', '5'],
 ['man', '6'],
 ['lp', '7'],
 ['mail', '8'],
 ['news', '9'],
 ['uucp', '10'],
 ['proxy', '13'],
 ['www-data', '33'],
 ['backup', '34'],
 ['list', '38'],
 ['irc', '39'],
 ['gnats', '41'],
 ['nobody', '65534'],
 ['syslog', '101'],
 ['messagebus', '102'],
 ['landscape', '103'],
 ['jci', '955'],
 ['sshd', '104'],
 ['user', '1000'],
 ['reuven', '1001'],
 ['postfix', '105'],
 ['colord', '106'],
 ['postgres', '107'],
 ['dovecot', '108'],
 ['dovenull', '109'],
 ['postgrey', '110'],
 ['debian-spamd', '111'],
 ['memcache', '113'],
 ['genadi', '1002'],
 ['shira', '1003'],
 ['atara', '1004'],
 ['shikma', '1005'],
 ['amotz', '1006'],
 ['mysql', '114'],
 ['clamav', '115'],
 ['amavis', '116'],
 ['opendkim', '117'],
 ['gitlab-redis', '999'],
 ['gitlab-psql', '998'],
 ['git', '1007'],
 ['opendmarc', '118'],
 ['dkim-milter-python', '119'],
 ['deploy', '1008'],
 ['redis', '112']]

In [51]:
# dict() accepts a list of two-element lists and uses each pair as
# (key, value) -- here, username -> user ID.
# The `with` block closes the file once the comprehension is done.
# one_line.strip() filters out whitespace-only lines as well; such a line
# would otherwise yield a one-element list and make dict() raise ValueError.
with open('linux-etc-passwd.txt') as passwd_file:
    users = dict([one_line.split(':')[0:3:2]
                  for one_line in passwd_file
                  if not one_line.startswith('#') and one_line.strip()])

users

{'root': '0',
 'daemon': '1',
 'bin': '2',
 'sys': '3',
 'sync': '4',
 'games': '5',
 'man': '6',
 'lp': '7',
 'mail': '8',
 'news': '9',
 'uucp': '10',
 'proxy': '13',
 'www-data': '33',
 'backup': '34',
 'list': '38',
 'irc': '39',
 'gnats': '41',
 'nobody': '65534',
 'syslog': '101',
 'messagebus': '102',
 'landscape': '103',
 'jci': '955',
 'sshd': '104',
 'user': '1000',
 'reuven': '1001',
 'postfix': '105',
 'colord': '106',
 'postgres': '107',
 'dovecot': '108',
 'dovenull': '109',
 'postgrey': '110',
 'debian-spamd': '111',
 'memcache': '113',
 'genadi': '1002',
 'shira': '1003',
 'atara': '1004',
 'shikma': '1005',
 'amotz': '1006',
 'mysql': '114',
 'clamav': '115',
 'amavis': '116',
 'opendkim': '117',
 'gitlab-redis': '999',
 'gitlab-psql': '998',
 'git': '1007',
 'opendmarc': '118',
 'dkim-milter-python': '119',
 'deploy': '1008',
 'redis': '112'}

In [52]:
import random
random.randint(0, 100)

60

In [53]:
# Ten random integers between 0 and 100 (both ends included).
# The loop variable is never used, so it is spelled `_` by convention.
numbers = [random.randint(0, 100)
           for _ in range(10)]

In [54]:
numbers

[58, 68, 1, 48, 94, 68, 30, 22, 77, 33]

In [55]:
# list.sort() sorts the list in place and returns None, so the original
# order is lost and it only works on lists. Ideally, use sorted() instead:
# it returns a new list and leaves its argument untouched.
numbers.sort()
numbers

[1, 22, 30, 33, 48, 58, 68, 68, 77, 94]

In [56]:
numbers = [58, 68, 1, 48, 94, 68, 30, 22, 77, 33]

In [58]:
sorted(numbers)  # sorted is a builtin function

[1, 22, 30, 33, 48, 58, 68, 68, 77, 94]

In [59]:
# sorted() works on any iterable, including a file (one string per line).
# The lines are strings, so they are compared character by character --
# whitespace included -- and not by their numeric value.
# NOTE: the file object is never explicitly closed here; use
# `with open(...)` in real code.
sorted(open('nums.txt'))

['\t\t   \t20        \n',
 '\t10     \n',
 '\t20\n',
 '\n',
 '  \t3\n',
 ' 25\n',
 '5\n']

In [60]:
# Print the length of every line (trailing newline included), all on one row.
# The `with` block closes the file when the loop is finished.
with open('nums.txt') as nums_file:
    for one_line in nums_file:
        print(len(one_line), end=' ')

2 9 4 5 17 1 4 

In [61]:
words = 'This is a bunch of words for my Python course at WDC'.split()
words

['This',
 'is',
 'a',
 'bunch',
 'of',
 'words',
 'for',
 'my',
 'Python',
 'course',
 'at',
 'WDC']

In [62]:
sorted(words)

['Python',
 'This',
 'WDC',
 'a',
 'at',
 'bunch',
 'course',
 'for',
 'is',
 'my',
 'of',
 'words']

In [63]:
ord('a')

97

In [66]:
ord('b')

98

In [67]:
ord('A')

65

In [68]:
ord('B')

66

In [69]:
'Zoo' < 'aaa'

True

In [70]:
# Uppercase letters have smaller code points than lowercase ones, so a plain
# sort puts capitalized words first. Lowercasing every word before sorting
# gives alphabetical order -- but the result no longer has the original
# capitalization.
sorted([one_word.lower()
         for one_word in words])

['a',
 'at',
 'bunch',
 'course',
 'for',
 'is',
 'my',
 'of',
 'python',
 'this',
 'wdc',
 'words']

In [71]:
# By default, sorted() decides the order by comparing items directly: A < B?

# With a key function f, it compares f(A) < f(B) instead.

# sorted - key function: the key is used only for the comparisons; the
# returned list still contains the original, unmodified items. Here
# str.lower is the key, so the sort ignores case but keeps capitalization.

sorted(words, key=str.lower)

['a',
 'at',
 'bunch',
 'course',
 'for',
 'is',
 'my',
 'of',
 'Python',
 'This',
 'WDC',
 'words']