# Agenda

1. List comprehensions
2. Dict comprehensions
3. Set comprehensions
4. Nested comprehensions
5. `lambda`

In [1]:
numbers = list(range(10))

numbers

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [2]:
output = []

for one_number in numbers:
    output.append(one_number ** 2)
    
output

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [3]:
# list comprehensions

[one_number ** 2              # expression  -- SELECT
 for one_number in numbers]   # iteration   -- FROM 

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [4]:
mylist = ['abcd', 'efgh', 'ijkl']

'*'.join(mylist)

'abcd*efgh*ijkl'

In [5]:
'___'.join(mylist)

'abcd___efgh___ijkl'

In [6]:
mylist = [10, 20, 30]

'*'.join(mylist)

TypeError: sequence item 0: expected str instance, int found

In [8]:
[str(one_item)
 for one_item in mylist]

['10', '20', '30']

In [9]:
'*'.join([str(one_item)
          for one_item in mylist])

'10*20*30'

In [12]:
s = 'abcd'

[f'{index}: {one_item}'
 for index, one_item in enumerate(s)]

['0: a', '1: b', '2: c', '3: d']

In [13]:
d = {'a':1, 'b':2, 'c':3}

[f'{key}: {value}'
for key, value in d.items()]

['a: 1', 'b: 2', 'c: 3']

In [14]:
[key * value
for key, value in d.items()]

['a', 'bb', 'ccc']

# Exercises: Comprehensions

1. Ask the user to enter a string with integers separated by spaces. (Example: `'10 20 30'`) Use a list comprehension to take this string and sum the numbers. You may use `sum` to calculate this.
2. Ask the user to enter a string. Use a list comprehension and `str.capitalize` to get the same result as you would from `str.title`.

In [16]:
s = 'this is a test'

s.capitalize()  # returns a string -- all lowercase, except the first letter

'This is a test'

In [17]:
s.title()  # returns a string -- all lowercase, except the first letter in each word

'This Is A Test'

In [20]:
s = input('Enter numbers: ')

sum([int(one_item)
 for one_item in s.split()])

Enter numbers: 10 20 30


60

In [25]:
s = input('Enter a sentence: ')

' '.join([one_word.capitalize()
          for one_word in s.split()])

Enter a sentence: this is a test


'This Is A Test'

In [29]:
# usernames: the first ':'-separated field of every non-comment line
[one_line.split(':')[0]                 # expression -- SELECT
 for one_line in open('/etc/passwd')    # iteration -- FROM
 if not one_line.startswith("#")]       # condition -- WHERE

['nobody',
 'root',
 'daemon',
 '_uucp',
 '_taskgated',
 '_networkd',
 '_installassistant',
 '_lp',
 '_postfix',
 '_scsd',
 '_ces',
 '_appstore',
 '_mcxalr',
 '_appleevents',
 '_geod',
 '_devdocs',
 '_sandbox',
 '_mdnsresponder',
 '_ard',
 '_www',
 '_eppc',
 '_cvs',
 '_svn',
 '_mysql',
 '_sshd',
 '_qtss',
 '_cyrus',
 '_mailman',
 '_appserver',
 '_clamav',
 '_amavisd',
 '_jabber',
 '_appowner',
 '_windowserver',
 '_spotlight',
 '_tokend',
 '_securityagent',
 '_calendar',
 '_teamsserver',
 '_update_sharing',
 '_installer',
 '_atsserver',
 '_ftp',
 '_unknown',
 '_softwareupdate',
 '_coreaudiod',
 '_screensaver',
 '_locationd',
 '_trustevaluationagent',
 '_timezone',
 '_lda',
 '_cvmsroot',
 '_usbmuxd',
 '_dovecot',
 '_dpaudio',
 '_postgres',
 '_krbtgt',
 '_kadmin_admin',
 '_kadmin_changepw',
 '_devicemgr',
 '_webauthserver',
 '_netbios',
 '_warmd',
 '_dovenull',
 '_netstatistics',
 '_avbdeviced',
 '_krb_krbtgt',
 '_krb_kadmin',
 '_krb_changepw',
 '_krb_kerberos',
 '_krb_anonymous',
 '_asse

In [30]:
!ls *.txt

config.txt   infile2.txt  linux-etc-passwd.txt	outfile.txt
infile0.txt  infile3.txt  mini-access-log.txt	shoe-data.txt
infile1.txt  infile4.txt  nums.txt


In [31]:
!cat nums.txt

5
	10     
	20
  	3
		   	20        

 25


# Exercise: `nums.txt`

Read from `nums.txt`, and sum the numbers that are there using a comprehension.

In [42]:
# int() tolerates surrounding whitespace, but raises ValueError on a
# blank line -- so keep only lines that contain nothing but digits.
sum([int(one_line)
for one_line in open('nums.txt')
if one_line.strip().isdigit() ])

83

In [35]:
int('1')

1

In [36]:
int('   1    ')

1

In [38]:
int('   ')

ValueError: invalid literal for int() with base 10: '   '

In [44]:
s = '123'
s.isdigit()

True

In [45]:
s = '一二三'
s.isdigit()

False

In [46]:
s.isnumeric()

True

In [47]:
!head shoe-data.txt

Adidas	orange	43
Nike	black	41
Adidas	black	39
New Balance	pink	41
Nike	white	44
New Balance	orange	38
Nike	pink	44
Adidas	pink	44
New Balance	orange	39
New Balance	black	43


# Exercise: Shoe dicts

1. Read from `shoe-data.txt` with a list comprehension
2. Turn each row (separated by `'\t'` characters) into a dict.
3. Each dict should look like this: `{'brand':'Adidas', 'color':'orange', 'size':'43'}`
4. I suggest that you write a function (`line_to_dict`) that will be called by the comprehension once for each line in the file, and which returns a dict of the form we want.

In [50]:
def line_to_dict(one_line):
    """Turn one tab-separated line of shoe data into a dict.

    The line is stripped of surrounding whitespace and must hold exactly
    three tab-separated fields (brand, color, size); any other count
    raises ValueError from the unpacking. All values stay strings.
    """
    fields = one_line.strip().split('\t')
    brand, color, size = fields
    return dict(brand=brand, color=color, size=size)

[line_to_dict(one_line)
 for one_line in open('shoe-data.txt')]

[{'brand': 'Adidas', 'color': 'orange', 'size': '43'},
 {'brand': 'Nike', 'color': 'black', 'size': '41'},
 {'brand': 'Adidas', 'color': 'black', 'size': '39'},
 {'brand': 'New Balance', 'color': 'pink', 'size': '41'},
 {'brand': 'Nike', 'color': 'white', 'size': '44'},
 {'brand': 'New Balance', 'color': 'orange', 'size': '38'},
 {'brand': 'Nike', 'color': 'pink', 'size': '44'},
 {'brand': 'Adidas', 'color': 'pink', 'size': '44'},
 {'brand': 'New Balance', 'color': 'orange', 'size': '39'},
 {'brand': 'New Balance', 'color': 'black', 'size': '43'},
 {'brand': 'New Balance', 'color': 'orange', 'size': '44'},
 {'brand': 'Nike', 'color': 'black', 'size': '41'},
 {'brand': 'Adidas', 'color': 'orange', 'size': '37'},
 {'brand': 'Adidas', 'color': 'black', 'size': '38'},
 {'brand': 'Adidas', 'color': 'pink', 'size': '41'},
 {'brand': 'Adidas', 'color': 'white', 'size': '36'},
 {'brand': 'Adidas', 'color': 'orange', 'size': '36'},
 {'brand': 'Nike', 'color': '

In [54]:
def line_to_dict(one_line):
    """Pair the fixed keys with the tab-separated fields of one_line.

    zip stops at the shorter input, so a line with fewer than three
    fields gives a smaller dict, and any extra fields are dropped.
    """
    keys = ('brand', 'color', 'size')
    values = one_line.strip().split('\t')
    return {key: value for key, value in zip(keys, values)}

[line_to_dict(one_line)
 for one_line in open('shoe-data.txt')]

[{'brand': 'Adidas', 'color': 'orange', 'size': '43'},
 {'brand': 'Nike', 'color': 'black', 'size': '41'},
 {'brand': 'Adidas', 'color': 'black', 'size': '39'},
 {'brand': 'New Balance', 'color': 'pink', 'size': '41'},
 {'brand': 'Nike', 'color': 'white', 'size': '44'},
 {'brand': 'New Balance', 'color': 'orange', 'size': '38'},
 {'brand': 'Nike', 'color': 'pink', 'size': '44'},
 {'brand': 'Adidas', 'color': 'pink', 'size': '44'},
 {'brand': 'New Balance', 'color': 'orange', 'size': '39'},
 {'brand': 'New Balance', 'color': 'black', 'size': '43'},
 {'brand': 'New Balance', 'color': 'orange', 'size': '44'},
 {'brand': 'Nike', 'color': 'black', 'size': '41'},
 {'brand': 'Adidas', 'color': 'orange', 'size': '37'},
 {'brand': 'Adidas', 'color': 'black', 'size': '38'},
 {'brand': 'Adidas', 'color': 'pink', 'size': '41'},
 {'brand': 'Adidas', 'color': 'white', 'size': '36'},
 {'brand': 'Adidas', 'color': 'orange', 'size': '36'},
 {'brand': 'Nike', 'color': 'pink', 'size': '41'},
 {'brand': '

In [53]:
dict(zip('abc', [10, 20, 30]))

{'a': 10, 'b': 20, 'c': 30}

In [58]:
# list of lists with a comprehension
[[one_line.split(':')[0], one_line.split(':')[2]]
for one_line in open('/etc/passwd')
if not one_line.startswith('#')]

[['nobody', '-2'],
 ['root', '0'],
 ['daemon', '1'],
 ['_uucp', '4'],
 ['_taskgated', '13'],
 ['_networkd', '24'],
 ['_installassistant', '25'],
 ['_lp', '26'],
 ['_postfix', '27'],
 ['_scsd', '31'],
 ['_ces', '32'],
 ['_appstore', '33'],
 ['_mcxalr', '54'],
 ['_appleevents', '55'],
 ['_geod', '56'],
 ['_devdocs', '59'],
 ['_sandbox', '60'],
 ['_mdnsresponder', '65'],
 ['_ard', '67'],
 ['_www', '70'],
 ['_eppc', '71'],
 ['_cvs', '72'],
 ['_svn', '73'],
 ['_mysql', '74'],
 ['_sshd', '75'],
 ['_qtss', '76'],
 ['_cyrus', '77'],
 ['_mailman', '78'],
 ['_appserver', '79'],
 ['_clamav', '82'],
 ['_amavisd', '83'],
 ['_jabber', '84'],
 ['_appowner', '87'],
 ['_windowserver', '88'],
 ['_spotlight', '89'],
 ['_tokend', '91'],
 ['_securityagent', '92'],
 ['_calendar', '93'],
 ['_teamsserver', '94'],
 ['_update_sharing', '95'],
 ['_installer', '96'],
 ['_atsserver', '97'],
 ['_ftp', '98'],
 ['_unknown', '99'],
 ['_softwareupdate', '200'],
 ['_coreaudiod', '202'],
 ['_screensaver', '203'],
 ['_loc

In [59]:
# get a dict from our list of lists with a comprehension
dict([[one_line.split(':')[0], one_line.split(':')[2]]
for one_line in open('/etc/passwd')
if not one_line.startswith('#')])

{'nobody': '-2',
 'root': '0',
 'daemon': '1',
 '_uucp': '4',
 '_taskgated': '13',
 '_networkd': '24',
 '_installassistant': '25',
 '_lp': '26',
 '_postfix': '27',
 '_scsd': '31',
 '_ces': '32',
 '_appstore': '33',
 '_mcxalr': '54',
 '_appleevents': '55',
 '_geod': '56',
 '_devdocs': '59',
 '_sandbox': '60',
 '_mdnsresponder': '65',
 '_ard': '67',
 '_www': '70',
 '_eppc': '71',
 '_cvs': '72',
 '_svn': '73',
 '_mysql': '74',
 '_sshd': '75',
 '_qtss': '76',
 '_cyrus': '77',
 '_mailman': '78',
 '_appserver': '79',
 '_clamav': '82',
 '_amavisd': '83',
 '_jabber': '84',
 '_appowner': '87',
 '_windowserver': '88',
 '_spotlight': '89',
 '_tokend': '91',
 '_securityagent': '92',
 '_calendar': '93',
 '_teamsserver': '94',
 '_update_sharing': '95',
 '_installer': '96',
 '_atsserver': '97',
 '_ftp': '98',
 '_unknown': '99',
 '_softwareupdate': '200',
 '_coreaudiod': '202',
 '_screensaver': '203',
 '_locationd': '205',
 '_trustevaluationagent': '208',
 '_timezone': '210',
 '_lda': '211',
 '_cvmsro

In [62]:
# dict comprehension 

#     key                         value
{ one_line.split(':')[0] :  one_line.split(':')[2]
for one_line in open('/etc/passwd')
if not one_line.startswith('#') }

{'nobody': '-2',
 'root': '0',
 'daemon': '1',
 '_uucp': '4',
 '_taskgated': '13',
 '_networkd': '24',
 '_installassistant': '25',
 '_lp': '26',
 '_postfix': '27',
 '_scsd': '31',
 '_ces': '32',
 '_appstore': '33',
 '_mcxalr': '54',
 '_appleevents': '55',
 '_geod': '56',
 '_devdocs': '59',
 '_sandbox': '60',
 '_mdnsresponder': '65',
 '_ard': '67',
 '_www': '70',
 '_eppc': '71',
 '_cvs': '72',
 '_svn': '73',
 '_mysql': '74',
 '_sshd': '75',
 '_qtss': '76',
 '_cyrus': '77',
 '_mailman': '78',
 '_appserver': '79',
 '_clamav': '82',
 '_amavisd': '83',
 '_jabber': '84',
 '_appowner': '87',
 '_windowserver': '88',
 '_spotlight': '89',
 '_tokend': '91',
 '_securityagent': '92',
 '_calendar': '93',
 '_teamsserver': '94',
 '_update_sharing': '95',
 '_installer': '96',
 '_atsserver': '97',
 '_ftp': '98',
 '_unknown': '99',
 '_softwareupdate': '200',
 '_coreaudiod': '202',
 '_screensaver': '203',
 '_locationd': '205',
 '_trustevaluationagent': '208',
 '_timezone': '210',
 '_lda': '211',
 '_cvmsro

In [63]:
!ls *.txt

config.txt   infile2.txt  linux-etc-passwd.txt	outfile.txt
infile0.txt  infile3.txt  mini-access-log.txt	shoe-data.txt
infile1.txt  infile4.txt  nums.txt


# Exercise: Words and lengths

1. Ask the user to enter a sentence.
2. Use a dict comprehension to turn the input into a dict, where the words are keys and the values are the lengths of the words.

In [65]:
s = input('Enter a sentence: ')



Enter a sentence: this is a very interesting sentence


In [66]:
s

'this is a very interesting sentence'

In [67]:
s.split()

['this', 'is', 'a', 'very', 'interesting', 'sentence']

In [69]:
{one_word : len(one_word)
 for one_word in s.split()}

{'this': 4, 'is': 2, 'a': 1, 'very': 4, 'interesting': 11, 'sentence': 8}

In [72]:
s = input('Enter numbers: ')

sum([int(one_number)
 for one_number in s.split()])

Enter numbers: 10 20 30


60

In [73]:
s = input('Enter numbers: ')

sum([int(one_number)
 for one_number in s.split()])

Enter numbers: 10 20 30 10 20 30


120

In [74]:
s = input('Enter numbers: ')

sum(set([int(one_number)
 for one_number in s.split()]))

Enter numbers: 10 20 30 10 20 30


60

In [75]:
s = input('Enter numbers: ')

sum({int(one_number)
 for one_number in s.split()})

Enter numbers: 10 20 30


60

In [76]:
# set comprehension
{ x**2
 for x in range(-5, 5)
    
}

{0, 1, 4, 9, 16, 25}

In [77]:
# list comprehension
[ x**2
 for x in range(-5, 5)
    
]

[25, 16, 9, 4, 1, 0, 1, 4, 9, 16]

In [78]:
# dict comprehension
{ x: x**2
 for x in range(-5, 5)
    
}

{-5: 25, -4: 16, -3: 9, -2: 4, -1: 1, 0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

In [79]:
!cat linux-etc-passwd.txt

# This is a comment
# You should ignore me
root:x:0:0:root:/root:/bin/bash
daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
bin:x:2:2:bin:/bin:/usr/sbin/nologin
sys:x:3:3:sys:/dev:/usr/sbin/nologin
sync:x:4:65534:sync:/bin:/bin/sync
games:x:5:60:games:/usr/games:/usr/sbin/nologin
man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin
mail:x:8:8:mail:/var/mail:/usr/sbin/nologin



news:x:9:9:news:/var/spool/news:/usr/sbin/nologin
uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin
proxy:x:13:13:proxy:/bin:/usr/sbin/nologin
www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin
backup:x:34:34:backup:/var/backups:/usr/sbin/nologin
list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin
irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin
gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin

nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin
syslog:x:101:104::/home/syslog:/bin/false
messagebu

# Exercise: Different shells

1. Use a set comprehension to read from `linux-etc-passwd.txt`.
2. Ignore lines that start with `#` or are empty.
3. The returned set should contain all of the different shells on the system. The shell is the *final* field in each record.

In [82]:
[one_line
 for one_line in open('linux-etc-passwd.txt')
 if not one_line.startswith('#') and one_line.strip()]

['root:x:0:0:root:/root:/bin/bash\n',
 'daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin\n',
 'bin:x:2:2:bin:/bin:/usr/sbin/nologin\n',
 'sys:x:3:3:sys:/dev:/usr/sbin/nologin\n',
 'sync:x:4:65534:sync:/bin:/bin/sync\n',
 'games:x:5:60:games:/usr/games:/usr/sbin/nologin\n',
 'man:x:6:12:man:/var/cache/man:/usr/sbin/nologin\n',
 'lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin\n',
 'mail:x:8:8:mail:/var/mail:/usr/sbin/nologin\n',
 'news:x:9:9:news:/var/spool/news:/usr/sbin/nologin\n',
 'uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin\n',
 'proxy:x:13:13:proxy:/bin:/usr/sbin/nologin\n',
 'www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin\n',
 'backup:x:34:34:backup:/var/backups:/usr/sbin/nologin\n',
 'list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin\n',
 'irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin\n',
 'gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin\n',
 'nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin\n',
 'syslog:x:101:

In [86]:
# str.startswith accepts a tuple of prefixes: this skips comment lines
# and lines that begin with a newline (i.e. empty lines).
{one_line.split(':')[-1].strip()
 for one_line in open('linux-etc-passwd.txt')
 if not one_line.startswith(('#', '\n'))}

{'/bin/bash',
 '/bin/false',
 '/bin/nologin',
 '/bin/sh',
 '/bin/sync',
 '/usr/sbin/nologin'}

In [87]:
!head mini-access-log.txt

67.218.116.165 - - [30/Jan/2010:00:03:18 +0200] "GET /robots.txt HTTP/1.0" 200 99 "-" "Mozilla/5.0 (Twiceler-0.9 http://www.cuil.com/twiceler/robot.html)"
66.249.71.65 - - [30/Jan/2010:00:12:06 +0200] "GET /browse/one_node/1557 HTTP/1.1" 200 39208 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
65.55.106.183 - - [30/Jan/2010:01:29:23 +0200] "GET /robots.txt HTTP/1.1" 200 99 "-" "msnbot/2.0b (+http://search.msn.com/msnbot.htm)"
65.55.106.183 - - [30/Jan/2010:01:30:06 +0200] "GET /browse/one_model/2162 HTTP/1.1" 200 2181 "-" "msnbot/2.0b (+http://search.msn.com/msnbot.htm)"
66.249.71.65 - - [30/Jan/2010:02:07:14 +0200] "GET /browse/browse_applet_tab/2593 HTTP/1.1" 200 10305 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
66.249.71.65 - - [30/Jan/2010:02:10:39 +0200] "GET /browse/browse_files_tab/2499?tab=true HTTP/1.1" 200 446 "-" "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
66.249.65.12 - -

In [88]:
# what IP addresses are in mini-access-log.txt?

[one_line.split()[0]
for one_line in open('mini-access-log.txt')]

['67.218.116.165',
 '66.249.71.65',
 '65.55.106.183',
 '65.55.106.183',
 '66.249.71.65',
 '66.249.71.65',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '65.55.106.131',
 '65.55.106.131',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '65.55.106.186',
 '65.55.106.186',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '74.52.245.146',
 '74.52.245.146',
 '66.249.65.43',
 '66.249.65.43',
 '66.249.65.43',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '65.55.207.25',
 '65.55.207.25',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '65.55.207.94',
 '65.55.207.94',
 '66.249.65.12',
 '65.55.207.71',
 '66.249.65.12',
 '66.249.65.12',
 '66.249.65.12',
 '98.242.170.241',
 '66.249.65.38',
 '66.249.65.38',
 '66.249.65.38',
 '66.249.65.38',
 '66.249.65.38',
 '

In [89]:
# what DIFFERENT IP addresses are in mini-access-log.txt?

{one_line.split()[0]
for one_line in open('mini-access-log.txt')}

{'208.80.193.28',
 '65.55.106.131',
 '65.55.106.155',
 '65.55.106.183',
 '65.55.106.186',
 '65.55.207.126',
 '65.55.207.25',
 '65.55.207.50',
 '65.55.207.71',
 '65.55.207.77',
 '65.55.207.94',
 '65.55.215.75',
 '66.249.65.12',
 '66.249.65.38',
 '66.249.65.43',
 '66.249.71.65',
 '67.195.112.35',
 '67.218.116.165',
 '74.52.245.146',
 '82.34.9.20',
 '89.248.172.58',
 '98.242.170.241'}

In [91]:
# how many times does each IP address appear in the logfile?

from collections import Counter

c = Counter([one_line.split()[0]
        for one_line in open('mini-access-log.txt')])
c

Counter({'67.218.116.165': 2,
         '66.249.71.65': 3,
         '65.55.106.183': 2,
         '66.249.65.12': 32,
         '65.55.106.131': 2,
         '65.55.106.186': 2,
         '74.52.245.146': 2,
         '66.249.65.43': 3,
         '65.55.207.25': 2,
         '65.55.207.94': 2,
         '65.55.207.71': 1,
         '98.242.170.241': 1,
         '66.249.65.38': 100,
         '65.55.207.126': 2,
         '82.34.9.20': 2,
         '65.55.106.155': 2,
         '65.55.207.77': 2,
         '208.80.193.28': 1,
         '89.248.172.58': 22,
         '67.195.112.35': 16,
         '65.55.207.50': 3,
         '65.55.215.75': 2})

In [93]:
c.most_common(5)

[('66.249.65.38', 100),
 ('66.249.65.12', 32),
 ('89.248.172.58', 22),
 ('67.195.112.35', 16),
 ('66.249.71.65', 3)]

In [94]:
{ one_line.split(':')[0] :  one_line.split(':')[2]
for one_line in open('/etc/passwd')
if not one_line.startswith('#') }

{'nobody': '-2',
 'root': '0',
 'daemon': '1',
 '_uucp': '4',
 '_taskgated': '13',
 '_networkd': '24',
 '_installassistant': '25',
 '_lp': '26',
 '_postfix': '27',
 '_scsd': '31',
 '_ces': '32',
 '_appstore': '33',
 '_mcxalr': '54',
 '_appleevents': '55',
 '_geod': '56',
 '_devdocs': '59',
 '_sandbox': '60',
 '_mdnsresponder': '65',
 '_ard': '67',
 '_www': '70',
 '_eppc': '71',
 '_cvs': '72',
 '_svn': '73',
 '_mysql': '74',
 '_sshd': '75',
 '_qtss': '76',
 '_cyrus': '77',
 '_mailman': '78',
 '_appserver': '79',
 '_clamav': '82',
 '_amavisd': '83',
 '_jabber': '84',
 '_appowner': '87',
 '_windowserver': '88',
 '_spotlight': '89',
 '_tokend': '91',
 '_securityagent': '92',
 '_calendar': '93',
 '_teamsserver': '94',
 '_update_sharing': '95',
 '_installer': '96',
 '_atsserver': '97',
 '_ftp': '98',
 '_unknown': '99',
 '_softwareupdate': '200',
 '_coreaudiod': '202',
 '_screensaver': '203',
 '_locationd': '205',
 '_trustevaluationagent': '208',
 '_timezone': '210',
 '_lda': '211',
 '_cvmsro

In [96]:
# walrus operator :=
# assignment expression

while s := input('Enter your name: ').strip():
    print(f'Hello, {s}!')

Enter your name: asdfa
Hello, asdfa!
Enter your name: asdfasfa
Hello, asdfasfa!
Enter your name: 


In [98]:
# using the walrus for assignment in the condition
#
# The split happens once per line, inside the "if"; the key and value
# expressions then reuse "fields". str.split with a separator always
# returns a non-empty list, so the walrus clause is always truthy and
# only the startswith('#') test actually filters anything.

{ fields[0] : fields[2]
for one_line in open('/etc/passwd')
if not one_line.startswith('#') and (fields := one_line.split(':'))}

{'nobody': '-2',
 'root': '0',
 'daemon': '1',
 '_uucp': '4',
 '_taskgated': '13',
 '_networkd': '24',
 '_installassistant': '25',
 '_lp': '26',
 '_postfix': '27',
 '_scsd': '31',
 '_ces': '32',
 '_appstore': '33',
 '_mcxalr': '54',
 '_appleevents': '55',
 '_geod': '56',
 '_devdocs': '59',
 '_sandbox': '60',
 '_mdnsresponder': '65',
 '_ard': '67',
 '_www': '70',
 '_eppc': '71',
 '_cvs': '72',
 '_svn': '73',
 '_mysql': '74',
 '_sshd': '75',
 '_qtss': '76',
 '_cyrus': '77',
 '_mailman': '78',
 '_appserver': '79',
 '_clamav': '82',
 '_amavisd': '83',
 '_jabber': '84',
 '_appowner': '87',
 '_windowserver': '88',
 '_spotlight': '89',
 '_tokend': '91',
 '_securityagent': '92',
 '_calendar': '93',
 '_teamsserver': '94',
 '_update_sharing': '95',
 '_installer': '96',
 '_atsserver': '97',
 '_ftp': '98',
 '_unknown': '99',
 '_softwareupdate': '200',
 '_coreaudiod': '202',
 '_screensaver': '203',
 '_locationd': '205',
 '_trustevaluationagent': '208',
 '_timezone': '210',
 '_lda': '211',
 '_cvmsro

In [99]:
mylist = [[10, 20, 25, 30], [40, 45, 50, 55, 60, 65],
         [70, 80], [90, 95, 100, 105]]

mylist

[[10, 20, 25, 30], [40, 45, 50, 55, 60, 65], [70, 80], [90, 95, 100, 105]]

In [100]:
# how can I flatten this list?

[one_item
 for one_item in mylist]

[[10, 20, 25, 30], [40, 45, 50, 55, 60, 65], [70, 80], [90, 95, 100, 105]]

In [101]:
# nested list comprehension
[one_item
 for one_sublist in mylist
 for one_item in one_sublist]

[10, 20, 25, 30, 40, 45, 50, 55, 60, 65, 70, 80, 90, 95, 100, 105]

In [102]:
[one_item for one_sublist in mylist for one_item in one_sublist]

[10, 20, 25, 30, 40, 45, 50, 55, 60, 65, 70, 80, 90, 95, 100, 105]

In [103]:
# nested list comprehension

[one_item
 for one_sublist in mylist
 if len(one_sublist) > 2
 for one_item in one_sublist]

[10, 20, 25, 30, 40, 45, 50, 55, 60, 65, 90, 95, 100, 105]

In [105]:
# nested list comprehension

[one_item
 for one_sublist in mylist
 for one_item in one_sublist
 if one_item % 2  ]  # odd number

[25, 45, 55, 65, 95, 105]

In [108]:
# nested list comprehension

[one_item
 for one_sublist in mylist
 if len(one_sublist) > 4
 for one_item in one_sublist
 if one_item % 2  ]  # odd number

[45, 55, 65]

In [109]:
[(x,y)
 for x in range(5)
 for y in range(5)]

[(0, 0),
 (0, 1),
 (0, 2),
 (0, 3),
 (0, 4),
 (1, 0),
 (1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (2, 0),
 (2, 1),
 (2, 2),
 (2, 3),
 (2, 4),
 (3, 0),
 (3, 1),
 (3, 2),
 (3, 3),
 (3, 4),
 (4, 0),
 (4, 1),
 (4, 2),
 (4, 3),
 (4, 4)]

In [110]:
!ls movi*

movies.dat


In [111]:
!head movies.dat

1::Toy Story (1995)::Animation|Children's|Comedy
2::Jumanji (1995)::Adventure|Children's|Fantasy
3::Grumpier Old Men (1995)::Comedy|Romance
4::Waiting to Exhale (1995)::Comedy|Drama
5::Father of the Bride Part II (1995)::Comedy
6::Heat (1995)::Action|Crime|Thriller
7::Sabrina (1995)::Comedy|Romance
8::Tom and Huck (1995)::Adventure|Children's
9::Sudden Death (1995)::Action
10::GoldenEye (1995)::Action|Adventure|Thriller


# Exercise: Movie categories

1. Use a nested list comprehension to find the 5 most common categories for movies in `movies.dat`.
2. Take the output from the list comprehension, which will be a list of categories, and use `Counter` to find the most common ones.

In [120]:
from collections import Counter

# Outer loop: one line per movie. Inner loop: each category in the
# third '::'-separated field, where categories are joined with '|'.
c = Counter([one_category
for one_line in open('movies.dat', encoding='Latin-1')
for one_category in one_line.split('::')[2].strip().split('|')])
c

Counter({'Animation': 105,
         "Children's": 251,
         'Comedy': 1200,
         'Adventure': 283,
         'Fantasy': 68,
         'Romance': 471,
         'Drama': 1603,
         'Action': 503,
         'Crime': 211,
         'Thriller': 492,
         'Horror': 343,
         'Sci-Fi': 276,
         'Documentary': 127,
         'War': 143,
         'Musical': 114,
         'Mystery': 106,
         'Film-Noir': 44,
         'Western': 68})

In [121]:
c.most_common(5)

[('Drama', 1603),
 ('Comedy', 1200),
 ('Action', 503),
 ('Thriller', 492),
 ('Romance', 471)]

In [123]:
for key, value in c.most_common(5):
    print(f'{key:10}: {value}')

Drama     : 1603
Comedy    : 1200
Action    : 503
Thriller  : 492
Romance   : 471


In [126]:
# crude text bar chart: one 'x' per 50 movies in the category
for category, count in c.most_common(5):
    bar_length = int(count / 50)
    print(f'{category:10}: {"x" * bar_length}')

Drama     : xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Comedy    : xxxxxxxxxxxxxxxxxxxxxxxx
Action    : xxxxxxxxxx
Thriller  : xxxxxxxxx
Romance   : xxxxxxxxx


In [127]:
import random

numbers = [random.randint(0, 100) 
          for i in range(10)]

numbers

[62, 12, 3, 8, 13, 21, 13, 19, 60, 10]

In [128]:
sorted(numbers)

[3, 8, 10, 12, 13, 13, 19, 21, 60, 62]

In [129]:
import random

numbers = [random.randint(-100, 100) 
          for i in range(10)]

numbers

[90, -100, 82, 83, 11, -39, -79, -10, 75, 69]

In [130]:
sorted(numbers)

[-100, -79, -39, -10, 11, 69, 75, 82, 83, 90]

In [131]:
sorted(numbers, key=abs)

[-10, 11, -39, 69, 75, -79, 82, 83, 90, -100]

In [132]:
words = 'This is a sentence for my Python course at WDC'.split()

sorted(words)

['Python', 'This', 'WDC', 'a', 'at', 'course', 'for', 'is', 'my', 'sentence']

In [133]:
# case-insensitive sort
sorted(words, key=str.lower)

['a', 'at', 'course', 'for', 'is', 'my', 'Python', 'sentence', 'This', 'WDC']

In [None]:
# what if I want to sort by backwards words?
def by_backwards_word(one_word):
    """Return one_word reversed, so it can serve as a sort key."""
    reversed_word = one_word[::-1]
    return reversed_word

