## Elements of the [standard library](https://docs.python.org/3/library/index.html) II.

#### [collections](https://docs.python.org/3/library/collections.html)
- Provides specialized container data types.

In [1]:
import collections

In [2]:
# Dictionary for counting hashable objects.
c = collections.Counter([1, 1, 2, 1, 3, 2])
c

Counter({1: 3, 2: 2, 3: 1})

In [3]:
c[1]

3

In [4]:
# Word frequencies in Hamlet, computed with a Counter.
import string

# Read the text inside a `with` block so the file is closed as soon as we are done.
with open('hamlet.txt') as f:
    words = f.read().lower().split()
# Remove leading and trailing punctuation from each whitespace-separated token.
words = [word.strip(string.punctuation) for word in words]
word_counts = collections.Counter(words)
word_counts.most_common(30)

[('the', 1145),
 ('and', 973),
 ('to', 736),
 ('of', 674),
 ('i', 565),
 ('you', 539),
 ('a', 534),
 ('my', 513),
 ('in', 431),
 ('it', 409),
 ('that', 381),
 ('ham', 358),
 ('is', 339),
 ('not', 310),
 ('his', 297),
 ('this', 297),
 ('with', 268),
 ('but', 258),
 ('for', 248),
 ('your', 241),
 ('me', 231),
 ('lord', 223),
 ('as', 219),
 ('be', 216),
 ('he', 213),
 ('what', 200),
 ('king', 195),
 ('him', 195),
 ('so', 194),
 ('have', 180)]

In [5]:
# Dictionary with default values.
dd = collections.defaultdict(int)
dd

defaultdict(int, {})

In [6]:
# Adding a new key-value pair.
dd['foo'] = 10
dd

defaultdict(int, {'foo': 10})

In [7]:
# Accessing a non-existent key.
# This will not raise an error, but assigns the default value to the key.
# The default value is created by the function call int().
dd['bar']

0

In [8]:
int()

0

In [18]:
dd

defaultdict(int, {'foo': 10, 'bar': 0})

In [19]:
# Accessing a non-existent key, then appending an item.
dd2 = collections.defaultdict(list)
dd2['apple'].append(42)
dd2

defaultdict(list, {'apple': [42]})

In [20]:
list()

[]

In [23]:
# Conversion to an ordinary dict.
d = dict(dd2)
d

{'apple': [42]}

In [26]:
# Tuple with named items.
Point = collections.namedtuple('Point', ['x', 'y'])

In [31]:
p1 = Point(10, 20)
p1

Point(x=10, y=20)

In [32]:
p2 = Point(x=11, y=22)
p2

Point(x=11, y=22)

In [34]:
# Tuple style usage.
print(p1[0], p1[1])

10 20


In [35]:
# Struct style usage.
print(p1.x, p1.y)

10 20


In [36]:
# namedtuple objects can be used as dictionary keys.
{p1: 'foo', p2: 'bar'}

{Point(x=10, y=20): 'foo', Point(x=11, y=22): 'bar'}

In [37]:
# Can we change items of a namedtuple?
p1[0] = 100

TypeError: 'Point' object does not support item assignment

In [42]:
# Using a namedtuple to store a football game result.
Game = collections.namedtuple('Game', ['round', 'hteam', 'ateam', 'hgoals', 'agoals'])
g = Game(10, 'Liverpool', 'Chelsea', 2, 1)
g

Game(round=10, hteam='Liverpool', ateam='Chelsea', hgoals=2, agoals=1)

In [43]:
g.hteam

'Liverpool'

In [54]:
# Exercise: Write a program that simulates n rolls with 2 dice,
# then prints how many times the sum of rolls was 2, 3, ..., 12!

from random import randint

n = 100_000
results = [randint(1, 6) + randint(1, 6) for _ in range(n)]
freq = collections.Counter(results)
for r in range(2, 13):
    print(r, freq[r])

2 2785
3 5527
4 8350
5 11136
6 13939
7 16487
8 13840
9 11144
10 8431
11 5564
12 2797


In [55]:
# solution 2 (without a comprehension)
n = 100_000

results = []
for _ in range(n):
    result = randint(1, 6) + randint(1, 6)
    results.append(result)

freq = collections.Counter(results)
for r in range(2, 13):
    print(r, freq[r])

2 2876
3 5448
4 8412
5 11042
6 13795
7 16596
8 14051
9 11017
10 8402
11 5498
12 2863


In [57]:
# solution 3 (using a list for counting)
n = 100_000
results = [randint(1, 6) + randint(1, 6) for _ in range(n)]
freq = [0] * 13 # we use a list data structure to count frequencies
for r in results:
    freq[r] += 1
freq[2:]

[2784, 5735, 8423, 11112, 13912, 16657, 13968, 11057, 8199, 5492, 2661]

In [63]:
# solution 4 (applying Counter on-the-fly)
n = 100_000
freq = collections.Counter() # empty counter
for _ in range(n):
    r = randint(1, 6) + randint(1, 6)
    freq.update([r]) # update the counter
for r in range(2, 13):
    print(r, freq[r])

2 2759
3 5638
4 8696
5 11069
6 13853
7 16643
8 13773
9 11040
10 8272
11 5518
12 2739


In [64]:
# solution 5 (like solution 1, but in a memory efficient way)
n = 100_000
results = (randint(1, 6) + randint(1, 6) for _ in range(n)) # create a generator object instead of list
freq = collections.Counter(results)
for r in range(2, 13):
    print(r, freq[r])

2 2774
3 5625
4 8409
5 10996
6 13949
7 16585
8 13791
9 11186
10 8269
11 5567
12 2849


#### [copy](https://docs.python.org/3/library/copy.html)
- Contains a shallow and a deep copy function.

In [65]:
import copy

In [68]:
# In Python, assignment does NOT copy, it only creates a reference.
a = [1, 2, 3]
b = a
b[0] = 100
print(a, b)

[100, 2, 3] [100, 2, 3]


In [69]:
# Making a shallow copy.
a = [1, 2, 3]
b = copy.copy(a)
b[0] = 100
print(a, b)

[1, 2, 3] [100, 2, 3]


In [70]:
# Making a shallow copy of a list of lists object.
# Shallow copy only copies at the highest level of the data structure!

a = [[1], [2], [3]]
b = copy.copy(a) # only a shallow copy is made
b[0][0] = 100
print(a, b)

[[100], [2], [3]] [[100], [2], [3]]


In [71]:
# Making a deep copy of a list of lists object.
a = [[1], [2], [3]]
b = copy.deepcopy(a)
b[0][0] = 100
print(a, b)

[[1], [2], [3]] [[100], [2], [3]]


#### [glob](https://docs.python.org/3/library/glob.html)
- Provides a function for collecting file names matching a given pattern.

In [72]:
import glob

In [73]:
# The files of the current directory with extension ".txt".
glob.glob('*.txt')

['celsius_fahrenheit.txt',
 'pl.txt',
 'example_file_2.txt',
 'real_programmers.txt',
 'baseball.txt',
 'example_file.txt',
 'matrix.txt',
 'hamlet.txt']

In [74]:
# Sorting the file names.
sorted(glob.glob('*.txt'))

['baseball.txt',
 'celsius_fahrenheit.txt',
 'example_file.txt',
 'example_file_2.txt',
 'hamlet.txt',
 'matrix.txt',
 'pl.txt',
 'real_programmers.txt']

#### [gzip](https://docs.python.org/3/library/gzip.html)

- Provides tools for reading and writing GZIP archives.
- Remark: The standard library supports other formats too (e.g. BZ2, LZMA, ZIP, TAR).

In [75]:
import gzip

In [78]:
# Writing a GZIP file.
# The `with` block closes the archive, which guarantees that all data is flushed to disk.
with gzip.open('foo.gz', 'wt') as f:
    nchars = f.write('orange' * 100_000)
nchars  # number of characters written

600000

In [81]:
# Reading a GZIP file.
# The `with` block closes the archive once its content has been read.
with gzip.open('foo.gz', 'rt') as f:
    text = f.read()
text[:1000]

'orangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeorangeora

#### [os](https://docs.python.org/3/library/os.html)

- Provides access to certain services of the operating system.

In [84]:
import os

In [86]:
# Executing a command.
# os.system('ls *.txt')

In [None]:
# What type of file can be found at a given file path?
# os.path.isdir('/tmp')
# os.path.isfile('/tmp')

In [None]:
# Extracting the directory name from a path.
# os.path.dirname('/tmp/a/bbb.txt')

In [None]:
# Accessing environment variables.
# os.environ['LANG']

#### [pickle](https://docs.python.org/3/library/pickle.html)

- Provides a solution for the serialization of Python data structures (transformation to bytes), and for deserialization.

In [88]:
import pickle

In [91]:
# Serializing a complex data structure to file.
data = {'foo': 10, 'bar': [1.5, 2.5]}
# Close the file explicitly (via `with`) so the pickle is fully written before it is read back.
with open('data.pkl', 'wb') as f:
    pickle.dump(data, f)

In [92]:
# Deserialization.
# NOTE: only unpickle files you trust; loading a pickle can execute arbitrary code.
with open('data.pkl', 'rb') as f:
    data2 = pickle.load(f)
data2

{'foo': 10, 'bar': [1.5, 2.5]}

In [94]:
# Serialization to a bytes object.
data = {'foo': 10, 'bar': [1.5, 2.5]}
p = pickle.dumps(data)
p

b'\x80\x04\x95)\x00\x00\x00\x00\x00\x00\x00}\x94(\x8c\x03foo\x94K\n\x8c\x03bar\x94]\x94(G?\xf8\x00\x00\x00\x00\x00\x00G@\x04\x00\x00\x00\x00\x00\x00eu.'

In [96]:
# Deserialization.
data2 = pickle.loads(p)
data2

{'foo': 10, 'bar': [1.5, 2.5]}

In [97]:
# Two handy utility functions.

def to_pickle(obj, fname, protocol=4):
    '''Serialize object to file.

    obj:      the object to pickle
    fname:    path of the output file (opened in binary write mode, so an
              existing file is overwritten)
    protocol: pickle protocol version passed to pickle.dump()
    '''
    # The `with` block closes the file even if pickling fails,
    # so the handle is never leaked and the data is flushed.
    with open(fname, 'wb') as f:
        pickle.dump(obj, f, protocol)
    
def from_pickle(fname):
    '''Deserialize object from file.

    fname: path of a file containing a pickled object

    Returns the unpickled object.

    WARNING: unpickling can execute arbitrary code, therefore only load
    files that come from a trusted source.
    '''
    # The `with` block closes the file as soon as the object has been loaded.
    with open(fname, 'rb') as f:
        return pickle.load(f)

In [98]:
to_pickle(data, 'data.pkl')

In [99]:
from_pickle('data.pkl')

{'foo': 10, 'bar': [1.5, 2.5]}

#### [subprocess](https://docs.python.org/3/library/subprocess.html)

- Provides tools for starting and controlling subprocesses.

In [100]:
import subprocess

In [102]:
# Starting a process and obtaining the standard output as a string.
# subprocess.getoutput('ls *.txt')

In [None]:
# Controlling an interactive command line program.
# p = subprocess.Popen(['python', '-i'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
# p.stdin.write('1 + 1\n'.encode('utf-8'))
# p.stdin.flush()
# p.stdout.readline().decode('utf-8')

#### [urllib](https://docs.python.org/3/library/urllib.html)

- A package for opening, reading and handling web addresses ([URL](https://en.wikipedia.org/wiki/URL)s).

In [103]:
from urllib.request import urlopen

In [111]:
# Exercise: Write a program that downloads the web page found at
# https://hu.exchange-rates.org/Rate/USD/HUF
# and extracts the USD-HUF exchange rate from it.
# NOTE: the solution below queries x-rates.com instead of the page named above.

url = 'https://www.x-rates.com/calculator/?from=USD&to=HUF&amount=1'
# The `with` block closes the HTTP response once the page has been read.
with urlopen(url) as response:
    data = response.read().decode('utf-8')

In [119]:
pattern = '<span class="ccOutputRslt">'
idx = data.find(pattern)
float(data[idx + len(pattern):].split('<')[0])

375.22

In [120]:
# Package the solution into a function!
def get_usd_huf():
    '''Return the USD-HUF exchange rate as a float.

    Downloads the x-rates.com calculator page and converts the text that
    directly follows the result <span> marker to a float.

    Raises ValueError if the marker is not present in the page, or if the
    text after it cannot be converted to a float. Errors raised by
    urlopen() (e.g. on network failure) are not handled here.
    '''
    url = 'https://www.x-rates.com/calculator/?from=USD&to=HUF&amount=1'
    pattern = '<span class="ccOutputRslt">'

    # The `with` block closes the HTTP response once the page has been read.
    with urlopen(url) as response:
        data = response.read().decode('utf-8')

    idx = data.find(pattern)
    if idx == -1:
        # str.find() reports "not found" as -1; slicing from there would
        # silently try to parse unrelated markup.
        raise ValueError(f'marker {pattern!r} not found in the page at {url}')

    # The rate is the text between the marker and the next tag.
    return float(data[idx + len(pattern):].split('<')[0])

In [121]:
get_usd_huf()

375.31

In [127]:
import datetime
print(datetime.datetime.now(), get_usd_huf())

2024-10-30 11:03:47.647600 376.16


In [124]:
# URL encoding and decoding.
# quote() and unquote() are documented members of urllib.parse, so import them from there.
from urllib.parse import quote, unquote

quote('https://hu.wikipedia.org/wiki/Mesterséges_intelligencia')

'https%3A//hu.wikipedia.org/wiki/Mesters%C3%A9ges_intelligencia'

In [125]:
unquote('https%3A//hu.wikipedia.org/wiki/Mesters%C3%A9ges_intelligencia')

'https://hu.wikipedia.org/wiki/Mesterséges_intelligencia'

## Exercise: Conway's Game of Life

Write a program that implements [Conway's Game of Life](https://en.wikipedia.org/wiki/Conway%27s_Game_of_Life)!

DESIGN CHOICES:
- the world will be stored in a finite, 2-D numpy array of characters
- the initial state will be read from a string
- border handling: the bordering rows and columns will never change state

In [9]:
# Let's specify the initial state in a string!
# (We could read it from file too, but let's just use a string for simplicity!)

worldstr = '''
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢😎😎🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢😎😎🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢😎🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
'''.strip()

In [10]:
# Read initial state to 2-D numpy array.
import numpy as np

def init_world(worldstr):
    '''Build the world array from a multi-line string.

    Every line of worldstr (separated by '\\n') becomes one row,
    and every character of a line becomes one cell.
    '''
    lines = worldstr.split('\n')
    cells = list(map(list, lines))
    return np.array(cells)

In [11]:
# Helper function to display a given state of the world.
def display_world(world):
    '''Print the world, one row per line, with the cells of a row joined together.'''
    for line in map(''.join, world):
        print(line)

In [12]:
def update_world(world, live='😎', dead='🤢'):
    '''Compute the next generation of the world.

    world: 2-D numpy array holding the cell symbols
           (presumably single-character strings; verify against the caller)
    live:  symbol that marks a live cell
    dead:  symbol that marks a dead cell

    Returns a new array; the input array is not modified.
    Cells in the bordering rows and columns never change state.
    '''
    nrows, ncols = world.shape
    new_world = world.copy() # make copy of the world

    # The liveness of every cell is computed once, outside the loops.
    is_live = (world == live)

    # iterate over inner positions
    for i in range(1, nrows - 1):
        for j in range(1, ncols - 1):
            # live cells in the 3x3 neighborhood, without the center cell
            nlive = int(is_live[i - 1:i + 2, j - 1:j + 2].sum()) - int(is_live[i, j])

            # update cell state
            if nlive < 2 or nlive > 3:
                new_world[i, j] = dead # underpopulation or overpopulation
            elif nlive == 3:
                new_world[i, j] = live # survival or birth
            # with exactly 2 live neighbors the cell keeps its state

    return new_world

In [15]:
# Build the initial state with the init_world() helper instead of repeating its body.
world = init_world(worldstr)

#while True: # main loop
# A fixed number of generations is displayed, so that the cell terminates.
for _ in range(52):
    display_world(world)
    world = update_world(world)
    print()

🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢😎😎🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢😎😎🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢😎🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢

🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢😎😎😎🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢😎🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢😎😎🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢

🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢🤢
🤢🤢🤢🤢🤢🤢🤢🤢🤢😎🤢