In [1]:
#!/usr/bin/env PYTHONHASHSEED=1234 python3

# Copyright 2014-2019 Brett Slatkin, Pearson Education Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Reproduce book environment
import random
random.seed(1234)

import logging
from pprint import pprint
from sys import stdout as STDOUT

# Write all output to a temporary directory
import atexit
import gc
import io
import os
import tempfile

# Scratch directory for the files the examples in this notebook create.
# Its cleanup is registered before the other handlers in this cell; atexit
# runs handlers in reverse registration order, so the directory is removed
# only after the handlers registered below have run.
TEST_DIR = tempfile.TemporaryDirectory()
atexit.register(TEST_DIR.cleanup)

# Make sure Windows processes exit cleanly
# NOTE(review): presumably a directory that is still the current working
# directory cannot be removed on Windows, hence the chdir back -- confirm.
OLD_CWD = os.getcwd()
atexit.register(lambda: os.chdir(OLD_CWD))
# Run everything from inside the scratch directory so relative paths
# (e.g. 'data.bin') resolve there.
os.chdir(TEST_DIR.name)

def close_open_files():
    """Close every I/O object the garbage collector currently tracks.

    Takes one snapshot from gc.get_objects() and calls close() on each
    entry that is an io.IOBase instance, in snapshot order. Returns None.
    """
    tracked = gc.get_objects()
    for stream in (item for item in tracked if isinstance(item, io.IOBase)):
        stream.close()

# Registered after the chdir and directory-cleanup handlers; atexit calls
# handlers in reverse registration order, so open files are closed before
# those handlers run.
atexit.register(close_open_files)

<function __main__.close_open_files>

In [2]:
# Example 1
# A bytes value is a sequence of 8-bit integers; \x65 is the escape for
# 101, the ASCII code of 'e'.
a = b'h\x65llo'
print([*a])   # the individual byte values
print(a)      # the bytes literal as a whole

[104, 101, 108, 108, 111]
b'hello'


In [3]:
# Example 2
# A str value is a sequence of Unicode code points; \u0300 is a combining
# grave accent that follows the plain 'a'.
a = 'a\u0300 propos'
print([*a])   # the individual code points
print(a)      # the rendered text

['a', '̀', ' ', 'p', 'r', 'o', 'p', 'o', 's']
à propos


In [4]:

# Example 3
def to_str(bytes_or_str):
    """Return *bytes_or_str* as text.

    A bytes instance is decoded as UTF-8; any other object is handed back
    unchanged.
    """
    if not isinstance(bytes_or_str, bytes):
        return bytes_or_str
    return bytes_or_str.decode('utf-8')  # Instance of str

# Both input kinds come back as str; repr() makes the quoting visible.
print(repr(to_str(b'foo')), repr(to_str('bar')), sep='\n')

'foo'
'bar'


In [5]:
# Example 4
def to_bytes(bytes_or_str):
    """Return *bytes_or_str* as binary data.

    A str instance is encoded as UTF-8; any other object is handed back
    unchanged.
    """
    if not isinstance(bytes_or_str, str):
        return bytes_or_str
    return bytes_or_str.encode('utf-8')  # Instance of bytes

# Both input kinds come back as bytes; repr() makes the b'' prefix visible.
print(repr(to_bytes(b'foo')), repr(to_bytes('bar')), sep='\n')

b'foo'
b'bar'


In [6]:
# Example 5
# The + operator works when both operands are of the same kind.
print(b'one' + b'two', 'one' + 'two', sep='\n')

b'onetwo'
onetwo


In [7]:
# Example 6
try:
    b'one' + 'two'
except TypeError:
    # Only the documented failure (bytes + str is rejected) counts as
    # expected; any other exception propagates instead of being logged.
    logging.exception('Expected')
else:
    assert False, 'bytes + str unexpectedly succeeded'

ERROR:root:Expected
Traceback (most recent call last):
  File "<ipython-input-7-80ba06b1ee6a>", line 3, in <module>
    b'one' + 'two'
TypeError: can't concat str to bytes


In [8]:
# Example 7
try:
    'one' + b'two'
except TypeError:
    # Only the documented failure (str + bytes is rejected) counts as
    # expected; any other exception propagates instead of being logged.
    logging.exception('Expected')
else:
    assert False, 'str + bytes unexpectedly succeeded'

ERROR:root:Expected
Traceback (most recent call last):
  File "<ipython-input-8-7d2614cdbe1e>", line 3, in <module>
    'one' + b'two'
TypeError: can only concatenate str (not "bytes") to str


In [10]:
# Example 8
# Ordering comparisons work when both operands are of the same kind.
assert all((b'red' > b'blue', 'red' > 'blue'))

In [11]:
# Example 9
try:
    assert 'red' > b'blue'
except TypeError:
    # Only the rejected str/bytes comparison counts as expected. An
    # AssertionError from the assert above must not be logged as success.
    logging.exception('Expected')
else:
    assert False, 'str > bytes comparison unexpectedly succeeded'

ERROR:root:Expected
Traceback (most recent call last):
  File "<ipython-input-11-405d8c27305f>", line 3, in <module>
    assert 'red' > b'blue'
TypeError: '>' not supported between instances of 'str' and 'bytes'


In [12]:
# Example 10
try:
    assert b'blue' < 'red'
except TypeError:
    # Only the rejected bytes/str comparison counts as expected. An
    # AssertionError from the assert above must not be logged as success.
    logging.exception('Expected')
else:
    assert False, 'bytes < str comparison unexpectedly succeeded'

ERROR:root:Expected
Traceback (most recent call last):
  File "<ipython-input-12-6b712dd07224>", line 3, in <module>
    assert b'blue' < 'red'
TypeError: '<' not supported between instances of 'bytes' and 'str'


In [13]:
# Example 11
# Equality between bytes and str does not raise; it evaluates to False
# even though the characters look the same.
print(str(b'foo' == 'foo'))

False


In [14]:
# Example 12
# %-formatting works when the format string and the argument are of the
# same kind.
print(b'red %s' % b'blue', 'red %s' % 'blue', sep='\n')

b'red blue'
red blue


In [15]:
# Example 13
try:
    print(b'red %s' % 'blue')
except TypeError:
    # Only the documented failure (a str argument for a bytes format
    # string) counts as expected; any other exception propagates.
    logging.exception('Expected')
else:
    assert False, 'bytes %-formatting with a str argument unexpectedly succeeded'

ERROR:root:Expected
Traceback (most recent call last):
  File "<ipython-input-15-8a8a62039e17>", line 3, in <module>
    print(b'red %s' % 'blue')
TypeError: %b requires a bytes-like object, or an object that implements __bytes__, not 'str'


In [16]:
# Example 14
# A str format string accepts a bytes argument, but inserts its repr
# (including the b'' prefix) rather than decoded text.
print('red %s' % (b'blue',))

red b'blue'


In [17]:
# Example 15
try:
    with open('data.bin', 'w') as f:
        f.write(b'\xf1\xf2\xf3\xf4\xf5')
except TypeError:
    # Only the documented failure (a text-mode file rejects bytes) counts
    # as expected; an OSError from open() now propagates instead of being
    # logged as success.
    logging.exception('Expected')
else:
    assert False, 'text-mode write of bytes unexpectedly succeeded'

ERROR:root:Expected
Traceback (most recent call last):
  File "<ipython-input-17-9b654fe52288>", line 4, in <module>
    f.write(b'\xf1\xf2\xf3\xf4\xf5')
TypeError: write() argument must be str, not bytes


In [18]:
# Example 16
# Binary mode ('wb') accepts bytes, so the payload that text mode rejected
# is written as-is.
f = open('data.bin', mode='wb')
with f:
    f.write(b'\xf1\xf2\xf3\xf4\xf5')

In [19]:
# Example 17
# Take the genuine opener from io.open (an alias of the built-in open)
# rather than from the current `open` name. Re-running this cell then never
# wraps an earlier wrapper, which would recurse without end.
real_open = io.open
try:
    # Silently force UTF-8 here to make sure this test fails on
    # all platforms. cp1252 considers these bytes valid on Windows.
    def open(*args, **kwargs):
        kwargs['encoding'] = 'utf-8'
        return real_open(*args, **kwargs)

    with open('data.bin', 'r') as f:
        data = f.read()
except UnicodeDecodeError:
    # Only the decoding failure counts as expected; a missing data.bin or
    # any other error propagates instead of being logged as success.
    logging.exception('Expected')
else:
    assert False, 'reading binary data as UTF-8 text unexpectedly succeeded'
finally:
    # Never let the UTF-8-forcing wrapper leak out of this cell.
    open = real_open

ERROR:root:Expected
Traceback (most recent call last):
  File "<ipython-input-19-38f68c785d57>", line 11, in <module>
    data = f.read()
  File "/home/dtateyama/anaconda3/envs/pyfund/lib/python3.7/codecs.py", line 322, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xf1 in position 0: invalid continuation byte


In [20]:
# Example 18
# Undo the UTF-8-forcing wrapper that the previous example bound to `open`.
open = real_open

# Binary mode hands back the raw bytes with no decoding step.
with open('data.bin', mode='rb') as f:
    data = f.read()

assert b'\xf1\xf2\xf3\xf4\xf5' == data

In [21]:
# Example 19
# With an explicit cp1252 encoding the stored bytes decode to the five
# accented characters checked below.
f = open('data.bin', mode='r', encoding='cp1252')
with f:
    data = f.read()

assert 'ñòóôõ' == data