## File I/O

### Old string formatting

In [2]:
import math
print('The value of PI is approximately %5.3f.' % math.pi) # old version
print('The value of PI is approximately {}'.format(math.pi))

The value of PI is approximately 3.142.
The value of PI is approximately 3.141592653589793


### Reading and Writing Files

In [4]:
help(open)
# 't' text mode (default) 如果字元有 00，會讀不出


Help on built-in function open in module io:

open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)
    Open file and return a stream.  Raise IOError upon failure.
    
    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)
    
    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), 'x' for creating and writing to a new file, and
    'a' for appending (which on some Unix systems, means that all writes
    append to the end of the file regardless of the current seek position

In [3]:
f = open('workfile.txt', 'r') # r, w, a; b
""""
Normally, files are opened in text mode, that means, 
you read and write strings from and to the file,
which are encoded in a specific encoding.
'b' appended to the mode opens the file in binary mode,
now the data is read and written in the form of bytes objects.
"""
f.read()

'0123456789abcdef'

In [4]:
f = open('workfile.txt', 'r')
print f.readline()
print f.readline()

0123456789abcdef



In [5]:
# This is memory efficient, fast, and leads to simple code

f = open('workfile.txt', 'r')
for line in f:
    print(line)

0123456789abcdef


In [6]:
# Read all the lines of a file in a list
f = open('workfile.txt', 'r')
print list(f)

['0123456789abcdef']


In [1]:
f = open('test.txt', 'r')
print f.readlines()

['123456\n', 'lsdkfj\n', 'sldkfj']


In [8]:
f = open('test.txt', 'w')
f.write('This is a test\n')
#f.close() # Try this!
"""
When you’re done with a file, call f.close() to close it and free up any system resources taken up by the open file.
"""

'\nWhen you\xe2\x80\x99re done with a file, call f.close() to close it and free up any system resources taken up by the open file.\n'

In [4]:
try:
    f = open('workfile.txt', 'r+')
except IOError:
    f = open('workfile.txt', 'wr')
f.write(b'0123456789abcdef')
f.seek(5)      # Go to the 6th byte in the file
a = f.read(1)
f.seek(1)  # Go to the 1st in the file，整個檔案的第一格
b = f.read(1)
f.close()
print a, b

5 1


In [10]:
"""
It is good practice to use the with keyword when dealing with file objects.
This has the advantage that the file is properly closed after its suite finishes,
even if an exception is raised on the way.
"""
with open('workfile.txt', 'r') as f:
    read_data = f.read()
f.closed

True

### Saving structured data with json

In [10]:
# serialization 序列化
import json
json.dumps([1, 'simple', 'list'])

'[1, "simple", "list"]'

In [11]:
x = [1, 'simple', [2, 3, 4], {"one":1, "two":2, "three":3}]
json.dumps(x)

'[1, "simple", [2, 3, 4], {"three": 3, "two": 2, "one": 1}]'

In [12]:
f = open("test.txt", "w")
json.dump(x, f)
f.close()

f = open("test.txt", "r")
print f.readlines()
f.close()

['[1, "simple", [2, 3, 4], {"three": 3, "two": 2, "one": 1}]']


In [15]:
with open('test.txt', 'r') as f:
    x1, x2, x3, x4 = json.load(f)
f = open('test.txt', 'r')
data = json.load(f)
f.close()
print x1, x2, x3, x4
print data, data[3]

1 simple [2, 3, 4] {u'one': 1, u'three': 3, u'two': 2}
[1, u'simple', [2, 3, 4], {u'one': 1, u'three': 3, u'two': 2}] {u'one': 1, u'three': 3, u'two': 2}


### CSV

In [15]:
# Take a look at EDA-NewYorkTimes.ipynb

###  pickle
The pickle module implements binary protocols for serializing and de-serializing a Python object structure.
* JSON is a text serialization format (it outputs unicode text, although most of the time it is then encoded to utf-8), while pickle is a binary serialization format;
* JSON is human-readable, while pickle is not;
* JSON is interoperable and widely used outside of the Python ecosystem, while pickle is Python-specific;
* JSON, by default, can only represent a subset of the Python built-in types, and no custom classes; pickle can represent an extremely large number of Python types (many of them automatically, by clever usage of Python’s introspection facilities; complex cases can be tackled by implementing specific object APIs).

In [1]:
import pickle

# An arbitrary collection of objects supported by pickle.
data = {
    'a': [1, 2.0, 3, 4+6j],
    'b': ("character string", b"byte string"),
    'c': {None, True, False}
}

with open('test.pickle', 'wb') as f:
    pickle.dump(data, f)

In [3]:
import pickle

with open('test.pickle', 'rb') as f:
    data = pickle.load(f)
    
print data

<class 'dict'>


In [18]:
import pprint
pprint.pprint(data)

{'a': [1, 2.0, 3, (4+6j)],
 'b': ('character string', 'byte string'),
 'c': set([None, False, True])}


In [19]:
import pprint
tup = ('spam', ('eggs', ('lumberjack', ('knights', ('ni', ('dead', ('parrot', ('fresh fruit',))))))))
stuff = ['a' * 10, tup, ['a' * 30, 'b' * 30], ['c' * 20, 'd' * 20]]
pprint.pprint(stuff)

['aaaaaaaaaa',
 ('spam',
  ('eggs',
   ('lumberjack',
    ('knights', ('ni', ('dead', ('parrot', ('fresh fruit',)))))))),
 ['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'],
 ['cccccccccccccccccccc', 'dddddddddddddddddddd']]


In [20]:
pprint.pprint(stuff, depth=3)

['aaaaaaaaaa',
 ('spam', ('eggs', (...))),
 ['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'],
 ['cccccccccccccccccccc', 'dddddddddddddddddddd']]


## Errors and Exceptions

### Syntax Errors

In [21]:
while True print('Hello world')

SyntaxError: invalid syntax (<ipython-input-21-614901b0e5ee>, line 1)

### Exceptions

In [22]:
10 * (1/0)

ZeroDivisionError: integer division or modulo by zero

In [23]:
4 + spam*3

NameError: name 'spam' is not defined

In [24]:
2' + 2

SyntaxError: EOL while scanning string literal (<ipython-input-24-e10b39e5376b>, line 1)

### Handling Exceptions

In [25]:
# https://docs.python.org/3/reference/compound_stmts.html#try
# Take a look, and learn BNF (Backus-Naur Form)

In [26]:
a, b = 1, 2
try:
    if a < b:
        print(a)
finally:
    print("finally")

1
finally


In [27]:
def the_Answer_to_the_Ultimate_Question_of_Life_the_Universe_and_Everything():
    try:
        1/0
    finally:
        return 42

the_Answer_to_the_Ultimate_Question_of_Life_the_Universe_and_Everything()

42

In [20]:
a = 22
b = 33
 
try:
    if a < b: # Try to change this!
        print(non_exit_var)
except:
    print("except")
else:
    print("else except")
    """
    The optional else clause is executed if and when control flows off the end of the try clause.
    Exceptions in the else clause are not handled by the preceding except clauses.
    The optional else clause, when present, must follow all except clauses.
    The use of the else clause is better than adding additional code to the try clause
    because it avoids accidentally catching an exception that wasn’t raised by the code
    being protected by the try ... except statement.
    """
finally:
    print("finally")

except
finally


In [29]:
try:
    if 1/0:
        print "E"
except (ZeroDivisionError, NameError, SyntaxError):
    pass

In [22]:
import sys

try:
    f = open('myfile.txt')
    s = f.readline()
    i = int(s.strip())
except OSError as err:
    print("OS error: {0}".format(err))
except ValueError:
    print("Could not convert data to an integer.")
except:
    print("Unexpected error:", sys.exc_info())
    
# Try to catch "IOError"

('Unexpected error:', (<type 'exceptions.IOError'>, IOError(2, 'No such file or directory'), <traceback object at 0x103d8dd40>))


### Raising Exceptions

In [25]:
def this_fails():
#     x = 1/0
    raise ValueError("ABC")

try:
    this_fails()
except:
    print("Unexpected error:", sys.exc_info()) # Try [1]

('Unexpected error:', (<type 'exceptions.ValueError'>, ValueError('ABC',), <traceback object at 0x103da00e0>))
