# 5 - Files and I/O

## Reading and Writing Text Data

In [1]:
text = '''
The cat
sat on
the mat
'''

text

'\nThe cat\nsat on\nthe mat\n'

In [2]:
import os

file_name = "test_file.txt"

if os.path.isfile(file_name):
    os.remove(file_name)


In [3]:
with open(file_name, "w") as target_file:
    target_file.write(text)


In [4]:
%%bash

cat test_file.txt


The cat
sat on
the mat


In [5]:
with open(file_name, "r") as target_file:
    print(target_file.read())



The cat
sat on
the mat



Text files can be written with a specific character encoding by passing `encoding` (and, optionally, an `errors` policy) to `open()`:

In [6]:
s = "Spicy jalapeño"
s

'Spicy jalapeño'

In [8]:
# "ñ" has no ASCII representation, so this write is expected to fail.
# Catch the error and display it, so the demonstration is still visible
# but the notebook keeps running under "Restart & Run All".
try:
    with open(file_name, "w", encoding="ascii") as target_file:
        target_file.write(s)
except UnicodeEncodeError as error:
    print("UnicodeEncodeError:", error)

UnicodeEncodeError: 'ascii' codec can't encode character '\xf1' in position 12: ordinal not in range(128)

In [9]:
with open(file_name, "w", encoding="ascii", errors="replace") as target_file:
    target_file.write(s)


In [10]:
%%bash
cat test_file.txt

Spicy jalape?o

In [11]:
with open(file_name, "w", encoding="ascii", errors="ignore") as target_file:
    target_file.write(s)


In [12]:
%%bash
cat test_file.txt

Spicy jalapeo

## Printing to a File

In [16]:
file_name

'test_file.txt'

In [15]:
with open(file_name, 'w') as target_file:
    print('Hello World!', file=target_file)


In [17]:
%%bash
cat test_file.txt

Hello World!


## Printing with a Different Separator or Line Ending

In [18]:
print('ACME', 50, 91.5)

ACME 50 91.5


In [19]:
print('ACME', 50, 91.5, sep=',')

ACME,50,91.5


In [20]:
print('ACME', 50, 91.5, sep=',', end='!!\n')

ACME,50,91.5!!


In [21]:
for i in range(5):
    print(i, end=" ")

0 1 2 3 4 

## Reading and Writing Binary Data
Use the open() function with mode rb or wb to read or write binary data.

In [22]:
file_name = "somefile.bin"

with open(file_name, "wb") as target_file:
    target_file.write(b"Hello World")


In [23]:
%%bash
cat somefile.bin

Hello World

In [24]:
with open(file_name, "rb") as target_file:
    print(target_file.read())


b'Hello World'


## Writing to a File That Doesn’t Already Exist
You want to write data to a file, but only if it doesn’t already exist on the filesystem. This is solved by opening the file with the little-known `x` mode of `open()` instead of the usual `w` mode.

In [26]:
file_name

'somefile.bin'

In [25]:
import os

os.path.isfile(file_name)

True

In [27]:
# Mode "x" creates the file exclusively and fails if it already exists.
# The payload is bytes, so the binary variant "xb" is required: with "xt"
# the write itself would be rejected with a TypeError on a fresh file.
# The FileExistsError is the point of this demo, so catch and display it
# instead of aborting the notebook.
try:
    with open(file_name, "xb") as target_file:
        target_file.write(b"Hello World")
except FileExistsError as error:
    print("FileExistsError:", error)


FileExistsError: [Errno 17] File exists: 'somefile.bin'

In [28]:
# Alternatively, test for the file explicitly before writing. Unlike the "x"
# mode, the existence test and a subsequent open() are two separate steps.
if not os.path.isfile(file_name):
    pass  # ... (placeholder: the write would go here)


## Performing I/O Operations on a String
Use the io.StringIO() and io.BytesIO() classes to create file-like objects that operate on string data. 

In [36]:
import io

s = io.StringIO()
s

<_io.StringIO at 0x2522494e828>

In [37]:
s.write("Hello World\n")

12

In [38]:
print("This is a test", file=s)

In [39]:
s.getvalue()

'Hello World\nThis is a test\n'

## Reading and Writing Compressed Datafiles
The gzip and bz2 modules make it easy to work with such files.

In [41]:
import gzip

file_name = "test_gzfile.gz"

with gzip.open(file_name, "wt", compresslevel=5) as target_file:
    target_file.write("Hello World!")


In [42]:
import os

os.path.isfile(file_name)

True

In [43]:
%%bash
cat test_gzfile.gz

‹Ù³]ÿtest_gzfile òHÍÉÉWÏ/ÊIQ   ÿÿ £)   

In [44]:
with gzip.open(file_name, "rt") as target_file:
    print(target_file.read())


Hello World!


## Reading Binary Data into a Mutable Buffer

In [45]:
import os.path

def read_into_buffer(file_name):
    """Load a file's bytes into a pre-sized, mutable bytearray.

    The buffer is allocated with the size reported by os.path.getsize()
    and then filled in place via readinto().

    Args:
        file_name: Path of the file to read.

    Returns:
        A bytearray holding the data read from the file.
    """
    size = os.path.getsize(file_name)
    contents = bytearray(size)
    with open(file_name, "rb") as source:
        # readinto() fills the existing buffer rather than creating a new bytes object.
        source.readinto(contents)
    return contents


In [47]:
with open("sample.bin", "wb") as target_file:
    target_file.write(b"Hello World")


In [48]:
buf = read_into_buffer("sample.bin")
buf

bytearray(b'Hello World')

## Manipulating Pathnames

In [52]:
import os

path = "/Users/beazley/Data/data.csv"
path

'/Users/beazley/Data/data.csv'

In [53]:
os.path.basename(path)

'data.csv'

In [54]:
os.path.dirname(path)

'/Users/beazley/Data'

In [55]:
os.path.join("tmp", "data", os.path.basename(path))

'tmp\\data\\data.csv'

## Testing for the Existence of a File

In [56]:
file_name

'test_gzfile.gz'

In [57]:
os.path.isfile(file_name)

True

In [59]:
os.path.isdir("/etc/passwd")

False

In [61]:
# exist_ok=True keeps this cell re-runnable: a plain os.mkdir() raises
# FileExistsError once the folder is left over from a previous run.
os.makedirs("test_folder", exist_ok=True)

In [62]:
os.path.isdir("test_folder")

True

In [63]:
# NOTE(review): for a directory this is the size the filesystem reports for
# the directory entry itself (platform-dependent), not the total size of the
# files inside it — do not read the 0 as "the folder is empty".
os.path.getsize("test_folder")

0

In [64]:
import time

time.ctime(os.path.getmtime("test_folder"))

'Sun Jun 23 09:13:22 2019'

## Getting a Directory Listing

In [66]:
import os

os.listdir(".")

['.git',
 '.ipynb_checkpoints',
 '1 - Data Structures and Algorithms.ipynb',
 '2 - Strings and Text.ipynb',
 '3 - Numbers, Dates, and Times.ipynb',
 '4 - Iterators and Generators.ipynb',
 '5 - Files and IO.ipynb',
 'README.md',
 'sample.bin',
 'somefile.bin',
 'test_file.txt',
 'test_folder',
 'test_gzfile.gz']

In [68]:
# all regular files
[name for name in os.listdir(".")
 if os.path.isfile(os.path.join(".", name))]

['1 - Data Structures and Algorithms.ipynb',
 '2 - Strings and Text.ipynb',
 '3 - Numbers, Dates, and Times.ipynb',
 '4 - Iterators and Generators.ipynb',
 '5 - Files and IO.ipynb',
 'README.md',
 'sample.bin',
 'somefile.bin',
 'test_file.txt',
 'test_gzfile.gz']

In [69]:
# and folders
[name for name in os.listdir(".")
 if os.path.isdir(os.path.join(".", name))]

['.git', '.ipynb_checkpoints', 'test_folder']

In [70]:
# jupyter notebook files
[name for name in os.listdir(".")
 if name.endswith(".ipynb")]

['1 - Data Structures and Algorithms.ipynb',
 '2 - Strings and Text.ipynb',
 '3 - Numbers, Dates, and Times.ipynb',
 '4 - Iterators and Generators.ipynb',
 '5 - Files and IO.ipynb']

In [71]:
# alternatively
import glob

glob.glob(os.path.join(".", "*.ipynb"))

['.\\1 - Data Structures and Algorithms.ipynb',
 '.\\2 - Strings and Text.ipynb',
 '.\\3 - Numbers, Dates, and Times.ipynb',
 '.\\4 - Iterators and Generators.ipynb',
 '.\\5 - Files and IO.ipynb']

In [72]:
from fnmatch import fnmatch

[name for name in os.listdir(".")
 if fnmatch(name, "*.ipynb")]

['1 - Data Structures and Algorithms.ipynb',
 '2 - Strings and Text.ipynb',
 '3 - Numbers, Dates, and Times.ipynb',
 '4 - Iterators and Generators.ipynb',
 '5 - Files and IO.ipynb']

## Bypassing Filename Encoding
By default, all filenames are encoded and decoded according to the text encoding returned by sys.getfilesystemencoding(). 

In [1]:
import sys

sys.getfilesystemencoding()

'utf-8'

In [2]:
with open("jalape\xf1o.txt", "w") as target_file:
    target_file.write('Spicy!')


In [9]:
import os

[name for name in os.listdir(".") if "jal" in name]

['jalapeño.txt']

In [11]:
[name for name in os.listdir(b".") if b"jal" in name]

[b'jalape\xc3\xb1o.txt']

When byte strings are supplied to file-related functions such as `os.listdir()`, the filename handling changes slightly: names are returned as raw, undecoded bytes instead of text.

## Printing Bad Filenames
When printing filenames of unknown origin, use this convention to avoid errors:

In [14]:
file_name = "jalape\xf1o.txt"

In [16]:
def bad_filename(file_name):
    """Return a printable form of file_name: its repr() minus the enclosing quotes.

    Announces the call on stdout first, so it is visible when the fallback
    path is taken.
    """
    print("Calling bad_filename!")
    quoted = repr(file_name)
    # Drop the first and last character, i.e. the quote marks added by repr().
    return quoted[1:-1]

# Print the name as-is; only if the output stream cannot encode it, fall back
# to the escaped form produced by bad_filename().
try:
    print(file_name)
except UnicodeEncodeError:
    print(bad_filename(file_name))


jalapeño.txt


## Adding or Changing the Encoding of an Already Open File


In [21]:
import urllib.request
import io

# urlopen() yields a binary stream; wrapping it in io.TextIOWrapper adds the
# decoding layer on top of the already open object. The with-statement closes
# the HTTP response once the text has been read, instead of leaving the
# connection open for the rest of the session.
with urllib.request.urlopen("http://www.python.org") as u:
    f = io.TextIOWrapper(u, encoding="utf-8")
    text = f.read()

In [23]:
text[:100]

'<!doctype html>\n<!--[if lt IE 7]>   <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9">   <![endif]-->\n<!-'

In [24]:
import sys

sys.stdout.encoding

'UTF-8'

The I/O system is built as a series of layers. 

In [26]:
# Opened without a with-block so that f stays available for inspecting its
# layers (f, f.buffer, f.buffer.raw) in the following cells.
# Call f.close() when finished, otherwise the handle stays open.
f = open('sample.txt','w')

In [27]:
f

<_io.TextIOWrapper name='sample.txt' mode='w' encoding='cp1252'>

In [28]:
f.buffer

<_io.BufferedWriter name='sample.txt'>

In [29]:
f.buffer.raw

<_io.FileIO name='sample.txt' mode='wb' closefd=True>

io.TextIOWrapper is a text-handling layer that encodes and decodes Unicode, io.BufferedWriter is a buffered I/O layer that handles binary data, and io.FileIO is a raw file representing the low-level file descriptor in the operating system.

## Writing Bytes to a Text File

In [32]:
import sys

# The recorded output shows this kernel's stdout accepting bytes directly.
# NOTE(review): a regular text-mode stream raises TypeError for bytes; the
# portable form is sys.stdout.buffer.write(b'Hello\n') — confirm for your kernel.
sys.stdout.write(b'Hello\n')  # this works fine now...

Hello


## Making Temporary Files and Directories
The tempfile module has a variety of functions for performing this task.