5.1. Reading and Writing Text Data

5.2. Printing to a File

In [1]:
# Read the entire file into one string; 'rt' opens it for reading in text mode.
with open('data/somefile.txt', 'rt') as f:
    data = f.read()

# The with-block has closed the file by now; the text lives on in `data`.
print(data)

Lorem ipsum dolor sit amet, consectetur adipiscing 
elit, sed do eiusmod tempor incididunt ut labore et dolore magna 
aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco 
laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor 
in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla 
pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa 
qui officia deserunt mollit anim id est laborum.


In [7]:
# Walk the file one line at a time instead of loading it all at once.
with open('data/somefile.txt', 'rt') as f:
    for line in f:
        # Each line keeps its trailing newline and print() adds another one,
        # which is why the output below is double-spaced.
        print(line)


Lorem ipsum dolor sit amet, consectetur adipiscing 

elit, sed do eiusmod tempor incididunt ut labore et dolore magna 

aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco 

laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor 

in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla 

pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa 

qui officia deserunt mollit anim id est laborum.


In [3]:
# 'wt' opens the file for writing text; existing contents are replaced.
with open('data/wrteToSomeFile.txt', 'wt') as f:
    f.write('abc')

In [4]:
line1 = 'abc'
line2 = 'def'
# print() can target any text-mode file through file=; each call still
# appends its usual line ending, so the two values land on separate lines.
with open('data/wrteToSomeFile.txt', 'wt') as f:
    print(line1, file=f)
    print(line2, file=f)

In [5]:
# Manage the file by hand instead of with a with-statement. The try/finally
# guarantees close() runs even if read() or print() raises, so the handle
# is never left open.
f = open('data/somefile.txt', 'rt')
try:
    data = f.read()
    print(data)
finally:
    f.close()

Lorem ipsum dolor sit amet, consectetur adipiscing 
elit, sed do eiusmod tempor incididunt ut labore et dolore magna 
aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco 
laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor 
in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla 
pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa 
qui officia deserunt mollit anim id est laborum.


5.3. Printing with a Different Separator or Line Ending

In [8]:
# With default settings print() separates its arguments with a single space.
print('ACME', 50, 91.5)

ACME 50 91.5


In [9]:
# sep= replaces the space placed between arguments.
print('ACME', 50, 91.5, sep=',')

ACME,50,91.5


In [11]:
# end= replaces the newline normally written after the last argument.
print('ACME', 50, 91.5, sep=',', end='!!\n\n')

ACME,50,91.5!!



In [13]:
# str.join() produces the same text but only accepts strings, which is why
# the numbers are written as '50' and '91.5' here.
print(','.join(['ACME', '50', '91.5']))

ACME,50,91.5


In [14]:
row = ('ACME', '50', '91.5')
# *row passes each element as its own argument, so print() inserts the separator.
print(*row, sep=',')

ACME,50,91.5


5.4. Reading and Writing Binary Data

In [15]:
# 'wb' opens the file for binary writing; write() then takes bytes, not str.
with open('data/somefile.bin', 'wb') as f:
    f.write(b'Hello world')

In [16]:
# 'rb' reads the raw contents back as a bytes object with no text decoding.
with open('data/somefile.bin', 'rb') as f:
    data = f.read()

print(data)

b'Hello world'


5.5. Writing to a File That Doesn’t Already Exist

In [17]:
# 'x' asks for exclusive creation: the open only succeeds when the file does
# not exist yet, so this cell fails if 'data/somefile' is left over from an
# earlier run.
with open('data/somefile', 'xt') as f:
    f.write('Hello\n')

In [18]:
# Repeating the exclusive-create open fails because the previous cell already
# created the file. The expected error is caught and displayed so that a full
# "Run All" continues past this cell instead of stopping on the exception.
try:
    with open('data/somefile', 'xt') as f:
        f.write('Hello\n')
except FileExistsError as e:
    print('FileExistsError:', e)

FileExistsError: [Errno 17] File exists: 'data/somefile'

In [19]:
import os
# Pre-check alternative to the 'x' mode: only write when the path is absent.
# NOTE(review): the existence check and the open are separate steps, so the
# file could appear in between; the 'xt' open used above has no such gap.
file = 'data/somefile'
if not os.path.exists(file):
    with open(file, 'wt') as f:
        f.write('Hello\n')
else:
    print('File already exists!')

File already exists!


5.6. Performing I/O Operations on a String

In [22]:
import io
# StringIO is an in-memory text buffer with a file-like interface.
s = io.StringIO()
# write() returns the number of characters written, shown as the cell output.
s.write('Hello World\n')

12

In [23]:
# The buffer can be the target of print(), just like a real file.
print('This is a test', file=s)

In [24]:
# getvalue() returns everything written to the buffer so far.
s.getvalue()

'Hello World\nThis is a test\n'

In [26]:
# A StringIO seeded with text can be read like a file; `s` is rebound here.
s = io.StringIO('Hello\nWorld\n')
# Read just the first four characters.
s.read(4)

'Hell'

In [27]:
# With no size, read() returns what is left after the earlier read(4).
s.read()

'o\nWorld\n'

In [28]:
# BytesIO is the binary counterpart of StringIO: it stores bytes, not str.
s = io.BytesIO()
s.write(b'binary data')
s.getvalue()

b'binary data'

5.7. Reading and Writing Compressed Datafiles

In [30]:
import gzip
file = 'data/somefile.gz'
text = 'Hello\nWorld\n'
# gzip.open() in 'wt' mode accepts str and compresses it on the way out.
with gzip.open(file, 'wt') as f:
    f.write(text)

# Reading back in 'rt' mode decompresses and yields the original text.
with gzip.open(file, 'rt') as f:
    data = f.read()
    print(data)

Hello
World



In [31]:
import bz2
file = 'data/somefile.bz2'
text = 'Hello\nWorld\n'
# bz2.open() is used the same way as gzip.open() in the previous cell.
with bz2.open(file, 'wt') as f:
    f.write(text)

# Round-trip check: the text read back matches what was written.
with bz2.open(file, 'rt') as f:
    data = f.read()
    print(data)

Hello
World



5.8. Iterating Over Fixed-Sized Records

In [40]:
from functools import partial

# Size of each chunk, counted in characters because the file is opened in
# text mode below.
RECORD_SIZE = 20

with open('data/somefile.txt', 'rt') as f:
    # iter(callable, sentinel) calls f.read(RECORD_SIZE) repeatedly and stops
    # once it returns '' (end of file).
    records = iter(partial(f.read, RECORD_SIZE), '')
    for r in records:
        # Newlines count toward the chunk size, so a printed chunk may span
        # two output lines.
        print(r)

Lorem ipsum dolor si
t amet, consectetur 
adipiscing 
elit, se
d do eiusmod tempor 
incididunt ut labore
 et dolore magna 
al
iqua. Ut enim ad min
im veniam, quis nost
rud exercitation ull
amco 
laboris nisi u
t aliquip ex ea comm
odo consequat. Duis 
aute irure dolor 
in
 reprehenderit in vo
luptate velit esse c
illum dolore eu fugi
at nulla 
pariatur. 
Excepteur sint occae
cat cupidatat non pr
oident, sunt in culp
a 
qui officia deser
unt mollit anim id e
st laborum.


5.9. Reading Binary Data into a Mutable Buffer

In [45]:
import os.path

def read_into_buffer(filename):
    """Read a whole binary file into a mutable bytearray.

    The buffer is sized from the already-open file (os.fstat on its
    descriptor) rather than from the path, so the size and the data always
    refer to the same file. If fewer bytes are read than the buffer holds,
    the unused tail is dropped so that zero padding is never mistaken for
    file content.

    Args:
        filename: Path of the file to read.

    Returns:
        A bytearray holding the bytes that were read; empty for an empty file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filename, 'rb') as f:
        buf = bytearray(os.fstat(f.fileno()).st_size)
        # readinto() fills the existing buffer in place and reports how many
        # bytes it actually stored.
        nread = f.readinto(buf) or 0
    if nread < len(buf):
        del buf[nread:]
    return buf

In [43]:
# Create a small binary file for read_into_buffer() to load in the next cell.
file = 'data/sample.bin'
with open(file, 'wb') as f:
    f.write(b'Hello World')

In [46]:
# The result is a bytearray, i.e. a buffer that can be modified in place.
buf = read_into_buffer(file)
buf

bytearray(b'Hello World')

In [48]:
# A bytearray can be handed straight to write() on a binary file.
with open('data/newsample.bin', 'wb') as f:
    f.write(buf)

5.10. Memory Mapping Binary Files

In [56]:
import os
import mmap
def memory_map(filename, access=mmap.ACCESS_WRITE):
    """Memory-map an existing file in its entirety and return the mmap object.

    Args:
        filename: Path of an existing file to map.
        access: One of the mmap.ACCESS_* constants. Defaults to
            mmap.ACCESS_WRITE, so changes made through the mapping are
            written to the file.

    Returns:
        An mmap.mmap object covering the whole file. The caller closes it,
        either with close() or by using it as a context manager.

    Raises:
        OSError: If the file cannot be inspected or opened.
    """
    size = os.path.getsize(filename)
    fd = os.open(filename, os.O_RDWR)
    try:
        return mmap.mmap(fd, size, access=access)
    finally:
        # mmap keeps its own duplicate of the descriptor, so the one opened
        # here is released right away instead of leaking on every call.
        os.close(fd)

In [54]:
# Build a file of exactly `size` bytes: seek to the last position and write a
# single zero byte. The skipped-over region reads back as zero bytes too.
size = 1000000
file = 'data/data'
with open(file, 'wb') as f:
    f.seek(size - 1)
    f.write(b'\x00')

In [57]:
# Map the file just created, using memory_map()'s default write access.
m = memory_map(file)

In [58]:
# The mapping is as long as the file: `size` bytes.
len(m)

1000000

In [59]:
# Slicing the mapping returns a bytes object.
m[0:10]

b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'

In [60]:
# Indexing a single position returns that byte as an int.
m[0]

0

In [62]:
# Slice assignment writes through the mapping; the 11-byte value matches the
# 11-byte slice it replaces.
m[0:11] = b'Hello World'
m.close()

In [64]:
# Re-read the file normally to confirm the change made through the mapping
# reached the file itself.
with open(file, 'rb') as f:
    print(f.read(50))

b'Hello World\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'


In [65]:
# The mmap object also works as a context manager and is closed on leaving
# the block (the next cell checks m.closed).
with memory_map(file) as m:
    print(len(m))
    print(m[0:10])

1000000
b'Hello Worl'


In [66]:
# True here because the with-block above closed the mapping on exit.
m.closed

True

5.11. Manipulating Pathnames

In [67]:
import os
# Sample path string picked apart by the os.path calls in the cells below.
path = '/Users/beazley/Data/data.csv'

In [68]:
# Last component of the path.
os.path.basename(path)

'data.csv'

In [69]:
# Everything before the last component.
os.path.dirname(path)

'/Users/beazley/Data'

In [70]:
# Join components with the platform's separator instead of concatenating strings.
os.path.join('tmp', 'data', os.path.basename(path))

'tmp/data/data.csv'

5.12. Testing for the Existence of a File

In [None]:
import os

In [71]:
# Does anything exist at this path? (Results depend on the machine the
# notebook runs on.)
os.path.exists('/etc/passwd')

True

In [72]:
# A path that does not exist gives False rather than raising.
os.path.exists('/tmp/spam')

False

In [73]:
# Is the path a regular file?
os.path.isfile('/etc/passwd')

True

In [74]:
# Is the path a directory?
os.path.isdir('/etc/passwd')

False

In [75]:
# Is the path a symbolic link?
os.path.islink('/usr/local/bin/python3')

True

In [76]:
# Resolve the symbolic link to the file it ultimately points at.
os.path.realpath('/usr/local/bin/python3')

'/usr/local/Cellar/python3/3.6.4_2/Frameworks/Python.framework/Versions/3.6/bin/python3.6'

In [77]:
# File size in bytes.
os.path.getsize('/etc/passwd')

6774

In [78]:
# Last-modification time as a float timestamp (seconds since the epoch).
os.path.getmtime('/etc/passwd')

1501033047.0

In [79]:
import time
# Turn the raw timestamp into a human-readable local-time string.
time.ctime(os.path.getmtime('/etc/passwd'))

'Tue Jul 25 18:37:27 2017'

5.13. Getting a Directory Listing

In [81]:
import os
directory = '.'
# os.listdir() returns bare entry names, so each one is joined back onto the
# directory before being tested; only regular files are kept.
names = [name for name in os.listdir(directory) 
         if os.path.isfile(os.path.join(directory, name))]
names

['.DS_Store',
 '03-Numbers-Dates-and-Times.ipynb',
 '04-Iterators-and-Generators.ipynb',
 '.gitignore',
 '01-Data-Structures-and-Algorithms.ipynb',
 '02-String-and-Text.ipynb',
 '05-Files-and-IO.ipynb']

In [82]:
# Same listing, filtered to directories instead of files.
dir_names = [name for name in os.listdir(directory) 
         if os.path.isdir(os.path.join(directory, name))]
dir_names

['www', '.ipynb_checkpoints', 'data']

In [87]:
# Keep only regular files whose name ends in '.ipynb'.
jupyter_files = [name for name in os.listdir(directory)
                if os.path.isfile(os.path.join(directory, name))
                                 and name.endswith('.ipynb')]
jupyter_files

['03-Numbers-Dates-and-Times.ipynb',
 '04-Iterators-and-Generators.ipynb',
 '01-Data-Structures-and-Algorithms.ipynb',
 '02-String-and-Text.ipynb',
 '05-Files-and-IO.ipynb']

In [88]:
import glob
# glob matches the pattern itself; results keep the './' prefix from the pattern.
jupyter_files = glob.glob('./*.ipynb')
jupyter_files

['./03-Numbers-Dates-and-Times.ipynb',
 './04-Iterators-and-Generators.ipynb',
 './01-Data-Structures-and-Algorithms.ipynb',
 './02-String-and-Text.ipynb',
 './05-Files-and-IO.ipynb']

In [89]:
from fnmatch import fnmatch
# fnmatch applies the shell-style pattern to each bare name. There is no
# isfile() test here, so a directory named '*.ipynb' would be included too.
jupyter_files = [name for name in os.listdir(directory)
                if fnmatch(name, '*.ipynb')]
jupyter_files

['03-Numbers-Dates-and-Times.ipynb',
 '04-Iterators-and-Generators.ipynb',
 '01-Data-Structures-and-Algorithms.ipynb',
 '02-String-and-Text.ipynb',
 '05-Files-and-IO.ipynb']

5.14. Bypassing Filename Encoding

In [91]:
import sys
# Name of the encoding Python uses for file names on this system.
sys.getfilesystemencoding()

'utf-8'

In [92]:
# '\xf1' is the character ñ, so this creates 'jalapeño.txt' (see the
# directory listing in the next cell).
with open('data/jalape\xf1o.txt', 'w') as f:
    f.write('Spicy!')

In [93]:
import os
# Given a str path, listdir() returns decoded (str) file names.
os.listdir('data')

['newsample.bin',
 'somefile.bin',
 'somefile.txt',
 'somefile.bz2',
 'somefile',
 'jalapeño.txt',
 'wrteToSomeFile.txt',
 '.ipynb_checkpoints',
 'somefile.gz',
 'data',
 'sample.bin']

In [94]:
# Given a bytes path, listdir() returns the raw, undecoded names; the ñ
# appears as the two bytes \xc3\xb1.
os.listdir(b'data/')

[b'newsample.bin',
 b'somefile.bin',
 b'somefile.txt',
 b'somefile.bz2',
 b'somefile',
 b'jalape\xc3\xb1o.txt',
 b'wrteToSomeFile.txt',
 b'.ipynb_checkpoints',
 b'somefile.gz',
 b'data',
 b'sample.bin']

In [96]:
# open() also accepts the raw bytes form of a file name, bypassing the
# str-to-bytes filename encoding step.
with open(b'data/jalape\xc3\xb1o.txt') as f:
    print(f.read())

Spicy!
