# File I/O

# Pathname manipulation

In [1]:
# Most pathname functions will NOT expand '~'
# use os.path.expanduser to do that

import os

ep = os.path.expanduser('~/foo/bar/zap.txt')
ep

'C:\\Users\\reneh/foo/bar/zap.txt'

In [2]:
# get pieces of paths

[os.path.split(ep), os.path.splitext(ep)]

[('C:\\Users\\reneh/foo/bar', 'zap.txt'),
 ('C:\\Users\\reneh/foo/bar/zap', '.txt')]

In [3]:
# put paths together

os.path.join('/Users', 'lstead', 'foo/', 'bar', 'zap.txt')

'/Users\\lstead\\foo/bar\\zap.txt'

In [4]:
# leading /  on foo eliminates components on the left
os.path.join('/Users', 'lstead', '/foo', 'bar', 'zap.txt')

'/foo\\bar\\zap.txt'

# Getting file status

In [7]:
# os.path.exists and os.access report file status without throwing errors
# os.stat throws an error if the path doesn't exist.
import os

# Similar to the Unix `touch` command: create the file if it is missing
# and set its access/modification times to "now".
path = '/tmp/touch'
# Append mode ('a') creates a missing file but, unlike 'w', never truncates
# an existing one -- a real `touch` leaves the file contents alone.
with open(path, 'a'):
    pass
# times=None sets both the access time and the modified time to the current time.
os.utime(path, None)


def ac(p):
    """Report the access status of path `p` as a list of four booleans.

    The result is ordered [exists, readable, writable, executable], one
    os.access() call per flag.  A path that does not exist yields False
    for every entry instead of raising an error.
    """
    exists = os.access(p, os.F_OK)
    readable = os.access(p, os.R_OK)
    writable = os.access(p, os.W_OK)
    executable = os.access(p, os.X_OK)
    return [exists, readable, writable, executable]

ac(path)


FileNotFoundError: [Errno 2] No such file or directory: '/tmp/touch'

In [6]:
# last accessed time, last modified time
[os.path.getatime(path), os.path.getmtime(path)]

FileNotFoundError: [WinError 3] The system cannot find the path specified: '/tmp/touch'

In [None]:
# file exists predicate

os.path.exists(path)

In [None]:
[os.path.isfile(path), os.path.isdir(path)]

In [None]:
[os.path.isfile('/tmp'), os.path.isdir('/tmp')]

In [None]:
os.stat(path)

In [None]:
# removes a file, but throws error if it doesn't exist

os.remove(path)
ac(path)

In [None]:
# file is gone

os.path.exists(path)

In [None]:
# stat gets upset and throws an error if the file doesn't exist

os.stat(path)

In [None]:
# Returns list of files and dirs in a directory
# can use isfile and isdir to figure out which is which

os.listdir( os.path.join(os.path.expanduser('~'), 'anaconda'))

# 'walk' and get all the files and dirs under a start dir

In [None]:
g = os.walk('~/anaconda/ssl')

In [None]:
# didn't work - walk doesn't like '~'

next(g)

In [None]:
# returns a generator...

e = os.path.expanduser('~/anaconda/ssl')
print(e)
g = os.walk(e)
g

In [None]:
# nicer than os.listdir() in that files and dirs are in separate lists
# returns (dirpath, dirs in dirpath, files in dir)

next(g)

In [None]:
# descend into 'misc' directory

next(g)

In [None]:
# finished

next(g)

# open function
- used to open files for reading and writing

# Writing files 
- no automatic newlines

In [None]:
# Manual version: open the file, write to the file object, close the file object.
# This is error prone - it is easy to forget the close() call, and if an
# error is raised before close() is reached, the call is skipped entirely.
# Not closing files can cause a long-running program such as a server to crash.
# 'w' is the 'open mode' - it tells 'open' to open the file for writing.

path = '/tmp/four.txt'
# open() returns a file object, bound here to 'fd'
fd = open(path, 'w')
for e in ['one', 'two', 'three', 'four']:
    # write() adds no newline of its own, so append one to each item
    fd.write(e + '\n')
# explicit close - this line is skipped if anything above raises
fd.close()

# with 
- 'with' is a statement that works with a 'context manager' (here, the file object returned by open)
- binds return value from open to 'fd'
- 'with' will automatically close the file when the 'with' block is exited, even if by error
- note ':' and indenting defines a statement block over which 'fd' will be bound

In [None]:
with open(path, 'w') as fd:
    for e in ['one', 'two', 'three', 'four']:
        fd.write(e + '\n')

In [None]:
# could do one write with join
# join only puts '\n' BETWEEN the items, so the final newline is added
# explicitly - without it the file would end with 'four' and no newline,
# unlike the one-write-per-line version

with open(path, 'w') as fd:
    fd.write('\n'.join(['one', 'two', 'three', 'four']) + '\n')

In [None]:
# or write out the string with newlines

with open(path, 'w') as fd:
    fd.write("one\ntwo\nthree\nfour\n")

In [None]:
# can append(open mode 'a') to an existing file

path = '/tmp/four.txt'
with open(path, 'a') as f:
    # writelines() adds no separators, so each item carries its own '\n'
    f.writelines(word + '\n' for word in ('five', 'six'))

# print function output can go to a file

In [None]:
path = '/tmp/print'

# print sends its output to any writable text stream given as 'file';
# 'sep' goes between the values and the usual trailing newline is still added
numbers = (1, 2, 3, 4)
with open(path, "w") as f:
    print(*numbers, sep='\n', file=f)

# read the file back to show what print wrote
with open(path, 'r') as f:
    contents = f.read()
print(contents)

# Reading files - eager
- read the entire file immediately

In [None]:
# eager read - read the entire file into one string
# 'r' tells 'open' to open the file for reading

with open(path, 'r') as fd:    
    print( fd.read())

In [None]:
# eager read - get a list of all the lines 

with open(path,'r') as fd:
    print(fd.readlines())

# Reading files - lazy
- suppose you are looking for a substring in a huge unsorted file of text lines
    - lazy read probably wins
    - don't have to read in entire file before you can start search
    - don't have to allocate memory to hold the whole file
    - once you find the substring, you don't have to read the rest of the file

In [None]:
# read one line at a time 

with open(path, 'r') as fd:
    # readline() returns the next line including its '\n';
    # it returns an empty string only when the file is finished
    x = fd.readline()
    while x != '':
        print(x)
        x = fd.readline()

In [None]:
# note double spacing
# each line in the file has a newline, plus print is adding one
# can turn off the print newline with keyword arg 'end'

with open(path, 'r') as fd:
    # readline() returns an empty string when the file is finished
    x = fd.readline()
    while x != '':
        # end='' stops print from adding a second newline after the line's own
        print(x, end='')
        x = fd.readline()

In [None]:
fd = open('/tmp/four.txt')
fd

In [None]:
# a file object is an iterator over the file's lines

[fd, iter(fd), fd is iter(fd)]

In [None]:
next(fd)

In [None]:
# don't have to finish iterator...

next(fd)

In [None]:
# note with readline and readlines each line has a trailing '\n', 
# which you usually don't want
# use strip() to remove
# can this cause a problem?

'one\n'.strip()

In [None]:
# read N chars at a time

with open(path, 'r') as f:
    # read(3) returns at most 3 characters; an empty string means end of file
    s = f.read(3)
    while s != '':
        print(s)
        s = f.read(3)
        

In [None]:
# ... or can finish iterator later on

[next(fd), next(fd), next(fd), next(fd)]

In [None]:
# exhausted, can not be used again

next(fd)

# In memory "files"
- very useful 
- [doc](https://docs.python.org/3.5/library/io.html#io.StringIO)

In [None]:
import io

ios = io.StringIO()

print('one', file=ios)
ios.write('two')

ios.getvalue()

In [None]:
ios = io.StringIO('asdfasdf')

ios.read()

# shutil module 
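- has utilities for creating and unpacking file archives (`make_archive`, `unpack_archive`):
    - archive formats - 'zip', 'tar'
    - tar archives can also be compressed - 'gztar' (gzip), 'bztar' (bz2), 'xztar' (xz)
    - individual compressed files are read and written with the separate 'gzip' and 'bz2' modules; 'hdf5' needs a third-party package such as h5py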
- move and copy files
- [doc](https://docs.python.org/3.5/library/shutil.html)