# Files

# tempfile module
- will create a valid unique temporary pathname on any OS

In [None]:
import os

# module for making temp files
import tempfile

# NamedTemporaryFile really creates (and opens) a file; leaving the
# 'with' block closes it, and closing deletes it again (delete=True is
# the default).  What is kept is a unique pathname that no longer
# exists on disk.  Closing explicitly avoids depending on garbage
# collection to remove the file.
with tempfile.NamedTemporaryFile() as tf:
    tp = tf.name
with tempfile.NamedTemporaryFile() as tf:
    tp2 = tf.name

# os.path.exists(path) - True if file path exists

[tp, tp2, os.path.exists(tp), os.path.exists(tp2)]

# Getting file status

In [None]:
# os.path.exists and os.access report
# file status without throwing errors;
# os.stat throws an error if the path doesn't exist.

# this is similar to the linux 'touch' command:
# opening for writing and closing straight away
# leaves an empty file behind

with open(tp, 'w'):
    pass

def ac(p):
    """Report the access rights of path *p*.

    Returns a list of four booleans from os.access, in the order
    [exists, readable, writable, executable].
    """
    modes = (os.F_OK, os.R_OK, os.W_OK, os.X_OK)
    return [os.access(p, mode) for mode in modes]

# run the access checks on the empty file created above
ac(tp)


In [None]:
# last accessed time, last modified time
# (both come back as timestamps: seconds since the epoch)

[os.path.getatime(tp), os.path.getmtime(tp)]

In [None]:
# does a path refer to a file or a directory?
# isfile and isdir each answer with a bool

[os.path.isfile(tp), os.path.isdir(tp)]

In [None]:
# gets several pieces of info in one call
# the result is kept in 'sr' so its fields can be read afterwards

sr = os.stat(tp)
sr

In [None]:
# get attributes
# the stat result exposes its fields as attributes;
# st_mode and st_atime are read here

[sr.st_mode, sr.st_atime]

In [None]:
# removes a file, but raises error if it doesn't exist
# afterwards the access checks are run again on the same path

os.remove(tp)
ac(tp)

In [None]:
# file is gone
# (tp was deleted with os.remove, so this checks a path that no longer exists)

os.path.exists(tp)

In [None]:
# stat gets upset and throws an error if the file doesn't exist
# NOTE: this cell is expected to fail - tp was removed earlier

os.stat(tp)

In [None]:
# Returns list of files and dirs in a directory
# can use isfile and isdir to figure out 
# which is which
# NOTE: assumes an 'anaconda3' directory exists in the home
# directory; expanduser replaces the leading '~' with the home dir

fds = os.listdir(os.path.expanduser('~/anaconda3'))
fds

# 'walk' - gets all the files and dirs under a start dir
- very easy to use

In [None]:
# returns a generator...
# the entries are only produced once 'g' is iterated
# NOTE: assumes '~/anaconda3/ssl' exists

e = os.path.expanduser('~/anaconda3/ssl')
print(e)
g = os.walk(e)
g

In [None]:
# each element from the walk generator
# is a tuple (dirpath, dirs in dirpath, files in dir)
# list() drains the generator and collects every tuple

list(g)

# open function
- used to open files for reading and writing

# Writing files 
- no automatic newlines

In [None]:
# open file, write to the file object, close the file object
# can be error prone - easy to forget to close. also, if there
# is an error, the close call could be skipped
# not closing files can cause a server to crash
# 'w' is the 'open mode' - tells 'open' to 
# open the file for writing
# write() adds nothing by itself, so the '\n' is appended by hand

fd = open(tp, 'w')
for e in ['one', 'two', 'three', 'four']:
    fd.write(e + '\n')
fd.close()

# with 
- 'with' is a 'context manager'
- binds return value from open to 'fd'
- note that the ':' and the indentation define a statement block over which 'fd' will be bound
- 'with' will automatically close the file when the 'with' block is exited, even if by error

In [None]:
# same output as the manual open/close version, but the file
# is closed automatically when the 'with' block ends
with open(tp, 'w') as fd:
    for e in ('one', 'two', 'three', 'four'):
        fd.write(e)
        fd.write('\n')

In [None]:
# could do one write with join
# join only puts '\n' BETWEEN the items, so the final newline
# is added by hand; without it the last line of the file would
# be unterminated, unlike the loop version

with open(tp, 'w') as fd:
    fd.write('\n'.join(['one', 'two', 'three', 'four']) + '\n')

In [None]:
# or write out the string with newlines
# one write call; the newlines are already inside the string

with open(tp, 'w') as fd:
    fd.write("one\ntwo\nthree\nfour\n")

In [None]:
# before append
# the stat result includes the file size (st_size)

os.stat(tp)

In [None]:
# can append (open mode 'a') to an existing file;
# what is already in the file is kept and new text goes at the end

with open(tp, 'a') as f:
    for l in ('five', 'six'):
        f.write(l)
        f.write('\n')

In [None]:
# file is longer now
# (two more lines were appended since the previous stat call)

os.stat(tp)

# print function output can go to a file

In [None]:

# print's values are separated by 'sep' and sent
# to 'file' instead of the screen
with open(tp2, "w") as f:
    print(*range(1, 5), sep='\n', file=f)

# read the file back to show what print wrote
with open(tp2, 'r') as f:
    print(f.read())

# Reading files - eager
- read the entire file immediately

In [None]:
# eager read - read the entire file into one string
# 'r' tells 'open' to open the file for reading
# read() with no argument returns everything left in the file

with open(tp, 'r') as fd:
    print(fd.read())

In [None]:
# eager read - get a list of all the lines 
# each string in the list keeps its trailing newline

with open(tp,'r') as fd:
    print(fd.readlines())

# Reading files - lazy
- suppose you are looking for a substring in a huge unsorted file of text lines
    - lazy read probably wins
    - don't have to read in entire file before you can start searching
    - don't have to allocate memory to hold the whole file
    - once you find the substring, you don't have to read the rest of the file

In [None]:
# read one line at a time

with open(tp, 'r') as fd:
    x = fd.readline()
    # readline returns an empty string when the file is finished
    while x != '':
        print(x)
        x = fd.readline()

In [None]:
# note the double spacing in the previous output:
# each line in the file has a newline,
# plus print is adding one
# can turn off the print newline
# with keyword arg 'end'

with open(tp, 'r') as fd:
    x = fd.readline()
    # readline returns an empty string when the file is finished
    while x != '':
        print(x, end='')
        x = fd.readline()

In [None]:
# opened without 'with' on purpose: the following cells
# step through this same open file with next()
fd = open(tp, 'r')
fd

In [None]:
# a file object is an iterator 
# over the file lines
# iter(fd) hands back fd itself, which is what the last entry checks

[fd, iter(fd), fd is iter(fd)]

In [None]:
# next() returns the next unread line of the file
next(fd)

In [None]:
# don't have to finish iterator...
# each next() call picks up where the previous one stopped

next(fd)

In [None]:
# note with readline and readlines 
# each line has a trailing '\n', 
# which you usually don't want
# use strip() to remove
# can this cause a problem?
# (strip() removes ALL leading and trailing whitespace,
# not only the final newline)

'one\n'.strip()

In [None]:
# read N chars at a time

with open(tp, 'r') as f:
    s = f.read(3)
    # read returns an empty string once nothing is left
    while s != '':
        print(s)
        s = f.read(3)
        

In [None]:
# ... or can finish iterator later on
# 'fd' was opened without 'with', so nothing closes it automatically;
# close it here once the remaining lines have been read
# (the 'finally' runs even if one of the next() calls fails)

try:
    rest = [next(fd), next(fd), next(fd), next(fd)]
finally:
    fd.close()
rest

# Can do I/O in unicode or binary
- 'open' defaults to 'str' (unicode)
- pass 'b' flag to 'open' for 'bytes'(binary)


In [None]:
uni = '\U00002119\u01b4\u2602\u210c\xf8\u1f24'

# the same text encoded three ways; each result is a 'bytes' object
utf8 = uni.encode('utf-8')
utf16 = uni.encode('utf-16')
utf32 = uni.encode('utf-32')

[uni, utf8, utf16, utf32]

In [None]:
# won't work - file stream expects a
# 'str' by default, but utf32 is type 'bytes'
# (the write below raises TypeError)

import tempfile

# NamedTemporaryFile creates a real file; closing it via 'with'
# deletes it again, leaving just a unique pathname to reuse below
# (no reliance on garbage collection to clean up)
with tempfile.NamedTemporaryFile() as tmp:
    path = tmp.name

with open(path, "w") as f:
    f.write(utf32)

In [None]:
# make a binary stream by adding 'b' flag to 'open'
# a binary stream accepts 'bytes', so writing utf32 works here

with open(path, 'bw') as f:
    f.write(utf32)

In [None]:
# when no encoding is given, text mode uses a platform-dependent
# default (often utf-8, but not everywhere).  utf-8 is requested
# explicitly so this demonstration behaves the same on every machine:
# the file we wrote is utf-32, so this read fails

# but, sometimes if you give open the
# wrong encoding, it will read
# w/o error and give you garbage!

with open(path, "r", encoding='utf-8') as f:
    print(f.read())

In [None]:
# tell 'open' the right unicode encoding
# the file holds utf-32 bytes, so decoding as utf-32 gives the text back

with open(path, "r" , encoding='utf-32') as f:
    print(f.read())

In [None]:
# can read file bytes
# 'rb' opens a binary stream: read() returns 'bytes', nothing is decoded

with open(path, "rb") as f:
    b = f.read()
b

In [None]:
# the bytes that were written to the file, for comparison
utf32

# In memory "files"
- very useful 
- [doc](https://docs.python.org/3.5/library/io.html#io.StringIO)

In [None]:
import io

# StringIO is a text "file" that lives entirely in memory
ios = io.StringIO()

# it can be the target of print (which adds a newline) ...
print('one', file=ios)
# ... or be written to directly (no newline added)
ios.write('two')

# everything written so far, as one string
ios.getvalue()

In [None]:
# a StringIO can start out with content, which is then read like a file
ios = io.StringIO('asdfasdf')

ios.read()


# shutil module 
- move, copy, delete file trees
- [doc](https://docs.python.org/3.5/library/shutil.html)

# glob - linux style filename matching
- [doc](https://docs.python.org/3.5/library/glob.html)

# modules that R/W archive formats, like zip and tar
- [doc](https://docs.python.org/3.5/library/archiving.html)
