# File Handling in Python

In [10]:
# Open in 'w' mode (truncates existing content), write two pieces, then close by hand.
handle = open('new_file.txt', mode='w')
handle.write('Hello')
handle.write('World!')  # write() adds no separator or newline of its own
handle.close()

In [11]:
# No mode given, so the file is opened with the default 'r' (read, text mode).
# The handle stays open here; the next cell closes it.
f = open('new_file.txt')
# Bare last expression: the notebook displays the repr of the returned string.
f.read()


'HelloWorld!'

In [12]:
# Release the handle that the previous cell left open.
f.close()

Adding new lines with '\n'

'r'	open for reading (default)    
'w'	open for writing, truncating the file first    
'x'	open for exclusive creation, failing if the file already exists    
'a'	open for writing, appending to the end of the file if it exists    
'b'	binary mode    
't'	text mode (default)   
'+'	open a disk file for updating (reading and writing)    

In [13]:
# Write new lines
# '\n' inside the written text is what ends the first line.
handle = open('new_file.txt', mode='w')
handle.write('Hello\n')
handle.write('World!')
handle.close()

In [14]:
# Reopen for reading; the handle is left open and closed in the following cell.
f = open('new_file.txt')
# The displayed repr shows the stored newline as the escape sequence \n.
f.read()

'Hello\nWorld!'

In [15]:
# Release the handle that the previous cell left open.
f.close()

## A better way to open/close files. 

In [16]:
# The with-block closes the file on exit, so no explicit close() is needed.
with open('new_file.txt', mode='w') as out:
    out.write('Hello\n')
    out.write('World!')

In [17]:
# Read the whole file back; print() renders the stored '\n' as a real line break.
with open('new_file.txt') as src:
    contents = src.read()
    print(contents)

Hello
World!


### Looping over a list and writing to a file

In [18]:
# Writing a list to a file
words = ['Hello', 'Word!', 'Goodbye']
with open('list_file.txt', mode='w') as out:
    for item in words:
        # Nothing separates the items, so they end up glued together
        out.write(item)

with open('list_file.txt') as src:
    print(src.read())

HelloWord!Goodbye


In [19]:
# Writing a list to a file as csv
words = ['Hello', 'Word!', 'Goodbye']
# Build the single comma-separated line first, then write it in one call
csv_line = ",".join(words)
with open('list_file.txt', mode='w') as out:
    out.write(csv_line)

with open('list_file.txt') as src:
    print(src.read())

Hello,Word!,Goodbye


In [20]:
# Writing a list to a file with new line
words = ['Hello', 'Word!', 'Goodbye']
with open('list_file.txt', mode='w') as out:
    for item in words:
        # Terminate every entry with a new line character
        out.write(item + "\n")

with open('list_file.txt') as src:
    print(src.read())

Hello
Word!
Goodbye



In [21]:
# Reading a file line by line
with open('list_file.txt') as src:
    for text_line in src:
        # Each line still carries its own '\n', so keep print() from adding one
        print(text_line, end='')

Hello
Word!
Goodbye


In [22]:
# Only print out lines that start with H
with open('list_file.txt') as src:
    for text_line in src:
        # Skip everything that does not begin with 'H'
        if not text_line.startswith('H'):
            continue
        print(text_line, end='')

Hello


In [23]:
# writing a list of lists (2d array) to a file
# str.join() only accepts strings, so the int in the third column makes this
# attempt fail. The error is the point of the cell, so it is caught and
# displayed instead of aborting a "Restart & Run All".
presidents = [
    ['first', 'last', 'number'],
    ['Donald', 'Trump', 45],
    ['Barack', 'Obama', 44],
    ['George', 'Bush', 43],
    ['Bill', 'Clinton', 42]
]

try:
    # Same target file as the corrected version in the next cell
    with open('presidents.txt', 'w') as f:
        for president in presidents:
            f.write(','.join(president))
except TypeError as exc:
    print('{}: {}'.format(type(exc).__name__, exc))
    

TypeError: sequence item 2: expected str instance, int found

In [24]:
# writing a list of lists (2d array) to a file
presidents = [
    ['first', 'last', 'number'],
    ['Donald', 'Trump', 45],
    ['Barack', 'Obama', 44],
    ['George', 'Bush', 43],
    ['Bill', 'Clinton', 42]
]

with open('presidents.txt', 'w') as f:
    for president in presidents:
        # Convert every field to str for join(). Building new strings here
        # leaves the rows in `presidents` untouched (the numbers stay ints).
        f.write(','.join(str(field) for field in president) + '\n')

with open('presidents.txt') as f:
    print(f.read())

first,last,number
Donald,Trump,45
Barack,Obama,44
George,Bush,43
Bill,Clinton,42



In [25]:
# Read file into a list by handing the open file to list()
with open('presidents.txt') as src:
    presidents = list(src)

print(presidents)

# Read file into a list again, this time with readlines()
with open('presidents.txt') as src:
    presidents = src.readlines()

print(presidents)

['first,last,number\n', 'Donald,Trump,45\n', 'Barack,Obama,44\n', 'George,Bush,43\n', 'Bill,Clinton,42\n']
['first,last,number\n', 'Donald,Trump,45\n', 'Barack,Obama,44\n', 'George,Bush,43\n', 'Bill,Clinton,42\n']


In [26]:
# Going back to the beginning of a file using previous example
# Read file into a list
with open('presidents.txt') as src:
    print(list(src))
    print(src.readlines())  # the entire contents were already read, nothing is left

with open('presidents.txt') as src:
    print(list(src))
    src.seek(0)  # rewind to byte offset 0
    print(src.readlines())

['first,last,number\n', 'Donald,Trump,45\n', 'Barack,Obama,44\n', 'George,Bush,43\n', 'Bill,Clinton,42\n']
[]
['first,last,number\n', 'Donald,Trump,45\n', 'Barack,Obama,44\n', 'George,Bush,43\n', 'Bill,Clinton,42\n']
['first,last,number\n', 'Donald,Trump,45\n', 'Barack,Obama,44\n', 'George,Bush,43\n', 'Bill,Clinton,42\n']


### CSV module

In [27]:
import csv

In [28]:
# newline='' passes line endings to the csv module untranslated, as the csv
# documentation asks for any file object given to csv.reader.
with open('presidents.txt', newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

['first', 'last', 'number']
['Donald', 'Trump', '45']
['Barack', 'Obama', '44']
['George', 'Bush', '43']
['Bill', 'Clinton', '42']


In [29]:
# Line number
# newline='' passes line endings to the csv module untranslated, as the csv
# documentation asks for any file object given to csv.reader.
with open('presidents.txt', newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        # line_num is read after each row has been returned by the reader
        print((reader.line_num, row))

(1, ['first', 'last', 'number'])
(2, ['Donald', 'Trump', '45'])
(3, ['Barack', 'Obama', '44'])
(4, ['George', 'Bush', '43'])
(5, ['Bill', 'Clinton', '42'])


In [30]:
# Writing csv using csv module
presidents = [
    ['first', 'last', 'number'],
    ['Donald', 'Trump', 45],
    ['Barack', 'Obama', 44],
    ['George', 'Bush', 43],
    ['Bill', 'Clinton', 42]
]

# newline='' is required for csv output: the writer emits its own '\r\n' row
# terminator, and without it Windows would translate that into '\r\r\n'
# (a blank line after every row).
with open('presidents.txt', 'w', newline='') as f:
    writer = csv.writer(f)
    for president in presidents:
        # The writer converts non-string fields (the ints) itself
        writer.writerow(president)

with open('presidents.txt') as f:
    print(f.read())

first,last,number
Donald,Trump,45
Barack,Obama,44
George,Bush,43
Bill,Clinton,42



In [31]:
# Appending to a file and using writerows
presidents = [
    ['George', 'Bush', 41],
    ['Ronald', 'Reagan', 40],
    ['Jimmy', 'Carter', 39]
]

# Mode 'a' adds to the end of the existing file instead of truncating it.
# newline='' keeps the writer's '\r\n' row terminator from being translated
# (on Windows it would otherwise become '\r\r\n').
with open('presidents.txt', 'a', newline='') as f:
    writer = csv.writer(f)
    # writerows() writes every row of the list in one call
    writer.writerows(presidents)

with open('presidents.txt') as f:
    print(f.read())

first,last,number
Donald,Trump,45
Barack,Obama,44
George,Bush,43
Bill,Clinton,42
George,Bush,41
Ronald,Reagan,40
Jimmy,Carter,39



In [32]:
# Picking a different delimiter
presidents = [
    ['first', 'last', 'number'],
    ['Donald', 'Trump', 45],
    ['Barack', 'Obama', 44],
    ['George', 'Bush', 43],
    ['Bill', 'Clinton', 42]
]

# newline='' keeps the writer's '\r\n' row terminator from being translated
# (on Windows it would otherwise become '\r\r\n').
with open('presidents.txt', 'w', newline='') as f:
    # Fields are separated by '|' instead of the default ','
    writer = csv.writer(f, delimiter="|")
    writer.writerows(presidents)

with open('presidents.txt') as f:
    print(f.read())

first|last|number
Donald|Trump|45
Barack|Obama|44
George|Bush|43
Bill|Clinton|42



### Dictionaries and csv

In [33]:
# No fieldnames are passed, so DictReader takes them from the first row and
# yields every later row as a mapping keyed by those names.
# newline='' passes line endings to the csv module untranslated, as the csv
# documentation asks for.
with open('presidents.txt', newline='') as f:
    reader = csv.DictReader(f, delimiter='|')
    for row in reader:
        print(row['first'] + " " + row['last'])
    
        

Donald Trump
Barack Obama
George Bush
Bill Clinton


In [41]:
# writing out a list of dictionaries, no header
presidents = [
    {'first': 'Donald', 'last': 'Trump'},
    {'first': 'Barack', 'last': 'Obama'},
    {'first': 'George', 'last': 'Bush'},
    {'first': 'Bill', 'last': 'Clinton'},
]

# fieldnames fixes the column order; no header row is written unless
# writeheader() is called.
header = ['first', 'last']
# newline='' keeps the writer's '\r\n' row terminator from being translated
# (on Windows it would otherwise become '\r\r\n').
with open('presidents.txt', 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=header)
    for row in presidents:
        writer.writerow(row)

with open('presidents.txt') as f:
    print(f.read())
        

Donald,Trump
Barack,Obama
George,Bush
Bill,Clinton



In [35]:
# writing out a list of dictionaries
# With header
presidents = [
    {'first': 'Donald', 'last': 'Trump'},
    {'first': 'Barack', 'last': 'Obama'},
    {'first': 'George', 'last': 'Bush'},
    {'first': 'Bill', 'last': 'Clinton'},
]

header = ['first', 'last']
# newline='' keeps the writer's '\r\n' row terminator from being translated
# (on Windows it would otherwise become '\r\r\n').
with open('presidents.txt', 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=header)
    # Emit the field names as the first row
    writer.writeheader()
    for row in presidents:
        writer.writerow(row)

with open('presidents.txt') as f:
    print(f.read())

first,last
Donald,Trump
Barack,Obama
George,Bush
Bill,Clinton



In [36]:
# writing out a list of dictionaries
# Ignore fields when writing out
# With only 'first' in fieldnames, DictWriter refuses rows that also carry
# 'last'. The error is the point of the cell, so it is caught and displayed
# instead of aborting a "Restart & Run All".
presidents = [
    {'first': 'Donald', 'last': 'Trump'},
    {'first': 'Barack', 'last': 'Obama'},
    {'first': 'George', 'last': 'Bush'},
    {'first': 'Bill', 'last': 'Clinton'},
]

header = ['first']
try:
    # newline='' keeps the writer's '\r\n' row terminator from being translated
    with open('presidents.txt', 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=header)
        writer.writeheader()
        for row in presidents:
            writer.writerow(row)
except ValueError as exc:
    print('{}: {}'.format(type(exc).__name__, exc))
else:
    # Only show the file when the write went through
    with open('presidents.txt') as f:
        print(f.read())

ValueError: dict contains fields not in fieldnames: 'last'

In [37]:
# Reuses the `presidents` list of dictionaries defined in the previous cell.
header = ['first']
# newline='' keeps the writer's '\r\n' row terminator from being translated
# (on Windows it would otherwise become '\r\r\n').
with open('presidents.txt', 'w', newline='') as f:
    # extrasaction='ignore' drops keys that are not listed in fieldnames
    # instead of raising ValueError
    writer = csv.DictWriter(f, fieldnames=header, extrasaction='ignore')
    writer.writeheader()
    for row in presidents:
        writer.writerow(row)

with open('presidents.txt') as f:
    print(f.read())

first
Donald
Barack
George
Bill



In [38]:
# Guess the CSV dialect (here: the delimiter) of a file with csv.Sniffer

presidents = [
    ['first', 'last', 'number'],
    ['Donald', 'Trump', 45],
    ['Barack', 'Obama', 44],
    ['George', 'Bush', 43],
    ['Bill', 'Clinton', 42]
]

# newline='' keeps the writer's '\r\n' row terminator from being translated
# (on Windows it would otherwise become '\r\r\n').
with open('presidents.txt', 'w', newline='') as f:
    writer = csv.writer(f, delimiter="|")
    writer.writerows(presidents)

with open('presidents.txt', newline='') as f:
    # Let the Sniffer inspect a sample from the start of the file ...
    dialect = csv.Sniffer().sniff(f.read(256))
    # ... then rewind so the reader sees the file from the first row again
    f.seek(0)
    reader = csv.reader(f, dialect)
    for row in reader:
        print(row)

['first', 'last', 'number']
['Donald', 'Trump', '45']
['Barack', 'Obama', '44']
['George', 'Bush', '43']
['Bill', 'Clinton', '42']


In [40]:
# Compress an existing file
import gzip
import shutil

# Both files are opened in binary mode; copyfileobj copies the contents of the
# plain file into the gzip file object.
with open('presidents.txt', 'rb') as plain, gzip.open('presidents.txt.gz', 'wb') as packed:
    shutil.copyfileobj(plain, packed)

In [42]:
# Read compressed file
import gzip

# Binary mode yields bytes objects, so every row prints as a b'...' literal
with gzip.open('presidents.txt.gz', 'rb') as packed:
    for raw_row in packed:
        print(raw_row)

print("\n")
# Decode each bytes row to str so that it prints as ordinary text
with gzip.open('presidents.txt.gz', 'rb') as packed:
    for raw_row in packed:
        text_row = raw_row.decode()
        print(text_row, end="")

b'first|last|number\r\n'
b'Donald|Trump|45\r\n'
b'Barack|Obama|44\r\n'
b'George|Bush|43\r\n'
b'Bill|Clinton|42\r\n'


first|last|number
Donald|Trump|45
Barack|Obama|44
George|Bush|43
Bill|Clinton|42


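#### Exceptions
Exceptions you are likely to meet while working with files:
* EOFError - raised by `input()` when it hits end-of-file without reading any data (a file's `read()` returns an empty string instead)
* OSError - base class for operating-system level failures such as I/O errors
* FileExistsError - raised when trying to create a file or directory that already exists (for example with mode 'x')
* FileNotFoundError - raised when the requested file or directory does not exist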
