In [None]:
# Working with files in Python

# Python has basic file reading and writing capabilities.
# These can be useful for scripting or data exploration.

# The open() function has 4 basic modes
# "r" - Read - Default value. Opens a file for reading, raises an error if the file does not exist
# "a" - Append - Opens a file for appending, creates the file if it does not exist
# "w" - Write - Opens a file for writing, creates the file if it does not exist and overwrites its content if it does
# "x" - Create - Creates the specified file, raises an error if the file already exists

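# We can also specify whether the file is handled as text or as binary data
# "t" - Text - Default value. Text mode (content is read and written as strings)
# "b" - Binary - Binary mode (content is read and written as bytes, e.g. images)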

# If we don't pass a mode argument, the default is 'rt'

In [1]:
# In Google Colab, files kept in Google Drive can only be reached after
# importing the drive module and mounting the Drive into the notebook.

from google.colab import drive

# Directory under which the mounted Drive will appear.
DRIVE_MOUNT_POINT = '/content/drive/'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive/


In [6]:
# Once the drive is mounted, the files stored on it can be accessed.
# To reach a shared drive, we can create a shortcut to it for ease of access.

file_path = "/content/drive/MyDrive/Data Engineering Course/Python/data/Pride_and_Prejudice.txt"

# open() returns a file object that we assign to a variable.
# A file opened this way stays open until close() is called on it,
# so we must remember to close it when we are done.
#
# This file begins with a UTF-8 byte order mark (BOM). Decoding with
# 'utf-8-sig' drops that marker, so the text starts with "The" instead of
# an invisible '\ufeff' character. Naming the encoding also keeps the
# result independent of the platform's default encoding.
f = open(file_path, 'rt', encoding='utf-8-sig')

# read() with no argument returns the entire file as one string.
book = f.read()
# The first 50 characters are exactly the title line up to "Prejudice".
print(book[:50])

﻿The Project Gutenberg eBook of Pride and Prejudice


In [7]:
# read() also accepts the number of characters to return.
# The count is in characters (not bytes) because the file is open in text mode.
# The file was already read to the end, so seek(0) moves the position
# back to the start before reading again.
#
# try/finally guarantees the manually opened file is closed even if
# seeking or reading raises an error.
try:
  f.seek(0)
  print(f.read(22))
finally:
  f.close()

﻿The Project Gutenberg


In [8]:
# Text files are usually organized as lines.
# readlines() returns a list with one string per line. Each string keeps
# its trailing newline, which is why print() shows a blank line after it.
#
# Line endings differ between UNIX and Windows (DOS) files:
# DOS - (\r\n)
# UNIX - (\n)
# In text mode Python translates either form to '\n' while reading,
# so we rarely need to handle the difference ourselves.

# A file opened in a `with` block is closed automatically when the block ends.
# 'utf-8-sig' strips the byte order mark at the start of this file, so
# lines[0] begins with "The" instead of an invisible '\ufeff' character.
with open(file_path, encoding='utf-8-sig') as f:
  lines = f.readlines()

print(lines[0])
print(lines[2])

﻿The Project Gutenberg eBook of Pride and Prejudice, by Jane Austen

This eBook is for the use of anyone anywhere in the United States and



In [None]:
# Now let's try writing a file.
# We choose the mode based on what should happen to existing content:
# 'a' (append) adds to the end of the file,
# 'w' (write) overwrites any existing content.
# Passing encoding='utf-8' explicitly keeps the bytes written to disk the
# same regardless of the platform's default encoding.

out_file = '/content/drive/MyDrive/Data Engineering Course/Python/data/sample.txt'
line1 = 'This is some file content!\n'
line2 = ['This is some more file content.\n', 'And even more!\n']
line3 = 'This will be the only line in the file'

# 'w' creates the file (or empties an existing one) and writes one string.
with open(out_file, 'w', encoding='utf-8') as f:
  f.write(line1)

# 'a' keeps what is already there. writelines() writes each string in the
# list as-is and does not add newlines, so the strings carry their own '\n'.
with open(out_file, 'a', encoding='utf-8') as f:
  f.writelines(line2)

# Read the file back: all three lines are present.
with open(out_file, encoding='utf-8') as f:
  print(f.read())

# Opening with 'w' again discards the previous content before writing.
with open(out_file, 'w', encoding='utf-8') as f:
  f.write(line3)

# Read the file back: only the last write remains.
with open(out_file, encoding='utf-8') as f:
  print(f.read())

This is some file content!
This is some more file content.
And even more!

This will be the only line in the file
