In [1]:
# Change directory to VSCode workspace root so that relative path loads work correctly. Turn this addition off with the DataScience.changeDirOnImportExport setting
# ms-python.python added
import os
try:
	# Best-effort: move to the workspace root so that relative path loads work.
	os.chdir(os.path.join(os.getcwd(), '../../python-tools'))
	print(os.getcwd())
except OSError as exc:
	# Only filesystem failures are expected here (e.g. the directory is missing).
	# Report them instead of hiding them; the notebook keeps running from the current directory.
	print('Could not change to the workspace root:', exc)


 ### 1. Read and write text data
 * Use the `open()` function with mode `rt` to read a text file.

In [2]:
# Read the entire file as a single string
# NOTE(review): no encoding= is passed, so open() uses the platform default — confirm the file's encoding
with open('data/ai-wiki.txt', 'rt') as f:
    data = f.read()


In [3]:
# Iterate over the lines of the file, one line at a time
with open('data/ai-wiki.txt', 'rt') as f:
    for line in f:
        # process line (placeholder: the list returned by split() is discarded)
        line.split()


In [4]:
# Write chunks of text data
# with open('somefile.txt', 'wt') as f:
#     f.write(text1)
#     f.write(text2)


In [5]:
# Redirected print statement
# with open('somefile.txt', 'wt') as f:
#     print(line1, file=f)
#     print(line2, file=f)
#     ...


 ### 2. Printing to a file
 * Use the `file` keyword argument of `print()`.

In [6]:
# with open('somefile.txt', 'wt') as f:
#     print('Hello World!', file=f)


 ### 3. Printing with a different separator or line ending

In [7]:
# Default behaviour: the arguments are separated by a single space
print('SBUX', 50, 91.5)


SBUX 50 91.5


In [8]:
# sep replaces the default single-space separator between the arguments
print('SBUX', 50, 91.5, sep=',')



SBUX,50,91.5


In [9]:
# end replaces the default line ending; here the output ends with '!!' followed by a newline
print('SBUX', 50, 91.5, sep=',', end='!!\n')




SBUX,50,91.5!!


In [10]:
# Without end=, every print() call finishes with a newline, so each value lands on its own line.
# The end argument (used in the next cell) is how you suppress those newlines.
for i in range(5):
    print(i)


0
1
2
3
4


In [11]:
# end=' ' swaps the trailing newline for a space, so all values are printed on one line
for i in range(5):
    print(i, end=' ')


0 1 2 3 4

 ### 4. Performing i/o operations on a string
 * Use the `io.StringIO()` and `io.BytesIO()` classes to create file-like objects that operate on string data.

In [12]:
import io


In [13]:
# Create an empty in-memory text buffer
s = io.StringIO()
# write() returns the number of characters written (12 for this string)
s.write('Hello World\n')

12

In [14]:
# print() can write into the buffer via file=; it appends its usual trailing newline
print('This is a test', file=s)


In [15]:
# Get all of the data written so far, returned as a single string
s.getvalue()


'Hello World\nThis is a test\n'

In [16]:
# Wrap a file interface around an existing string
s = io.StringIO('Hello\nWorld\n')
# read(4) consumes only the first four characters
s.read(4)


'Hell'

In [17]:
# read() without a size returns everything left after the characters already consumed
s.read()


'o\nWorld\n'

In [18]:
# The io.StringIO class should only be used for text.
# If you are operating with binary data, use the io.BytesIO class instead.
s_bit = io.BytesIO()
# BytesIO takes bytes objects (note the b'' literal)
s_bit.write(b'binary data')
# Only the last expression of the cell is displayed: the bytes written so far
s_bit.getvalue()


b'binary data'

 ### 5. Reading and writing compressed data files
 * Both `gzip` and `bz2` provide an alternative implementation of `open()` that can be used for this purpose.

In [19]:
import gzip
import bz2


In [20]:
# gzip compression

# with gzip.open('somefile.gz', 'rt') as f:
#     text = f.read()

# with gzip.open('somefile.gz', 'wt') as f:
#     f.write(text)


In [21]:
# bz2 compression

# with bz2.open('somefile.bz2', 'rt') as f:
#     text = f.read()

# with bz2.open('somefile.bz2', 'wt') as f:
#     f.write(text)


### 6. Iterate over fixed-sized records
* use the `iter()` function and `functools.partial()`

In [22]:
from functools import partial

```
RECORD_SIZE = 32

with open('somefile.data', 'rb') as f:
    records = iter(partial(f.read, RECORD_SIZE), b'')
    for r in records:
        ...
```

### 7. Getting a directory listing
* use the `os.listdir()` function to obtain a list of files in a directory

In [23]:
import os

# os.listdir() returns the entries in arbitrary order; sort them so the
# displayed listing is the same on every platform and every run.
names = sorted(os.listdir('data'))
names

['ai-wiki.txt',
 'avocado.csv',
 'billboard.csv',
 'billboard_ratings.csv',
 'billboard_songs.csv',
 'corpus.txt',
 'country_timeseries.csv',
 'gapminder.tsv',
 'pew.csv',
 'weather.csv']

In [24]:
import os.path


In [25]:
# Keep only the entries of 'data' that are regular files
names = [
    entry
    for entry in os.listdir('data')
    if os.path.isfile(os.path.join('data', entry))
]

In [26]:
# Keep only the entries of 'data' that are directories
dirnames = [
    entry
    for entry in os.listdir('data')
    if os.path.isdir(os.path.join('data', entry))
]


In [27]:
# The str.startswith() and str.endswith() methods are handy for filtering a listing by name
txtfiles = [
    entry
    for entry in os.listdir('data')
    if entry.endswith('.txt')
]
txtfiles

['ai-wiki.txt', 'corpus.txt']

In [28]:
from fnmatch import fnmatch

# Select the entries whose name matches the shell-style pattern '*.txt'
txtfiles = [
    entry
    for entry in os.listdir('data')
    if fnmatch(entry, '*.txt')
]

In [29]:
# For filename matching, use the glob or fnmatch module instead.
import glob
# glob.glob() matches the pattern against paths, so each result keeps its 'data' directory prefix
txtfiles = glob.glob('data/*.txt')

In [30]:
# Build one (path, size, modification time) tuple per text file
name_sz_date = [
    (path, os.path.getsize(path), os.path.getmtime(path))
    for path in txtfiles
]

# Show one file per line: path, size, mtime
for name, size, mtime in name_sz_date:
    print(name, size, mtime)

data\ai-wiki.txt 5777 1570470768.1134496
data\corpus.txt 4122838 1569871470.9051304


In [31]:
# Alternative: a single os.stat() call per file returns all of its metadata at once
file_metadata = [
    (path, os.stat(path))
    for path in txtfiles
]
for name, meta in file_metadata:
    print(name, meta)

data\ai-wiki.txt os.stat_result(st_mode=33206, st_ino=68679894317408831, st_dev=779648, st_nlink=1, st_uid=0, st_gid=0, st_size=5777, st_atime=1570470768, st_mtime=1570470768, st_ctime=1570470768)
data\corpus.txt os.stat_result(st_mode=33206, st_ino=3377699721499073, st_dev=779648, st_nlink=1, st_uid=0, st_gid=0, st_size=4122838, st_atime=1569871470, st_mtime=1569871470, st_ctime=1569871470)
