<a href="https://colab.research.google.com/github/nceder/qpb4e/blob/main/code/Chapter%2012/Chapter_12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using the filesystem

## 12.2.2 The current working directory

In [None]:
import os
# Report the kernel's current working directory as an absolute path string.
os.getcwd()

'/content'

In [None]:
# os.curdir is the relative name of the current directory, so this lists
# the entries of the working directory.
os.listdir(os.curdir)

['.config', 'sample_data']

In [None]:
# The relative name is resolved against the current working directory, so this
# cell only succeeds while 'sample_data' is a child of wherever the kernel is.
os.chdir('sample_data')    #A
os.getcwd()

'/content/sample_data'

## 12.2.3 Accessing directories with pathlib

In [None]:
import pathlib
# A Path built with no arguments is the relative path '.'.
cur_path = pathlib.Path()
# cwd() is a classmethod, so it can be called on the class directly; it
# returns the absolute working directory, not cur_path itself.
pathlib.Path.cwd()

PosixPath('/content/sample_data')

## 12.2.4 Manipulating pathnames

In [None]:
import os
# join() inserts the platform's path separator between the components.
print(os.path.join('bin', 'utils', 'disktools'))

bin/utils/disktools


In [None]:
# Build the two relative fragments in one step, then join them into one path.
path1, path2 = (
    os.path.join('mydir', 'bin'),
    os.path.join('utils', 'disktools', 'chkdisk'),
)
print(os.path.join(path1, path2))

mydir/bin/utils/disktools/chkdisk


In [None]:
import os
# split() returns a (head, tail) pair, where tail is the last path component.
print(os.path.split(os.path.join('some', 'directory', 'path')))

('some/directory', 'path')


In [None]:
import os
# basename() keeps only the final component of the path.
os.path.basename(os.path.join('some', 'directory', 'path.jpg'))

'path.jpg'

In [None]:
# dirname() keeps everything before the final component.
os.path.dirname(os.path.join('some', 'directory', 'path.jpg'))

'some/directory'

In [None]:
# splitext() separates the extension (with its leading dot) from the rest.
os.path.splitext(os.path.join('some', 'directory', 'path.jpg'))

('some/directory/path', '.jpg')

In [None]:
import os
# Expand $HOME on its own and let os.path.join supply the separator. The
# original hard-coded a backslash, which is only a path separator on Windows
# (the recorded output below appears to come from a Windows session).
# If HOME is not set, expandvars() leaves the '$HOME' text unchanged.
home_temp = os.path.join(os.path.expandvars('$HOME'), 'temp')
home_temp

'C:\\Users\\administrator\\personal\\temp'


## 12.2.5 Manipulating pathnames with pathlib

In [None]:
from pathlib import Path
# Path() with no arguments is the relative current directory; joinpath()
# appends each component to it.
cur_path = Path()
print(cur_path.joinpath('bin', 'utils', 'disktools'))

bin/utils/disktools


In [None]:
# The / operator on a Path joins components, like joinpath().
cur_path / 'bin' / 'utils' / 'disktools'

PosixPath('bin/utils/disktools')

In [None]:
# NOTE(review): this repeats the joinpath() cell from earlier in this section;
# it also rebinds cur_path to the relative current directory.
cur_path = Path()
print(cur_path.joinpath('bin', 'utils', 'disktools'))

bin/utils/disktools


In [None]:
# .parts exposes the path's components as a tuple.
a_path = Path('bin/utils/disktools')
print(a_path.parts)

('bin', 'utils', 'disktools')


In [None]:
# .name is the final component of the path, extension included.
a_path = Path('some', 'directory', 'path.jpg')
a_path.name

'path.jpg'

In [None]:
# .parent is the path without its final component.
print(a_path.parent)

some/directory


In [None]:
# .suffix is the final component's extension, including the dot.
a_path.suffix

'.jpg'

## 12.2.6 Useful constants and functions

In [None]:
import os
# os.name names the OS-dependent module in use ('posix' in the recorded output).
os.name

'posix'

# 12.3 Getting information about files

In [None]:
import os
# exists() is true for directories as well as files.
os.path.exists('/content/sample_data/')

True

In [None]:
# exists() applied to a file path.
os.path.exists('/content/sample_data/README.md')

True

In [None]:
# A path that is not present on disk yields False rather than raising.
os.path.exists('/content/sample_data/ljsljkflkjs')

False

In [None]:
# isdir() is true only when the path is an existing directory.
os.path.isdir('/content/sample_data/')

True

In [None]:
# A regular file is not a directory, so isdir() reports False for it.
os.path.isdir('/content/sample_data/README.md')

False

In [None]:
# isfile() is true when the path is an existing regular file.
os.path.isfile('/content/sample_data/README.md')

True

## 12.3.1 Getting information about files with scandir

In [None]:
# Iterate over the parent directory's entries; the context manager closes the
# scandir iterator when the block exits.
with os.scandir("..") as parent_entries:
    for item in parent_entries:
        print(item.name, item.is_file())


.config False
sample_data False


# 12.4 More filesystem operations

In [None]:
# An absolute path makes this cell work whichever directory the kernel is in.
os.chdir('/content/sample_data')
os.listdir(os.curdir)

['anscombe.json',
 'README.md',
 'california_housing_train.csv',
 'california_housing_test.csv',
 'mnist_test.csv',
 'mnist_train_small.csv']

In [None]:
import glob
# '*' matches any name; by default glob does not match names starting with a dot.
glob.glob("*")

['anscombe.json',
 'README.md',
 'california_housing_train.csv',
 'california_housing_test.csv',
 'mnist_test.csv',
 'mnist_train_small.csv']

In [None]:
# '*json' matches any name that ends in 'json'.
glob.glob("*json")

['anscombe.json']

In [None]:
# Create the scratch files with pathlib instead of the shell's `touch`, so the
# cell does not depend on a POSIX shell being available.
from pathlib import Path
for scratch_name in ('a.tmp', '1.tmp', '2.tmp'):
    Path(scratch_name).touch()

In [None]:
# '?' matches exactly one character.
glob.glob("?.tmp")

['1.tmp', '2.tmp', 'a.tmp']

In [None]:
# '[0-9]' matches a single digit, so 'a.tmp' is excluded.
glob.glob("[0-9].tmp")

['1.tmp', '2.tmp']

In [None]:
# rename() gives the file a new name; the listing afterwards confirms it.
# NOTE(review): the recorded output below also lists '2.tm' and '2.tme', which
# no visible cell creates — presumably leftovers from an earlier session.
os.rename('README.md', 'README.md.old')
os.listdir(os.curdir)

['anscombe.json',
 'README.md.old',
 '1.tmp',
 '2.tmp',
 'a.tmp',
 '2.tm',
 '2.tme',
 'california_housing_train.csv',
 'california_housing_test.csv',
 'mnist_test.csv',
 'mnist_train_small.csv']

In [None]:
# remove() deletes a file; it is not the call for removing directories.
os.remove('a.tmp')
os.listdir(os.curdir)

['anscombe.json',
 '.ipynb_checkpoints',
 'README.md.old',
 '1.tmp',
 '2.tmp',
 'california_housing_train.csv',
 'california_housing_test.csv',
 'mnist_test.csv',
 'mnist_train_small.csv']

In [None]:
# exist_ok=True keeps the cell re-runnable: without it makedirs() raises
# FileExistsError if 'mydir' is still present from an earlier run.
os.makedirs('mydir', exist_ok=True)
os.listdir(os.curdir)

['anscombe.json',
 '.ipynb_checkpoints',
 'README.md.old',
 'mydir',
 '1.tmp',
 '2.tmp',
 'california_housing_train.csv',
 'california_housing_test.csv',
 'mnist_test.csv',
 'mnist_train_small.csv']

In [None]:
# Confirm that 'mydir' now exists as a directory.
os.path.isdir('mydir')

True

In [None]:
# rmdir() removes a directory, and only if it is empty.
os.rmdir('mydir')
os.listdir(os.curdir)

['anscombe.json',
 '.ipynb_checkpoints',
 'README.md.old',
 '1.tmp',
 '2.tmp',
 'california_housing_train.csv',
 'california_housing_test.csv',
 'mnist_test.csv',
 'mnist_train_small.csv']

## 12.4.1 More filesystem operations with pathlib


In [None]:
# '/content' is absolute, so joining it discards whatever cur_path held and
# new_path ends up as /content/sample_data.
new_path = cur_path / '/content' / 'sample_data'
[*new_path.iterdir()]

[PosixPath('/content/sample_data/anscombe.json'),
 PosixPath('/content/sample_data/.ipynb_checkpoints'),
 PosixPath('/content/sample_data/README.md.old'),
 PosixPath('/content/sample_data/1.tmp'),
 PosixPath('/content/sample_data/2.tmp'),
 PosixPath('/content/sample_data/california_housing_train.csv'),
 PosixPath('/content/sample_data/california_housing_test.csv'),
 PosixPath('/content/sample_data/mnist_test.csv'),
 PosixPath('/content/sample_data/mnist_train_small.csv')]

In [None]:
# Path.glob() yields Path objects rather than plain strings; list() collects them.
list(cur_path.glob("*"))

[PosixPath('anscombe.json'),
 PosixPath('.ipynb_checkpoints'),
 PosixPath('README.md.old'),
 PosixPath('1.tmp'),
 PosixPath('2.tmp'),
 PosixPath('california_housing_train.csv'),
 PosixPath('california_housing_test.csv'),
 PosixPath('mnist_test.csv'),
 PosixPath('mnist_train_small.csv')]

In [None]:
# '*json' matches any name that ends in 'json'.
list(cur_path.glob("*json"))

[PosixPath('anscombe.json')]

In [None]:
# '?' matches exactly one character.
list(cur_path.glob("?.tmp"))

[PosixPath('1.tmp'), PosixPath('2.tmp')]

In [None]:
# '[0-9]' matches a single digit.
list(cur_path.glob("[0-9].tmp"))

[PosixPath('1.tmp'), PosixPath('2.tmp')]

In [None]:
# Rename README.md.old to README.md with Path.rename(), then list the current
# directory to confirm the change.
old_path, new_path = Path('README.md.old'), Path('README.md')
old_path.rename(new_path)
[*cur_path.iterdir()]

[PosixPath('anscombe.json'),
 PosixPath('README.md'),
 PosixPath('.ipynb_checkpoints'),
 PosixPath('1.tmp'),
 PosixPath('2.tmp'),
 PosixPath('california_housing_train.csv'),
 PosixPath('california_housing_test.csv'),
 PosixPath('mnist_test.csv'),
 PosixPath('mnist_train_small.csv')]

In [None]:
# unlink() deletes the file the path refers to.
new_path = Path('1.tmp')
new_path.unlink()
list(cur_path.iterdir())

[PosixPath('anscombe.json'),
 PosixPath('README.md'),
 PosixPath('.ipynb_checkpoints'),
 PosixPath('2.tmp'),
 PosixPath('california_housing_train.csv'),
 PosixPath('california_housing_test.csv'),
 PosixPath('mnist_test.csv'),
 PosixPath('mnist_train_small.csv')]

In [None]:
# parents=True creates any missing intermediate directories; exist_ok=True
# keeps the cell re-runnable if 'mydir' is left over from an earlier run.
new_path = Path('mydir')
new_path.mkdir(parents=True, exist_ok=True)
list(cur_path.iterdir())

[PosixPath('anscombe.json'),
 PosixPath('README.md'),
 PosixPath('.ipynb_checkpoints'),
 PosixPath('mydir'),
 PosixPath('2.tmp'),
 PosixPath('california_housing_train.csv'),
 PosixPath('california_housing_test.csv'),
 PosixPath('mnist_test.csv'),
 PosixPath('mnist_train_small.csv')]

In [None]:
# Confirm that new_path now exists as a directory.
new_path.is_dir()

True

In [None]:
# Path.rmdir() removes the directory, which must be empty.
new_path = Path('mydir')
new_path.rmdir()
list(cur_path.iterdir())

[PosixPath('anscombe.json'),
 PosixPath('README.md'),
 PosixPath('.ipynb_checkpoints'),
 PosixPath('2.tmp'),
 PosixPath('california_housing_train.csv'),
 PosixPath('california_housing_test.csv'),
 PosixPath('mnist_test.csv'),
 PosixPath('mnist_train_small.csv')]

In [None]:
# TODO: create the directory structure that the os.walk example below traverses

In [1]:
import os
# Walk the tree top-down from the current directory, reporting how many files
# each directory holds.
for root, dirs, files in os.walk(os.curdir):
    print(f"{root} has {len(files)} files")
    if ".git" in dirs:                          #A
        # Prune by editing dirs in place: a top-down os.walk only descends
        # into the subdirectories still listed in it.
        dirs.remove(".git")        #B

. has 0 files
./.config has 7 files
./.config/configurations has 1 files
./.config/logs has 0 files
./.config/logs/2024.05.08 has 6 files
./sample_data has 6 files


# 12.6 Lab 12: More file operations

In [None]:
import pathlib
cur_path = pathlib.Path(".")

# Total the sizes of the regular *.txt files directly inside cur_path.
# Symlinks are skipped explicitly (is_file() follows links), and is_file()
# keeps a directory that happens to be named '*.txt' from being counted.
size = sum(
    text_path.stat().st_size
    for text_path in cur_path.glob("*.txt")
    if not text_path.is_symlink() and text_path.is_file()
)

print(size)
