<a href="https://colab.research.google.com/github/nceder/qpb4e/blob/main/code/Chapter%2012/Chapter_12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using the filesystem

## 12.2.2 The current working directory

In [1]:
import os
os.getcwd()

'/content'

In [2]:
os.listdir(os.curdir)

['.config', 'sample_data']

In [3]:
os.chdir('sample_data')    #A
os.getcwd()

'/content/sample_data'

## 12.2.3 Accessing directories with pathlib

In [4]:
import pathlib
cur_path = pathlib.Path()
cur_path.cwd()

PosixPath('/content/sample_data')

## 12.2.4 Manipulating pathnames

In [5]:
import os
print(os.path.join('bin', 'utils', 'disktools'))

bin/utils/disktools


In [6]:
path1 = os.path.join('mydir', 'bin');
path2 = os.path.join('utils', 'disktools', 'chkdisk')
print(os.path.join(path1, path2))

mydir/bin/utils/disktools/chkdisk


In [7]:
import os
print(os.path.split(os.path.join('some', 'directory', 'path')))

('some/directory', 'path')


In [8]:
import os
os.path.basename(os.path.join('some', 'directory', 'path.jpg'))

'path.jpg'

In [9]:
os.path.dirname(os.path.join('some', 'directory', 'path.jpg'))

'some/directory'

In [10]:
os.path.splitext(os.path.join('some', 'directory', 'path.jpg'))

('some/directory/path', '.jpg')

In [11]:
import os
os.path.expandvars('$HOME\\temp')

'/root\\temp'

## 12.2.5 Manipulating pathnames with pathlib

In [12]:
from pathlib import Path
cur_path = Path()
print(cur_path.joinpath('bin', 'utils', 'disktools'))

bin/utils/disktools


In [13]:
cur_path / 'bin' / 'utils' / 'disktools'

PosixPath('bin/utils/disktools')

In [14]:
cur_path = Path()
print(cur_path.joinpath('bin', 'utils', 'disktools'))

bin/utils/disktools


In [15]:
a_path = Path('bin/utils/disktools')
print(a_path.parts)

('bin', 'utils', 'disktools')


In [16]:
a_path = Path('some', 'directory', 'path.jpg')
a_path.name

'path.jpg'

In [17]:
print(a_path.parent)

some/directory


In [18]:
a_path.suffix

'.jpg'

##12.2.6 Useful constants and functions

In [19]:
import os
os.name

'posix'

# 12.3 Getting information about files

In [20]:
import os
os.path.exists('/content/sample_data/')

True

In [21]:
os.path.exists('/content/sample_data/README.md')

True

In [22]:
os.path.exists('/content/sample_data/ljsljkflkjs')

False

In [23]:
os.path.isdir('/content/sample_data/')

True

In [24]:
os.path.isdir('/content/sample_data/README.md')

False

In [25]:

os.path.isfile('/content/sample_data/README.md')

True

## 12.3.1 Getting information about files with scandir

In [26]:
with os.scandir("..") as my_dir:
    for entry in my_dir:
        print(entry.name, entry.is_file())


.config False
sample_data False


# 12.4 More filesystem operations

In [27]:
os.chdir('/content/sample_data')
os.listdir(os.curdir)

['anscombe.json',
 'README.md',
 'mnist_test.csv',
 'mnist_train_small.csv',
 'california_housing_train.csv',
 'california_housing_test.csv']

In [28]:
import glob
glob.glob("*")

['anscombe.json',
 'README.md',
 'mnist_test.csv',
 'mnist_train_small.csv',
 'california_housing_train.csv',
 'california_housing_test.csv']

In [29]:
glob.glob("*json")

['anscombe.json']

In [30]:
! touch a.tmp 1.tmp 2.tmp

In [31]:
glob.glob("?.tmp")

['a.tmp', '2.tmp', '1.tmp']

In [32]:
glob.glob("[0-9].tmp")

['2.tmp', '1.tmp']

In [33]:
os.rename('README.md', 'README.md.old')
os.listdir(os.curdir)

['anscombe.json',
 'README.md.old',
 'a.tmp',
 '2.tmp',
 '1.tmp',
 'mnist_test.csv',
 'mnist_train_small.csv',
 'california_housing_train.csv',
 'california_housing_test.csv']

In [34]:
os.remove('a.tmp')
os.listdir(os.curdir)

['anscombe.json',
 'README.md.old',
 '2.tmp',
 '1.tmp',
 'mnist_test.csv',
 'mnist_train_small.csv',
 'california_housing_train.csv',
 'california_housing_test.csv']

In [35]:
os.makedirs('mydir')
os.listdir(os.curdir)

['anscombe.json',
 'README.md.old',
 'mydir',
 '2.tmp',
 '1.tmp',
 'mnist_test.csv',
 'mnist_train_small.csv',
 'california_housing_train.csv',
 'california_housing_test.csv']

In [36]:
os.path.isdir('mydir')

True

In [37]:
os.rmdir('mydir')
os.listdir(os.curdir)

['anscombe.json',
 'README.md.old',
 '2.tmp',
 '1.tmp',
 'mnist_test.csv',
 'mnist_train_small.csv',
 'california_housing_train.csv',
 'california_housing_test.csv']

## 12.4.1 More filesystem operations with pathlib


In [38]:
new_path = cur_path.joinpath('/content', 'sample_data')
list(new_path.iterdir())

[PosixPath('/content/sample_data/anscombe.json'),
 PosixPath('/content/sample_data/README.md.old'),
 PosixPath('/content/sample_data/2.tmp'),
 PosixPath('/content/sample_data/1.tmp'),
 PosixPath('/content/sample_data/mnist_test.csv'),
 PosixPath('/content/sample_data/mnist_train_small.csv'),
 PosixPath('/content/sample_data/california_housing_train.csv'),
 PosixPath('/content/sample_data/california_housing_test.csv')]

In [39]:
list(cur_path.glob("*"))

[PosixPath('anscombe.json'),
 PosixPath('README.md.old'),
 PosixPath('2.tmp'),
 PosixPath('1.tmp'),
 PosixPath('mnist_test.csv'),
 PosixPath('mnist_train_small.csv'),
 PosixPath('california_housing_train.csv'),
 PosixPath('california_housing_test.csv')]

In [40]:
list(cur_path.glob("*json"))

[PosixPath('anscombe.json')]

In [41]:
list(cur_path.glob("?.tmp"))

[PosixPath('2.tmp'), PosixPath('1.tmp')]

In [42]:
list(cur_path.glob("[0-9].tmp"))

[PosixPath('2.tmp'), PosixPath('1.tmp')]

In [43]:
old_path = Path('README.md.old')
new_path = Path('README.md')
old_path.rename(new_path)
list(cur_path.iterdir())

[PosixPath('anscombe.json'),
 PosixPath('README.md'),
 PosixPath('2.tmp'),
 PosixPath('1.tmp'),
 PosixPath('mnist_test.csv'),
 PosixPath('mnist_train_small.csv'),
 PosixPath('california_housing_train.csv'),
 PosixPath('california_housing_test.csv')]

In [44]:
new_path = Path('1.tmp')
new_path.unlink()
list(cur_path.iterdir())

[PosixPath('anscombe.json'),
 PosixPath('README.md'),
 PosixPath('2.tmp'),
 PosixPath('mnist_test.csv'),
 PosixPath('mnist_train_small.csv'),
 PosixPath('california_housing_train.csv'),
 PosixPath('california_housing_test.csv')]

In [45]:
new_path = Path ('mydir')
new_path.mkdir(parents=True)
list(cur_path.iterdir())

[PosixPath('anscombe.json'),
 PosixPath('README.md'),
 PosixPath('mydir'),
 PosixPath('2.tmp'),
 PosixPath('mnist_test.csv'),
 PosixPath('mnist_train_small.csv'),
 PosixPath('california_housing_train.csv'),
 PosixPath('california_housing_test.csv')]

In [46]:
new_path.is_dir()

True

In [47]:
new_path = Path('mydir')
new_path.rmdir()
list(cur_path.iterdir())

[PosixPath('anscombe.json'),
 PosixPath('README.md'),
 PosixPath('2.tmp'),
 PosixPath('mnist_test.csv'),
 PosixPath('mnist_train_small.csv'),
 PosixPath('california_housing_train.csv'),
 PosixPath('california_housing_test.csv')]

# 12.6 Lab 12: More file operations

---



How might you calculate the total size of all files ending with .test that aren't symlinks in a directory? If your first answer was using `os` and `os.path`, also try it with `pathlib`, and vice versa.

Write some code that builds off your solution, and moves the .test files detected above to a new subdirectory in the same directory called 'backup'.

## 12.6.2 Solutions
### Human (author) solutions

In [49]:
! cp california_housing_test.csv california_housing_test.csv.test
! cp california_housing_train.csv california_housing_train.csv.test
! mkdir subdir
! cp mnist_test.csv subdir/mnist_test.csv.test
! mkdir subdir/subdir2
! cp mnist_train_small.csv subdir/subdir2/mnist_train_small.csv.test

In [50]:
# Human (author) solution - pathlib library

import pathlib
cur_path = pathlib.Path(".")

size = 0
for text_path in cur_path.rglob("*.test"):
    if not text_path.is_symlink():
        size += text_path.stat().st_size

print(size)

56820894


#### Move files to backup directory

In [51]:
# Human (author) solution - pathlib library

import pathlib
cur_path = pathlib.Path(".")
new_path = pathlib.Path("backup")
new_path.mkdir(exist_ok=True)   #A

size = 0
for text_path in cur_path.rglob("*.test"):
    if not text_path.is_symlink():
        size += text_path.stat().st_size
        text_path.rename(new_path / text_path.name)   #B

print(size)

58828465


#### Refactor to use `os` library

In [52]:
! cp sample_data/california_housing_test.csv california_housing_test.csv.test
! cp california_housing_train.csv california_housing_train.csv.test
! cp mnist_test.csv subdir/mnist_test.csv.test
! cp mnist_train_small.csv subdir/subdir2/mnist_train_small.csv.test
! rm backup/*.test

cp: cannot stat 'sample_data/california_housing_test.csv': No such file or directory


In [53]:
# Human (author) solution - os library

import os
cur_path = "."

size = 0
for root, dirs, files in os.walk(os.curdir):   #A
    for file in files:      #B
        test_path = os.path.join(root, file)

        if (not os.path.islink(test_path) and
            os.path.splitext(test_path)[-1] == '.test'):  #C
            size += os.path.getsize(test_path)   #D
print(f"{size}")


56519753


In [57]:
# Human (author) solution - os library

import os

cur_path = "."
new_path = os.path.join(cur_path,"backup")

# create backup directory
os.makedirs(new_path, exist_ok=True)   #A
size = 0
for root, dirs, files in os.walk(cur_path,"sample_data"):
    for file in files:
        test_path = os.path.join(root, file)

        if (not os.path.islink(test_path)
          and os.path.splitext(test_path)[-1] == ".test"):
            size += os.path.getsize(test_path)
            os.rename(test_path, os.path.join(new_path,
                       os.path.basename(test_path)))   #B
print(f"{size}")

58828465


### Copilot solutions

In [58]:
! cp california_housing_test.csv california_housing_test.csv.test
! cp california_housing_train.csv california_housing_train.csv.test
! cp mnist_test.csv subdir/mnist_test.csv.test
! cp mnist_train_small.csv subdir/subdir2/mnist_train_small.csv.test
! rm backup/*.test

In [59]:
# Generated by Copilot - os library
# txt_file_size.py

import os

size = 0
for root, dirs, files in os.walk(os.curdir):
    for file in files:
        file_path = os.path.join(root, file)
        if not os.path.islink(file_path) and os.path.splitext(file)[1] == '.test':
            size += os.path.getsize(file_path)

print(f"Total size of all .test files that are not symlinks: {size} bytes")

Total size of all .test files that are not symlinks: 56820894 bytes


#### Move to backup directory

In [73]:
# Generated by Copilot - os library
# txt_file_size_and_move.py

import os

import shutil

size = 0
os.makedirs('backup', exist_ok=True)
for root, dirs, files in os.walk(os.curdir):
    for file in files:
        file_path = os.path.join(root, file)
        if (not os.path.islink(file_path)
                and os.path.splitext(file)[1] == '.test'
                and "backup" not in file_path):
            print(file_path)
            size += os.path.getsize(file_path)
            shutil.move(file_path, 'backup')

print(f"Total size of all .test files that are not symlinks: {size} bytes")

./subdir/mnist_test.csv.test
./subdir/subdir2/mnist_train_small.csv.test
Total size of all .test files that are not symlinks: 54813323 bytes


#### Refactor with pathlib libarary

In [61]:
! cp california_housing_test.csv california_housing_test.csv.test
! cp california_housing_train.csv california_housing_train.csv.test
! cp mnist_test.csv subdir/mnist_test.csv.test
! cp mnist_train_small.csv subdir/subdir2/mnist_train_small.csv.test
! rm backup/*.test

In [65]:
# Generated by Copilot - pathlib library
# txt_file_size_and_move.py

from pathlib import Path

size = 0
backup_dir = Path('backup')
backup_dir.mkdir(exist_ok=True)

for file in Path().rglob('*'):
    if not file.is_symlink() and file.suffix == '.test':
        size += file.stat().st_size
        file.rename(backup_dir / file.name)

print(f"Total size of all .test files that are not symlinks: {size} bytes")


Total size of all .test files that are not symlinks: 58828465 bytes


### Colaboratory solutions

In [66]:
! cp california_housing_test.csv california_housing_test.csv.test
! cp california_housing_train.csv california_housing_train.csv.test
! cp mnist_test.csv subdir/mnist_test.csv.test
! cp mnist_train_small.csv subdir/subdir2/mnist_train_small.csv.test
! rm backup/*.test

In [67]:
# Generated by Colaboratory - os libary
# prompt: Write a Python script to calculate the total size of all files with a
#         .test extension that are not symlinks in the current directory and subdirectories.

import os

total_size = 0

for root, _, files in os.walk('.'):
    for file in files:
        if file.endswith('.test') and not os.path.islink(os.path.join(root, file)):
            total_size += os.path.getsize(os.path.join(root, file))

print(f"Total size of .test files: {total_size} bytes")


Total size of .test files: 56820894 bytes


#### Move to backup directory

In [68]:
# Generated by Colaboratory - os libary
# prompt: Modify that script in the previous cell to also move the files with a
#         .test extension to a subdirectory of the current directory called backup

import os

total_size = 0
backup_dir = os.path.join(os.getcwd(), 'backup')

# Create the backup directory if it doesn't exist
if not os.path.exists(backup_dir):
    os.makedirs(backup_dir)

for root, _, files in os.walk('.'):
    for file in files:
        if file.endswith('.test') and not os.path.islink(os.path.join(root, file)):
            total_size += os.path.getsize(os.path.join(root, file))
            # Move the file to the backup directory
            os.rename(os.path.join(root, file), os.path.join(backup_dir, file))

print(f"Total size of .test files: {total_size} bytes")


Total size of .test files: 58828465 bytes


#### Refactor with pathlib libarary

In [69]:
! cp california_housing_test.csv california_housing_test.csv.test
! cp california_housing_train.csv california_housing_train.csv.test
! cp mnist_test.csv subdir/mnist_test.csv.test
! cp mnist_train_small.csv subdir/subdir2/mnist_train_small.csv.test
! rm backup/*.test

In [70]:
# this checks to see if the version of Python can run Path.walk()
import sys
from pathlib import Path

if sys.version_info.minor < 12:
  raise RuntimeError(f"Python version must be >= 3.12 to run Path.walk(), current version is {sys.version}")

# Generated by Colaboratory - pathlib libary
# prompt: Rewrite the script in the previous cell to use pathlib instead of os
#         and os.path. Do not use os or os.path modules

total_size = 0
backup_dir = Path.cwd().joinpath('backup')

# Create the backup directory if it doesn't exist
if not backup_dir.exists():
    backup_dir.mkdir(parents=True)

for root, _, files in Path('.').walk():
    for file in files:
        if file.endswith('.test') and not file.is_symlink():
            total_size += file.stat().st_size
            # Move the file to the backup directory
            file.rename(backup_dir.joinpath(file))

print(f"Total size of .test files: {total_size} bytes")

RuntimeError: Python version must be >= 3.12 to run Path.walk(), current version is 3.10.12 (main, Mar 22 2024, 16:50:05) [GCC 11.4.0]