# pathlib module 

In [1]:
import os
import sys
import pathlib

In [2]:
print(dir(pathlib))



## Working with Directories

In [3]:
print(f"Current directory: {pathlib.Path.cwd()}")
print(f"Home directory   : {pathlib.Path.home()}")

Current directory: /workspaces/PythonForDataEngineeringApril2023/10_Modules/04b_pathlib
Home directory   : /home/codespace


In [4]:
pathlib.Path.home()

PosixPath('/home/codespace')

### List Directories

In [7]:
path = pathlib.Path.cwd()
dirs = [e for e in path.iterdir() if e.is_dir()]
print(dirs)

[PosixPath('/workspaces/PythonForDataEngineeringApril2023/10_Modules/04b_pathlib/sampefolder')]


In [8]:
files = [e for e in path.iterdir() if e.is_file()]
print(files)

[PosixPath('/workspaces/PythonForDataEngineeringApril2023/10_Modules/04b_pathlib/samplefile.txt'), PosixPath('/workspaces/PythonForDataEngineeringApril2023/10_Modules/04b_pathlib/a_pathlib.ipynb')]


### Change directory

In [9]:
path = pathlib.Path("..")
print(f"Current directory: {pathlib.Path.cwd()}")

Current directory: /workspaces/PythonForDataEngineeringApril2023/10_Modules/04b_pathlib


In [10]:
os.chdir(path)
print(f"Current directory: {pathlib.Path.cwd()}")

Current directory: /workspaces/PythonForDataEngineeringApril2023/10_Modules


In [12]:
os.chdir(r"/workspaces/PythonForDataEngineeringApril2023/")
print(f"Current directory: {pathlib.Path.cwd()}")

Current directory: /workspaces/PythonForDataEngineeringApril2023


## Necessity of pathlib

Method 1

In [13]:
outpath = os.path.join(os.getcwd(), "output")
outpath_file = os.path.join(outpath, "out.xlsx")
print(outpath_file)

/workspaces/PythonForDataEngineeringApril2023/output/out.xlsx


Method 2

In [14]:
outpath_file = os.path.join(os.path.join(os.getcwd(), "output"), "out.xlsx")
print(outpath_file)

/workspaces/PythonForDataEngineeringApril2023/output/out.xlsx


Method 3

In [15]:
outpath_file = pathlib.Path.cwd() / "output" / "output.xlsx"
print(outpath_file)

/workspaces/PythonForDataEngineeringApril2023/output/output.xlsx


## Working with paths

### Constructing the paths

In [16]:
wave = pathlib.Path("ocean", "wave.txt")
print(wave)  # ocean\wave.txt

ocean/wave.txt


In [17]:
home = pathlib.Path.home()
wave_absolute = pathlib.Path(home, "ocean", "wave.txt")
print(home)  # C:\Users\Amma
print(wave_absolute)  # C:\Users\Amma\ocean\wave.txt

/home/codespace
/home/codespace/ocean/wave.txt


In [18]:
shark = pathlib.Path(
    pathlib.Path.home(), "ocean", "animals", pathlib.Path("fish", "shark.txt")
)
print(shark)  # C:\Users\Amma\ocean\animals\fish\shark.txt

/home/codespace/ocean/animals/fish/shark.txt


### Computing Relative Paths

In [19]:
shark = pathlib.Path("ocean", "animals", "fish", "shark.txt")
below_ocean = shark.relative_to(pathlib.Path("ocean"))
below_animals = shark.relative_to(pathlib.Path("ocean", "animals"))
print(shark)  # ocean\animals\fish\shark.txt
print(below_ocean)  # animals\fish\shark.txt
print(below_animals)  # fish\shark.txt

ocean/animals/fish/shark.txt
animals/fish/shark.txt
fish/shark.txt


In [20]:
shark.parent

PosixPath('ocean/animals/fish')

In [21]:
shark.parent.parent

PosixPath('ocean/animals')

In [22]:
shark.parent.parent.parent

PosixPath('ocean')

In [23]:
if sys.platform == "win32":
    print(pathlib.PureWindowsPath("foo/bar"))
else:
    print(pathlib.PosixPath("foo/bar"))

foo/bar


In [24]:
pathlib.PurePosixPath("foo/bar")

PurePosixPath('foo/bar')

In [25]:
pathlib.PurePath("foo/bar")

PurePosixPath('foo/bar')

In [26]:
path_obj = pathlib.PurePath("/usr /bin /python")
print(f"{path_obj =}")

path_obj =PurePosixPath('/usr /bin /python')


In [27]:
print(dir(path_obj))

['__bytes__', '__class__', '__class_getitem__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__fspath__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__rtruediv__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__truediv__', '_cached_cparts', '_cparts', '_drv', '_flavour', '_format_parsed_parts', '_from_parsed_parts', '_from_parts', '_hash', '_make_child', '_parse_args', '_parts', '_pparts', '_root', '_str', 'anchor', 'as_posix', 'as_uri', 'drive', 'is_absolute', 'is_relative_to', 'is_reserved', 'joinpath', 'match', 'name', 'parent', 'parents', 'parts', 'relative_to', 'root', 'stem', 'suffix', 'suffixes', 'with_name', 'with_stem', 'with_suffix']


In [28]:
print(f"""{path_obj.is_absolute() =}""")

path_obj.is_absolute() =True


In [29]:
pathlib.PurePath("C:\Windows\Logs\WindowsUpdate").is_absolute()

False

In [30]:
pathlib.PurePath("C:\Windows\Logs\WindowsUpdate").as_uri()

ValueError: relative path can't be expressed as a file URI

In [31]:
try:
    pathlib.PurePath("\Logs\WindowsUpdate").as_uri()
except ValueError as ex:
    print(ex)

relative path can't be expressed as a file URI


In [32]:
pathlib.WindowsPath("C:\Windows\Logs\WindowsUpdate")

NotImplementedError: cannot instantiate 'WindowsPath' on your system

In [33]:
path_obj = pathlib.WindowsPath("C:\Windows\Logs\WindowsUpdate")
print(dir(path_obj))

NotImplementedError: cannot instantiate 'WindowsPath' on your system

In [34]:
print(
    f"""
{path_obj.is_dir()   =}
{path_obj.is_file()  =}
{path_obj.is_fifo()  =}

{path_obj.exists()   =}
{path_obj.drive      =}
{path_obj.cwd()      =}
"""
)

AttributeError: 'PurePosixPath' object has no attribute 'is_dir'

In [35]:
print(sys.executable)

/workspaces/PythonForDataEngineeringApril2023/.venv/bin/python


```
    .name: the file name without any directory
    .parent: the directory containing the file, or the parent directory if path is a directory
    .stem: the file name without the suffix
    .suffix: the file extension
    .anchor: the part of the path before the directories
```

In [37]:
path_obj = pathlib.PosixPath(sys.executable)
print(
    f""" 
{path_obj.drive =}
{path_obj.parts =}

{path_obj.name          =}
{path_obj.parent        =}
{path_obj.parent.parent =}
{path_obj.stem          =}
{path_obj.suffix        =}
{path_obj.anchor        =}
"""
)

 
path_obj.drive =''
path_obj.parts =('/', 'workspaces', 'PythonForDataEngineeringApril2023', '.venv', 'bin', 'python')

path_obj.name          ='python'
path_obj.parent        =PosixPath('/workspaces/PythonForDataEngineeringApril2023/.venv/bin')
path_obj.parent.parent =PosixPath('/workspaces/PythonForDataEngineeringApril2023/.venv')
path_obj.stem          ='python'
path_obj.suffix        =''
path_obj.anchor        ='/'



In [38]:
new_path = path_obj.parent.parent / ("new" + path.suffix)

new_path

PosixPath('/workspaces/PythonForDataEngineeringApril2023/.venv/new')

In [35]:
pathlib.WindowsPath("/python").joinpath("edited_version")

WindowsPath('/python/edited_version')

In [36]:
pathlib.PurePosixPath("/python").joinpath("edited_version")

PurePosixPath('/python/edited_version')

In [37]:
pathlib.Path.home().joinpath("python", "scripts", "test.py")

WindowsPath('C:/Users/Amma/python/scripts/test.py')

## Working with directories

In [38]:
path_obj = pathlib.Path("first/second/__third__")
print(f"{path_obj.is_dir() =}")

path_obj.is_dir() =False


In [39]:
path_obj.resolve()

WindowsPath('first/second/__third__')

In [40]:
os.listdir()

['.ipynb_checkpoints', 'Pathlib-Cheatsheet.pdf', 'pathlib_module.ipynb']

In [41]:
pathlib.Path("first/second/__third__").mkdir(parents=True)

In [42]:
try:
    pathlib.Path("first/second/__third__").mkdir(parents=True)
except FileExistsError as ex:
    print(ex)

[WinError 183] Cannot create a file when that file already exists: 'first\\second\\__third__'


In [43]:
pathlib.Path("first/second/__third__").mkdir(parents=True, exist_ok=True)

In [44]:
os.makedirs("first/second/__third__", exist_ok=True)

In [45]:
os.listdir()

['.ipynb_checkpoints',
 'first',
 'Pathlib-Cheatsheet.pdf',
 'pathlib_module.ipynb']

In [46]:
pathlib.Path("first").rename(".first_one")

WindowsPath('.first_one')

In [47]:
os.rename(".first_one", ".first_other")

In [48]:
os.listdir()

['.first_other',
 '.ipynb_checkpoints',
 'Pathlib-Cheatsheet.pdf',
 'pathlib_module.ipynb']

## Write and Read Files

creating empty file

In [49]:
path = pathlib.Path("file.txt")
path.touch()
print(path)

file.txt


In [50]:
os.listdir()

['.first_other',
 '.ipynb_checkpoints',
 'file.txt',
 'Pathlib-Cheatsheet.pdf',
 'pathlib_module.ipynb']

In [51]:
path.rename("file2.txt")

WindowsPath('file2.txt')

In [52]:
os.listdir()

['.first_other',
 '.ipynb_checkpoints',
 'file2.txt',
 'Pathlib-Cheatsheet.pdf',
 'pathlib_module.ipynb']

In [53]:
p = pathlib.Path(".first_other/sample_text_file.txt")
p.write_text("Sample to write data to a file")

30

In [54]:
path = pathlib.Path.cwd() / ".first_other" / "sample_text_file.txt"
path.read_text()

'Sample to write data to a file'

In [55]:
os.listdir()

['.first_other',
 '.ipynb_checkpoints',
 'file2.txt',
 'Pathlib-Cheatsheet.pdf',
 'pathlib_module.ipynb']

In [56]:
path

WindowsPath('D:/MEGAsync/Python-related/PythonMaterial/python3/10_Modules/pathlib_module/.first_other/sample_text_file.txt')

In [57]:
path.with_suffix(".md")

WindowsPath('D:/MEGAsync/Python-related/PythonMaterial/python3/10_Modules/pathlib_module/.first_other/sample_text_file.md')

In [58]:
path.replace(path.with_suffix(".md"))

WindowsPath('D:/MEGAsync/Python-related/PythonMaterial/python3/10_Modules/pathlib_module/.first_other/sample_text_file.md')

In [59]:
os.listdir(".first_other")

['sample_text_file.md', 'second']

### Display a Directory Tree

In [60]:
def tree(directory):
    print(f"+ {directory}")
    for path in sorted(directory.rglob("*")):
        depth = len(path.relative_to(directory).parts)
        spacer = "    " * depth
        print(f"{spacer}+ {path.name}")

In [61]:
tree(pathlib.Path.cwd())

+ D:\MEGAsync\Python-related\PythonMaterial\python3\10_Modules\pathlib_module
    + .first_other
        + sample_text_file.md
        + second
            + __third__
    + .ipynb_checkpoints
        + pathlib_module-checkpoint.ipynb
    + file2.txt
    + Pathlib-Cheatsheet.pdf
    + pathlib_module.ipynb


### Deleting Directories and Files

In [62]:
pathlib.Path("file2.txt").unlink()

In [63]:
os.chdir(".first_other")

In [64]:
os.listdir()

['sample_text_file.md', 'second']

In [65]:
pathlib.Path("sample_text_file.md").unlink()

In [66]:
os.listdir()

['second']

In [67]:
try:
    pathlib.Path("second").rmdir()
except OSError as ex:
    print(ex)

[WinError 145] The directory is not empty: 'second'


In [68]:
import shutil

shutil.rmtree("second")

In [69]:
os.chdir("..")
os.listdir()

['.first_other',
 '.ipynb_checkpoints',
 'Pathlib-Cheatsheet.pdf',
 'pathlib_module.ipynb']

In [70]:
tree(pathlib.Path(".first_other"))

+ .first_other


In [71]:
pathlib.Path(".first_other").rmdir()

In [72]:
os.listdir()

['.ipynb_checkpoints', 'Pathlib-Cheatsheet.pdf', 'pathlib_module.ipynb']

### Pretty Table

In [73]:
!pip install -U prettytable --user

Requirement already up-to-date: prettytable in c:\users\amma\appdata\roaming\python\python38\site-packages (0.7.2)


In [74]:
pathlib.Path(sys.executable).parent

WindowsPath('C:/Users/Amma/AppData/Local/Programs/Python/Python38')

In [75]:
#!/usr/bin/env python

from pathlib import Path
import datetime
from prettytable import PrettyTable
import sys

path = pathlib.Path(sys.executable).parent

pt = PrettyTable()
pt.field_names = ["File name", "Size", "Created"]

pt.align["File name"] = "l"
pt.align["Size"] = "r"
pt.align["Created"] = "l"

for e in path.glob("**/*.txt"):
    created = datetime.datetime.fromtimestamp(e.stat().st_ctime)
    size = e.stat().st_size
    pt.add_row([e.name, size, f"{created:%Y-%m-%d}"])

print(pt)

+-----------------------------------------------------------+--------+------------+
| File name                                                 |   Size | Created    |
+-----------------------------------------------------------+--------+------------+
| LICENSE.txt                                               |  31453 | 2020-05-13 |
| NEWS.txt                                                  | 905782 | 2020-05-13 |
| CREDITS.txt                                               |   1903 | 2020-05-13 |
| extend.txt                                                |   3725 | 2020-05-13 |
| HISTORY.txt                                               |  10608 | 2020-05-13 |
| NEWS.txt                                                  |  49286 | 2020-05-13 |
| NEWS2x.txt                                                |  27832 | 2020-05-13 |
| README.txt                                                |   9850 | 2020-05-13 |
| TODO.txt                                                  |   8688 | 2020-

### Counting files by extension

In [77]:
import collections

files = [path.suffix for path in path.iterdir() if path.is_file() and path.suffix]
data = collections.Counter(files)

print(data)

for key, val in data.items():
    print(f"{key}: {val}")

Counter({'.dll': 4, '.txt': 2, '.exe': 2})
.txt: 2
.exe: 2
.dll: 4
