In [130]:
import os

from pprint import pprint
from pathlib import Path

## `joinpath`

In [17]:
my_files = ['accounts.txt', 'test.csv', 'bignumbers.csv']

# Build the base directory once; it does not change between iterations.
base_dir = Path('data/test')

for file in my_files:
    # Call joinpath on the instance instead of going through the class
    # (`Path.joinpath(obj, ...)`); `base_dir / file` would be equivalent.
    print(base_dir.joinpath(file))

data/test/accounts.txt
data/test/test.csv
data/test/bignumbers.csv


In [20]:
dirs = ['home', 'thomas', 'Documents']

# Unpack the list so every entry becomes one path component; previously
# `dirs` was defined but the same strings were repeated as literals.
print(os.path.join(*dirs, 'test.txt'))

home/thomas/Documents/test.txt


In [23]:
# Prefix every known file name with the 'Documents' directory
for file in my_files:
    documents_path = os.path.join('Documents', file)
    print(documents_path)

Documents/accounts.txt
Documents/test.csv
Documents/bignumbers.csv


## `cwd`

In [36]:
str(Path.cwd())

'/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2'

In [35]:
# Join on the Path instance returned by cwd() rather than calling the method
# through the class; `Path.cwd() / 'test.txt'` would give the same result.
Path.cwd().joinpath('test.txt')

PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/test.txt')

## `mkdir`

In [40]:
# `parents=True` also creates missing parent directories; `exist_ok=True`
# prevents an error if the directory already exists.
new_dir = Path('./docs/test/new_dir')
new_dir.mkdir(parents=True, exist_ok=True)

## Paths: `absolute` and `relative`

In [64]:
# dir1: absolute path of the current working directory
# dir2: relative path consisting only of the directory name 'docs'
dir1, dir2 = Path.cwd(), Path('docs')

In [65]:
# Call the method on the instance instead of passing it to the class method
dir1.is_absolute()

True

In [66]:
# Call the method on the instance instead of passing it to the class method
dir2.is_absolute()

False

In [67]:
dir2

PosixPath('docs')

In [68]:
dir2.absolute()

PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/docs')

In [69]:
dir2.resolve()

PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/docs')

## Get `filenames` and `dirs`

In [82]:
# Join on the instance rather than via the class; `dir1 / 'test.txt'`
# would produce the same path.
filename = dir1.joinpath('test.txt')
filename

PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/test.txt')

In [85]:
# filename
filename.name

'test.txt'

In [86]:
# parent dirs
filename.parent

PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2')

## Dateigröße und Ordnerinhalte

In [87]:
filepath = Path('./data/lorem_ipsum.txt')

In [88]:
filepath.exists()

True

In [91]:
# File size in bytes
filepath.stat().st_size

861

In [97]:
# Get all files and dirs
list(dir1.glob('*'))

[PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/german_publications.json'),
 PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/.DS_Store'),
 PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/ZKDL2324_Modul2'),
 PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/.gitkeep'),
 PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/zbiw-data-librarian-module2-1_edits.ipynb'),
 PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/docs'),
 PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/Data Librarian Modul 2 Kennenlerntag.pdf'),
 PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/readme.md'),
 PosixPath('/Users/thomas/Coding/data-librarian/20

In [99]:
# Get all jupyter notebooks
notebooks = list(dir1.glob('*.ipynb'))
notebooks

[PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/zbiw-data-librarian-module2-1_edits.ipynb'),
 PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/pyterrier_intro.ipynb'),
 PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/zbiw-data-librarian-module2-1_aufgabenstellung.ipynb'),
 PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/zbiw-data-librarian-module2-1_pandas.ipynb'),
 PosixPath('/Users/thomas/Coding/data-librarian/2023-2024_Data_Librarian_Thomas_Schmidt/Modul_2/tut-write-read-files.ipynb')]

In [112]:
# Add up the byte sizes of all notebooks and report the total in MB
size_total = sum(notebook.stat().st_size for notebook in notebooks)

print(f'Size of all notebooks: {size_total / 1000000:.2f} MB')

Size of all notebooks: 9.56 MB


In [114]:
# Calculate size of all files in the current directory
# List the directory once so that the size and the count are derived from the
# same snapshot. `is_file()` skips directories and, unlike the glob pattern
# '*.*', also keeps files whose name contains no dot.
all_files = [entry for entry in Path('.').iterdir() if entry.is_file()]
size_total = sum(entry.stat().st_size for entry in all_files)

print(f'Size of all files: {size_total / 1000000:.2f} MB')
print(f"File count: {len(all_files)}")

Size of all files: 12.53 MB
File count: 12


## Gültigkeit von Pfaden

In [116]:
Path('data/description.md').exists()

True

In [117]:
Path('data/error.md').exists()

False

In [119]:
Path('data/description.md').is_file()

True

In [120]:
Path('./data/').is_dir()

True

In [121]:
Path('./fictional_dir/').is_dir()

False

## `open()`, `read()` and `write()`

### Dateien lesen

In [132]:
file = Path('./data/lorem_ipsum.txt')

# Read the complete file into a single string (Path.open defaults to mode 'r')
with file.open(encoding='utf-8') as filein:
    data = filein.read()

print(data)

Distinctio soluta deleniti quidem maiores et sed voluptas. Qui est quis libero dolor. Illo omnis quibusdam molestias et quis nam repudiandae sint.

Impedit quod repellendus dolor. Earum est dolorem non voluptas aut. Cum corporis aut aspernatur consequatur vel aut sit qui. Veniam sit dolorem omnis amet facilis nobis dolor. Eum magni sunt quia libero. Fuga voluptatem et voluptatem aut veritatis possimus.

Illum et deleniti nisi ex autem eum. Eveniet cum porro enim vitae sint consectetur. Fugit aut fugit inventore omnis. Quidem sint quia reprehenderit quae molestiae quia. Id facere similique et.

Vero dignissimos aspernatur ex qui architecto. Ipsam repellendus sit ipsa. Nihil eligendi accusamus esse repudiandae debitis. Dolore eligendi aut enim dolorem.

Esse dicta hic id. Alias alias mollitia nihil porro voluptatem sit et. Maiores sint autem numquam.



In [137]:
with open(file, 'r', encoding='utf-8') as filein:
    # Iterate over the file line by line and keep only lines that still have
    # content after stripping; this also drops whitespace-only lines, which
    # the comparison with '\n' let through as empty strings.
    data = [line.strip() for line in filein if line.strip()]

data

['Distinctio soluta deleniti quidem maiores et sed voluptas. Qui est quis libero dolor. Illo omnis quibusdam molestias et quis nam repudiandae sint.',
 'Impedit quod repellendus dolor. Earum est dolorem non voluptas aut. Cum corporis aut aspernatur consequatur vel aut sit qui. Veniam sit dolorem omnis amet facilis nobis dolor. Eum magni sunt quia libero. Fuga voluptatem et voluptatem aut veritatis possimus.',
 'Illum et deleniti nisi ex autem eum. Eveniet cum porro enim vitae sint consectetur. Fugit aut fugit inventore omnis. Quidem sint quia reprehenderit quae molestiae quia. Id facere similique et.',
 'Vero dignissimos aspernatur ex qui architecto. Ipsam repellendus sit ipsa. Nihil eligendi accusamus esse repudiandae debitis. Dolore eligendi aut enim dolorem.',
 'Esse dicta hic id. Alias alias mollitia nihil porro voluptatem sit et. Maiores sint autem numquam.']

### Dateien schreiben im Schreibmodus `w`

In [143]:
def print_filecontent(filepath):
    """Print the complete content of a text file to standard output.

    Args:
        filepath: Location of the file; it is passed to ``open()`` unchanged
            and read as UTF-8 text.
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        content = handle.read()
    print(content)

In [151]:
new_file = Path('./data/textfile.txt')

# Mode 'w' creates the file or truncates an existing one before writing
with new_file.open('w', encoding='utf-8') as fileout:
    fileout.write('Hello!')

print_filecontent(new_file)

Hello!


### Schreiben mit append `a`

In [152]:
# Mode 'a' keeps the existing content and adds the new text at the end
with new_file.open('a', encoding='utf-8') as fileout:
    fileout.write('\nAnother hello!')

print_filecontent(new_file)

Hello!
Another hello!


## `shelve`