# Files, FileSystem
- general, basics
    - open (builtin)
    - pathlib (object-oriented module for working with the filesystem)
    - os.path (old-school, string-based module for working with the filesystem)
    - shutil (high-level operations: copy, recursive delete)
- specific or high level
    - json
    - csv
    - lxml, BeautifulSoup
    - pandas  

In [98]:
import json
from pathlib import Path
import shutil
from datetime import datetime

In [12]:
# Open the CSV in text mode, decoding it as UTF-8. Without a `with` block the
# handle stays open until f.close() is called explicitly.
f = open(file='data/cities.csv', mode='r', encoding='UTF-8')
f

<_io.TextIOWrapper name='data/cities.csv' mode='r' encoding='UTF-8'>

In [14]:
# Read every remaining line of the open file into a list; each item keeps its
# trailing newline character.
data = f.readlines()
data[:5]

['insee_code,city_code,zip_code,label,latitude,longitude,department_name,department_number,region_name,region_geojson_name\n',
 '25620,ville du pont,25650,ville du pont,46.999873398,6.498147193,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté\n',
 '25624,villers grelot,25640,villers grelot,47.361512085,6.235167025,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté\n',
 '25615,villars les blamont,25310,villars les blamont,47.368383721,6.871414913,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté\n',
 '25619,les villedieu,25240,les villedieu,46.713906258,6.26583065,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté\n']

In [16]:
# Release the file handle opened earlier; f can no longer be read after this.
f.close()

In [20]:
# The context manager closes the file when the block is left, even if an
# exception is raised while reading.
with open(file='data/cities.csv', mode='r', encoding='UTF-8') as f:
    data = f.readlines()
# autoclose here
print(data[:5])

['insee_code,city_code,zip_code,label,latitude,longitude,department_name,department_number,region_name,region_geojson_name\n', '25620,ville du pont,25650,ville du pont,46.999873398,6.498147193,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté\n', '25624,villers grelot,25640,villers grelot,47.361512085,6.235167025,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté\n', '25615,villars les blamont,25310,villars les blamont,47.368383721,6.871414913,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté\n', '25619,les villedieu,25240,les villedieu,46.713906258,6.26583065,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté\n']


In [26]:
# Same read, but with a Windows-style backslash separator (written as \\ inside
# the string literal). This spelling is not portable to POSIX systems.
with open(file='data\\cities.csv', mode='r', encoding='UTF-8') as f:
    data2 = f.readlines()
# autoclose here
print(data2[:5])

['insee_code,city_code,zip_code,label,latitude,longitude,department_name,department_number,region_name,region_geojson_name\n', '25620,ville du pont,25650,ville du pont,46.999873398,6.498147193,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté\n', '25624,villers grelot,25640,villers grelot,47.361512085,6.235167025,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté\n', '25615,villars les blamont,25310,villars les blamont,47.368383721,6.871414913,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté\n', '25619,les villedieu,25240,les villedieu,46.713906258,6.26583065,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté\n']


In [42]:
city = {
    'name': 'Toulouse',
    'population': 477000,
}
# mode='w' creates the file, or truncates it if it already exists.
with open(file='data/city.json', mode='w', encoding='UTF-8') as f:
    json.dump(obj=city, fp=f)
# file is flushed and closed here

## Manage filesystem with pathlib

In [47]:
# Path() with no argument is the relative path '.', i.e. the current directory.
current_dir = Path()
current_dir

WindowsPath('.')

In [51]:
# absolute() anchors the relative path at the current working directory
# (it does not resolve symlinks or '..' components, unlike resolve()).
my_dir = current_dir.absolute()
my_dir

WindowsPath('C:/Users/matth/Documents/Formation/Python/Stage202411')

In [55]:
# The concrete path classes (WindowsPath here) inherit from Path, so this holds.
isinstance(my_dir, Path)

True

In [57]:
# Path instantiates a platform-specific subclass; on this machine it is WindowsPath.
type(my_dir)

pathlib.WindowsPath

In [60]:
# .parent is the containing directory, one level up, itself a path object.
my_dir.parent

WindowsPath('C:/Users/matth/Documents/Formation/Python')

In [68]:
# .parents yields every enclosing directory, nearest first, up to the root.
for ancestor in my_dir.parents:
    print(ancestor)

C:\Users\matth\Documents\Formation\Python
C:\Users\matth\Documents\Formation
C:\Users\matth\Documents
C:\Users\matth
C:\Users
C:\


In [86]:
# Report kind, size and last-modification time for each direct child of my_dir.
for entry in my_dir.iterdir():
    print(f"* {entry}")
    print("\t- is a directory:", entry.is_dir())
    print("\t- is a regular file:", entry.is_file())
    entry_stat = entry.stat()  # a single stat() call provides size and timestamps
    print("\t- size:", entry_stat.st_size)
    # st_mtime is a POSIX timestamp; fromtimestamp() converts it to naive local time.
    print("\t- last modifed:", datetime.fromtimestamp(entry_stat.st_mtime))

* C:\Users\matth\Documents\Formation\Python\Stage202411\.git
	- is a directory: True
	- is a regular file: False
	- size: 4096
	- last modifed: 2024-12-05 10:20:43.240155
* C:\Users\matth\Documents\Formation\Python\Stage202411\.gitignore
	- is a directory: False
	- is a regular file: True
	- size: 94
	- last modifed: 2024-12-03 13:12:08.537911
* C:\Users\matth\Documents\Formation\Python\Stage202411\.ipynb_checkpoints
	- is a directory: True
	- is a regular file: False
	- size: 4096
	- last modifed: 2024-12-05 14:45:24.427371
* C:\Users\matth\Documents\Formation\Python\Stage202411\.mypy_cache
	- is a directory: True
	- is a regular file: False
	- size: 0
	- last modifed: 2024-12-02 10:14:08.112272
* C:\Users\matth\Documents\Formation\Python\Stage202411\.virtual_documents
	- is a directory: True
	- is a regular file: False
	- size: 4096
	- last modifed: 2024-12-05 14:44:57.261229
* C:\Users\matth\Documents\Formation\Python\Stage202411\Basics.ipynb
	- is a directory: False
	- is a regular file: True
	[... output truncated ...]

In [90]:
# The '**' component makes the pattern recursive: it matches .py files in
# current_dir and in every directory below it.
for py_file in current_dir.glob('**/*.py'):
    print(py_file)

hello.py
o.py
.ipynb_checkpoints\hello-checkpoint.py
ProjectCity\city.py
ProjectCity\demo_add.py
ProjectCity\main.py
ProjectEuclide\euclide.py
ProjectEuclide\main.py
ProjectCity\tests\conftest.py
ProjectCity\tests\test_city.py
ProjectCity\tests\__init__.py
ProjectEuclide\tests\test_euclide.py
ProjectEuclide\tests\__init__.py


In [92]:
# joinpath() is the method form of the `/` operator used to build child paths.
csv_path = my_dir.joinpath('data', 'cities.csv')
csv_path

WindowsPath('C:/Users/matth/Documents/Formation/Python/Stage202411/data/cities.csv')

In [94]:
# exists() queries the filesystem: True only if something is present at this path.
csv_path.exists()

True

In [96]:
# Build the destination path for the copy; building a path does not create the file.
csv_path2 = my_dir.joinpath('data', 'cities2.csv')
csv_path2.exists()

False

In [100]:
# Duplicate the CSV under the new name, then confirm the copy is now on disk.
shutil.copy(src=csv_path, dst=csv_path2)
csv_path2.exists()

True