Gestion des fichiers
--

La plupart des systèmes de fichiers modernes sont en unicode et les fichiers textuels générés sont encodés en unicode. Mais on croise souvent des fichiers textuels encodés différemment.

In [1]:
with open("fichier_unicode.txt") as f:
    content = f.read()
print(content)
print(type(content))

Ceci est un fichier.
Il contient deux caractères accentués.

<class 'str'>


In [2]:
with open("fichier_unicode.txt", encoding="utf-8") as f:
    content = f.read()
print(content)
print(type(content))

Ceci est un fichier.
Il contient deux caractères accentués.

<class 'str'>


In [3]:
with open("fichier_latin1.txt") as f:
    content = f.read()
print(content)
print(type(content))

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 7: invalid continuation byte

In [4]:
with open("fichier_latin1.txt", encoding="latin1") as f:
    content = f.read()
print(content)
print(type(content))

accentué.

<class 'str'>


In [None]:
with open("fichier_latin1.txt", encoding="iso-8859-15") as f:
    content = f.read()
print(content)
print(type(content))

Ouverture d'un fichier binaire
--

In [5]:
with open("fichier_latin1.txt", "rb") as f:
    content = f.read()
print(content)
print(type(content))

b'accentu\xe9.\n'
<class 'bytes'>


In [6]:
# You should execute this line to install chardet
import subprocess
print(subprocess.getstatusoutput("pip install chardet"))

(0, 'Collecting chardet\n  Downloading chardet-5.2.0-py3-none-any.whl (199 kB)\n\x1b[?25l     \x1b[38;5;237m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m0.0/199.4 kB\x1b[0m \x1b[31m?\x1b[0m eta \x1b[36m-:--:--\x1b[0m\n\x1b[2K     \x1b[38;2;249;38;114m━━━━━━━━\x1b[0m\x1b[38;5;237m╺\x1b[0m\x1b[38;5;237m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m41.0/199.4 kB\x1b[0m \x1b[31m3.7 MB/s\x1b[0m eta \x1b[36m0:00:01\x1b[0m\n\x1b[2K     \x1b[38;2;249;38;114m━━━━━━━━\x1b[0m\x1b[38;5;237m╺\x1b[0m\x1b[38;5;237m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m41.0/199.4 kB\x1b[0m \x1b[31m3.7 MB/s\x1b[0m eta \x1b[36m0:00:01\x1b[0m\n\x1b[2K     \x1b[38;2;249;38;114m━━━━━━━━\x1b[0m\x1b[38;5;237m╺\x1b[0m\x1b[38;5;237m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m41.0/199.4 kB\x1b[0m \x1b[31m3.7 MB/s\x1b[0m eta \x1b[36m0:00:01\x1b[0m\n\x1b[2K     \x1b[38;2;249;38;114m━━━━━━━━\x1b[0m\x1b[38;5;237m╺\x1b[0m\x1b[38;5;237m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\x1b[0m \x1b[32m41.0/199.4 kB\x1b[0m \x1b[31

In [7]:
import chardet
with open("fichier_latin1.txt", "rb") as f:
    print(chardet.detect(f.read()))

{'encoding': 'ISO-8859-1', 'confidence': 0.73, 'language': ''}


Ouverture d'un fichier en détectant son encodage
--

In [8]:
import chardet
with open("fichier_latin1.txt", "rb") as f:
    detected_encoding = chardet.detect(f.read())

with open("fichier_latin1.txt", encoding=detected_encoding["encoding"]) as f:
    content = f.read()
    print(content)
    print(type(content))

accentué.

<class 'str'>


Ecriture dans un fichier
--

In [None]:
with open("test.txt", "w") as f:
    position = f.write("Truc.")
    print(position)

with open("test.txt", "r") as f:
    assert f.read() == "Truc.", "Le contenu n'est pas conforme."

In [None]:
with open("test.txt", "a") as f:
    print(f.tell())
    content = f.write("Chose.")

with open("test.txt", "r") as f:
    assert f.read() == "Truc.Chose."

In [None]:
print(content)

In [None]:
with open("multi.txt", "w") as f:
    f.write("Ceci\nest\nun\nfichier\nmultilignes.")

with open("multi.txt") as f:
    print(repr(f.read()))

In [None]:
with open("multi.txt") as f:
    print(f.readlines())

In [None]:
with open("multi.txt") as f:
    for i, line in enumerate(f.readlines()):
        print("ligne", i, ":", repr(line))

In [None]:
with open("multi.txt") as f:
    for i, line in enumerate(f.readlines()):
        print("ligne", i, ":", line.strip())

In [2]:
with open("multi.txt") as f:
    i = 0
    while True:
        line = f.readline()
        print("ligne", i, ":", line.strip(), "|", f.tell())
        if not line:
            break
        i += 1

ligne 0 : Ceci | 5
ligne 1 : était | 12
ligne 2 : un | 15
ligne 3 : beau | 20
ligne 4 : fichier | 28
ligne 5 : multilignes. | 40
ligne 6 :  | 40


In [None]:
with open("test.txt", "r+") as f:
    print(f.tell())
    print(f.read())
    print(f.tell())
    f.seek(4)
    f.write("-c")

with open("test.txt", "r") as f:
    assert f.read() == "Truc-chose.", "Erreur dans la démo"
    print("Ce qui s'est passé est ce qui était prévu")

In [None]:
with open("test.txt", "r") as f:
    content = f.read()

content = content[:5].lower() + "bidule-" + content[5:]

with open("test.txt", "w") as f:
    f.write(content)

with open("test.txt", "r") as f:
    print(f.read())

In [None]:
from os.path import exists
if not exists("existe_pas.txt"):
    print("Création du fichier")
    with open("existe_pas.txt", "w") as f:
        pass
else:
    print("fichier déjà créé")

In [None]:
with open("multi.txt", "r") as f:
    content = f.readlines()

print(content)
content.insert(3, "beau\n")
content[1] = "était\n"
print(content)

with open("multi.txt", "w") as f:
    f.writelines(content)

with open("multi.txt", "r") as f:
    print(f.read())

---