Skip to content

Commit

Permalink
Flake8 maintenance: test_encoding.py (#557)
Browse files Browse the repository at this point in the history
- remove pytest.mark.skip from 2 tests that now pass
- remove unused imports
- change lambda statements to functions
- add asserts to act on the formerly unused variables.
  • Loading branch information
dcslagel committed Feb 22, 2023
1 parent 8ee88a2 commit 45670ed
Showing 1 changed file with 39 additions and 20 deletions.
59 changes: 39 additions & 20 deletions tests/test_encoding.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import os, sys
import os

sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
import logging
from pathlib import Path

import codecs
from lasio import read, reader

import pytest

from pathlib import Path
def egfn(fn):
    """Return the path of an example file located next to this module.

    Parameters
    ----------
    fn : str
        File name inside the ``examples`` directory.

    Returns
    -------
    str
        ``<directory of this file>/examples/<fn>``.
    """
    return os.path.join(os.path.dirname(__file__), "examples", fn)

from lasio import read, reader

egfn = lambda fn: os.path.join(os.path.dirname(__file__), "examples", fn)
stegfn = lambda vers, fn: os.path.join(os.path.dirname(__file__), "examples", vers, fn)
def stegfn(vers, fn):
    """Return the path of an example file inside a versioned sub-directory.

    Parameters
    ----------
    vers : str
        Name of the sub-directory under ``examples`` (e.g. ``"1.2"``).
    fn : str
        File name inside that sub-directory.

    Returns
    -------
    str
        ``<directory of this file>/examples/<vers>/<fn>``.
    """
    return os.path.join(os.path.dirname(__file__), "examples", vers, fn)


def test_encoding_attr():
Expand All @@ -22,36 +22,48 @@ def test_encoding_attr():

def test_utf8_chardet():
    """Reading encodings_utf8.las with chardet autodetection reports UTF-8."""
    example_path = egfn("encodings_utf8.las")
    detected = read(example_path, autodetect_encoding="chardet").encoding
    assert detected.upper() == "UTF-8"


def test_utf8wbom_chardet():
    """Reading encodings_utf8wbom.las with chardet autodetection reports UTF-8-SIG."""
    example_path = egfn("encodings_utf8wbom.las")
    detected = read(example_path, autodetect_encoding="chardet").encoding
    # "SIG" stands for "signature": the encoding is identified from the BOM
    # metadata rather than from the file contents.
    assert detected.upper() == "UTF-8-SIG"


def test_utf16lebom_chardet():
    """Reading encodings_utf16lebom.las with chardet autodetection reports UTF-16."""
    # LE = Little Endian. The expected name carries no LE/BE suffix;
    # presumably the BOM in the file conveys the byte order -- TODO confirm
    # against the chardet documentation.
    las = read(egfn("encodings_utf16lebom.las"), autodetect_encoding="chardet")
    assert las.encoding.upper() == "UTF-16"


def test_utf16le_specified_ok():
    """An explicitly requested UTF-16-LE encoding is kept on the result."""
    # LE = Little Endian
    example_path = egfn("encodings_utf16le.las")
    reported = read(example_path, encoding="UTF-16-LE").encoding
    assert reported.upper() == "UTF-16-LE"


def test_utf16le_chardet():
    """Reading encodings_utf16le.las with chardet autodetection reports UTF-16LE."""
    # 02-15-2023: chardet is correctly identifying this file now, chardet: 5.1.0.
    # The previously skipped variant that expected read() to raise (see PR #326)
    # is obsolete and has been dropped.
    las = read(egfn("encodings_utf16le.las"), autodetect_encoding="chardet")
    assert las.encoding.upper() == "UTF-16LE"


def test_utf16bebom_chardet():
    """Reading encodings_utf16bebom.las with chardet autodetection reports UTF-16."""
    example_path = egfn("encodings_utf16bebom.las")
    detected = read(example_path, autodetect_encoding="chardet").encoding
    assert detected.upper() == "UTF-16"


def test_iso88591_chardet():
    """Reading encodings_iso88591.las with chardet autodetection reports ISO-8859-1."""
    example_path = egfn("encodings_iso88591.las")
    detected = read(example_path, autodetect_encoding="chardet").encoding
    assert detected.upper() == "ISO-8859-1"


def test_cp1252_chardet():
    """Reading encodings_cp1252.las with chardet autodetection reports ISO-8859-1."""
    example_path = egfn("encodings_cp1252.las")
    detected = read(example_path, autodetect_encoding="chardet").encoding
    # Chardet reads this file as ISO-8859-1, hence the expected value.
    assert detected.upper() == "ISO-8859-1"


"""
Expand All @@ -61,48 +73,55 @@ def test_cp1252_chardet():

def test_pathlib_utf8_chardet():
    """Same check as the str-path UTF-8 test, passing a pathlib.Path to read()."""
    example_path = Path(egfn("encodings_utf8.las"))
    detected = read(example_path, autodetect_encoding="chardet").encoding
    assert detected.upper() == "UTF-8"


def test_pathlib_utf8wbom_chardet():
    """A pathlib.Path to encodings_utf8wbom.las is detected as UTF-8-SIG."""
    example_path = Path(egfn("encodings_utf8wbom.las"))
    detected = read(example_path, autodetect_encoding="chardet").encoding
    assert detected.upper() == "UTF-8-SIG"


def test_pathlib_utf16lebom_chardet():
    """A pathlib.Path to encodings_utf16lebom.las is detected as UTF-16."""
    example_path = Path(egfn("encodings_utf16lebom.las"))
    detected = read(example_path, autodetect_encoding="chardet").encoding
    assert detected.upper() == "UTF-16"


def test_pathlib_utf16le_specified_ok():
    """An explicit UTF-16-LE encoding is kept when read() is given a pathlib.Path."""
    example_path = Path(egfn("encodings_utf16le.las"))
    reported = read(example_path, encoding="UTF-16-LE").encoding
    assert reported.upper() == "UTF-16-LE"


def test_pathlib_utf16le_chardet():
    """A pathlib.Path to encodings_utf16le.las is detected as UTF-16LE by chardet."""
    # 02-15-2023: chardet is correctly identifying this file now, chardet: 5.1.0.
    # The previously skipped variant that expected read() to raise (see PR #326)
    # is obsolete and has been dropped.
    las = read(Path(egfn("encodings_utf16le.las")), autodetect_encoding="chardet")
    assert las.encoding.upper() == "UTF-16LE"


def test_pathlib_utf16bebom_chardet():
    """A pathlib.Path to encodings_utf16bebom.las is detected as UTF-16."""
    example_path = Path(egfn("encodings_utf16bebom.las"))
    detected = read(example_path, autodetect_encoding="chardet").encoding
    assert detected.upper() == "UTF-16"


def test_pathlib_iso88591_chardet():
    """A pathlib.Path to encodings_iso88591.las is detected as ISO-8859-1."""
    example_path = Path(egfn("encodings_iso88591.las"))
    detected = read(example_path, autodetect_encoding="chardet").encoding
    assert detected.upper() == "ISO-8859-1"


def test_pathlib_cp1252_chardet():
    """A pathlib.Path to encodings_cp1252.las is detected as ISO-8859-1."""
    example_path = Path(egfn("encodings_cp1252.las"))
    detected = read(example_path, autodetect_encoding="chardet").encoding
    assert detected.upper() == "ISO-8859-1"


def test_adhoc_test_encoding():
    """reader.adhoc_test_encoding reports ASCII for the 1.2/sample.las example."""
    filename = stegfn("1.2", "sample.las")
    encoding = reader.adhoc_test_encoding(filename)
    # Compare case-insensitively so the test does not depend on the
    # capitalisation of the returned encoding name.
    assert encoding.upper() == "ASCII"


def test_open_with_codecs_no_autodetect():
    """open_with_codecs with autodetection disabled reports ASCII for sample.las."""
    filename = stegfn("1.2", "sample.las")
    # Only the second element of the returned pair (the encoding) is checked.
    obj, encoding = reader.open_with_codecs(filename, autodetect_encoding=False)
    # Compare case-insensitively so the test does not depend on the
    # capitalisation of the returned encoding name.
    assert encoding.upper() == "ASCII"


def test_open_with_codecs_no_autodetect_chars():
Expand Down

0 comments on commit 45670ed

Please sign in to comment.