Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

30% faster reads by removing unnecessary file.tell() calls #579

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 38 additions & 15 deletions lasio/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,29 +297,46 @@ def find_sections_in_file(file_obj):
or line breaks.

"""
file_pos = int(file_obj.tell())
file_pos = 0
starts = []
ends = []
line_no = 0
line = file_obj.readline()
# for i, line in enumerate(file_obj):
line_no = -1
line = True

while line:
line = file_obj.readline()
line_no = line_no + 1
sline = line.strip().strip("\n")
if sline.startswith("~"):
file_pos = int(file_obj.tell()) - full_line_length(line, file_obj)
starts.append((file_pos, line_no, sline))
if len(starts) > 1:
ends.append(line_no - 1)
file_pos = int(file_obj.tell())
line = file_obj.readline()
line_no = line_no + 1

ends.append(line_no)
section_positions = []
for j, (file_pos, first_line_no, sline) in enumerate(starts):
section_positions.append((file_pos, first_line_no, ends[j], sline))
section_positions = [
(file_pos, first_line_no, ends[j], sline)
for j, (file_pos, first_line_no, sline) in enumerate(starts)
]
return section_positions


def full_line_length(line, file_obj):
    """Return the length of ``line`` in characters as it appears in the file.

    ``line`` is expected to be a string whose line ending, if any, is a
    single ``'\\n'``. When ``file_obj.newlines`` reports a longer separator
    (``'\\r\\n'``), the extra characters are added back to the length.

    Parameters
    ----------
    line : str
        The line to measure.
    file_obj : file-like object
        Object whose ``newlines`` attribute (``None``, a string, or a tuple
        of strings) describes the separators seen so far. Objects without
        that attribute are treated as if it were ``None``.

    Returns
    -------
    int
        ``len(line)``, plus ``len(separator) - 1`` when ``line`` ends with
        ``'\\n'`` and a separator is known.

    Notes
    -----
    The result counts characters, not bytes.
    NOTE(review): a caller that subtracts this value from ``file_obj.tell()``
    is assuming one byte per character -- confirm for multi-byte encodings.
    """
    # Look the attribute up once; tolerate file-likes that do not expose it.
    newlines = getattr(file_obj, "newlines", None)
    if isinstance(newlines, tuple):
        # Several separator styles were reported: use the first entry.
        # An empty tuple carries no information and is treated like None.
        newlines = newlines[0] if newlines else None

    # No known separator, or no trailing newline on this line: nothing to add.
    if newlines is None or not line.endswith("\n"):
        return len(line)

    # The trailing '\n' already accounts for one separator character.
    return len(line) + len(newlines) - 1


def determine_section_type(section_title):
"""Return the type of the LAS section based on its title

Expand Down Expand Up @@ -454,7 +471,7 @@ def read_data_section_iterative_normal_engine(
title = file_obj.readline()

def items(f, start_line_no, end_line_no):
for line_no, line in enumerate(f, start=start_line_no+1):
for line_no, line in enumerate(f, start=start_line_no + 1):
line = line.strip("\n").strip()
if line.startswith(ignore_data_comments):
continue
Expand Down Expand Up @@ -581,18 +598,24 @@ def read_data_section_iterative_numpy_engine(file_obj, line_nos):
file_obj.seek(0)

# unpack=True transforms the data from an array of rows to an array of columns.
# loose=False will throw an error on non-numerical data, which then sends the
# loose=False will throw an error on non-numerical data, which then sends the
# parsing to the 'normal' parser.
array = np.genfromtxt(
file_obj, skip_header=first_line, max_rows=max_rows, names=None, unpack=True, loose=False
file_obj,
skip_header=first_line,
max_rows=max_rows,
names=None,
unpack=True,
loose=False,
dtype=float,
)

# If there is only one data row, np.genfromtxt treats it as one array of
# individual values. Lasio needs a array of arrays. This if statement
# converts the single line data array to an array of arrays(column data).
if len(array.shape) == 1:
arr_len = array.shape[0]
array = array.reshape(arr_len,1)
array = array.reshape(arr_len, 1)

return array

Expand Down Expand Up @@ -1035,7 +1058,7 @@ def configure_metadata_patterns(line, section_name):
# 3. double_dots '..' caused by mnemonic abbreviation (with period)
# next to the dot delimiter.
if ":" in line:
if not "." in line[:line.find(":")]:
if not "." in line[: line.find(":")]:
# If there is no period, then we assume that the colon exists and
# everything on the left is the name, and everything on the right
# is the value - therefore no unit or description field.
Expand Down