Skip to content
Permalink
Browse files

add magic command grep

  • Loading branch information...
sdpython committed Nov 27, 2015
1 parent 53c836c commit 099eb916ce249fcd3364b6ef66adb112876e9ea8
@@ -105,7 +105,7 @@ Versions
* **add:** method *pyensae.remote.remote_connection_ssh.ASSHClient.download_cluster*
* **add:** add magic command to test a streaming script for PIG
* **add:** function *pyensae.file_helper.content_helper.file_head*,
*pyensae.file_helper.content_helper.file_tail*
*pyensae.file_helper.content_helper.file_tail*, *pyensae.file_helper.content_helper.enumerate_grep*
* **add:** add magic command ``%lsrepo``, ``%compress``, ``%mpl_style``
* **del:** delete class *TransferFTP*, moved to module pyquickhelper
* **add:** add magic command ``%hhelp`` to display the help for an object in HTML format
@@ -119,6 +119,22 @@ def test_head2(self):
except FileNotFoundError:
pass

def test_grep(self):
    """
    runs ``MagicFile.grep`` in raw mode on ``data/Exportutf8.txt``
    (regular expression ``.*6.*``, at most 3 lines) and compares
    the returned text to the expected line
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    # the data file is stored next to this test file
    this_folder = os.path.dirname(os.path.abspath(__file__))
    data_file = os.path.join(this_folder, "data", "Exportutf8.txt")

    magic = MagicFile()
    fLOG("--", data_file)

    command = "{0} .*6.* -n 3 -r".format(data_file)
    output = magic.grep(command)
    fLOG("*****", output)

    # leading and trailing end of lines are not significant for the comparison
    expected = "1.2 3.4 5.6".strip("\n")
    self.assertEqual(output.strip("\n"), expected)

def test_tail(self):
fLOG(
__file__,
@@ -6,4 +6,4 @@
from .content_helper import replace_comma_by_point
from .decompress_helper import decompress_zip, decompress_targz, decompress_gz
from .jython_helper import run_jython, get_jython_jar, is_java_installed, download_java_standalone
from .content_helper import file_head, file_tail
from .content_helper import file_head, file_tail, enumerate_grep
@@ -4,6 +4,7 @@
"""

import os
import re


def replace_comma_by_point(file):
@@ -30,17 +31,20 @@ def file_head(filename, nbline=10, encoding="utf8"):
.. versionadded:: 1.1
"""
if not os.path.exists(filename):
raise FileNotFoundError(filename)
if not os.path.isfile(filename):
raise FileNotFoundError("{0} is not a file".format(filename))
rows = []
with open(filename, "r", encoding=encoding) as f:
for line in f:
if isinstance(filename, str):
if not os.path.exists(filename):
raise FileNotFoundError(filename)
if not os.path.isfile(filename):
raise FileNotFoundError("{0} is not a file".format(filename))
with open(filename, "r", encoding=encoding) as f:
return file_head(f, nbline=nbline, encoding=encoding)
else:
rows = []
for line in filename:
rows.append(line)
if len(rows) >= nbline:
break
return rows
return rows


def file_tail(filename, nbline=10, encoding="utf8", threshold=2 ** 14):
@@ -85,3 +89,29 @@ def file_tail(filename, nbline=10, encoding="utf8", threshold=2 ** 14):
rows = content.split("\n")
res = rows[-nbline:] if len(rows) > nbline else rows
return [_ + "\n" for _ in res]


def enumerate_grep(filename, regex, encoding="utf8"):
    """
    iterates on the lines matching a regular expression

    @param      filename    filename (string) or any iterable on lines
                            (an opened file for example)
    @param      regex       regular expression, a line is kept if
                            ``search`` finds a match anywhere in it
    @param      encoding    encoding used to open the file,
                            unused if *filename* is not a string
    @return                 iterator on the matching lines

    The function is a generator: the file is checked and opened
    only when the first line is requested.

    .. versionadded:: 1.1
    """
    if not isinstance(filename, str):
        # *filename* is already an iterable on lines
        pattern = re.compile(regex)
        for row in filename:
            if pattern.search(row):
                yield row
        return

    if not os.path.exists(filename):
        raise FileNotFoundError(filename)
    if not os.path.isfile(filename):
        raise FileNotFoundError("{0} is not a file".format(filename))

    # the file stays opened as long as the iterator is consumed
    with open(filename, "r", encoding=encoding) as stream:
        yield from enumerate_grep(stream, regex, encoding)
@@ -15,7 +15,7 @@
from pyquickhelper.ipythonhelper import MagicCommandParser, MagicClassWithHelpers
from pyquickhelper import docstring2html, create_visual_diff_through_html_files
from .format_helper import format_file_size, format_file_mtime
from .content_helper import file_head, file_tail
from .content_helper import file_head, file_tail, enumerate_grep


@magics_class
@@ -79,6 +79,68 @@ def head(self, line):
else:
return HTML("<pre>\n{0}\n</pre>".format("".join(rows)))

@staticmethod
def grep_parser():
    """
    defines the way to parse the magic command ``%grep``

    @return     parser (``MagicCommandParser``) with two positional
                arguments (filename, regular expression) and the options
                ``-n`` (maximum number of lines, -1 for all),
                ``-r`` (raw text instead of HTML), ``-e`` (file encoding)
    """
    # the description used to be the one of ``%head``,
    # it now tells what ``%grep`` does
    parser = MagicCommandParser(
        prog="grep",
        description='display the lines of a text file matching a regular expression')
    parser.add_argument('f', type=str, help='filename')
    parser.add_argument('regex', type=str, help='regular expression')
    parser.add_argument(
        '-n',
        '--n',
        type=int,
        default=-1,
        help='number of lines to display, -1 for all')
    parser.add_argument(
        '-r',
        '--raw',
        default=False,
        action='store_true',
        help='display raw text instead of HTML')
    parser.add_argument(
        '-e',
        '--encoding',
        default="utf8",
        help='file encoding')
    return parser

@line_magic
def grep(self, line):
    """
    defines ``%grep``
    which displays the lines of a file matching a regular expression

    @param      line    command line, parsed with the parser
                        returned by ``MagicFile.grep_parser``
    @return             the matching lines concatenated into a string if
                        option ``-r`` is set, the same text wrapped into
                        ``<pre>`` in an ``HTML`` object otherwise,
                        None if ``get_args`` returns None

    @NB(grep)
    The magic command ``%grep`` is equivalent to::

        from pyensae.file_helper import enumerate_grep
        list(enumerate_grep(<filename>, <regex>, <encoding>))

    @endNB
    """
    from itertools import islice

    parser = self.get_parser(MagicFile.grep_parser, "grep")
    args = self.get_args(line, parser)
    if args is None:
        return None

    matches = enumerate_grep(args.f, args.regex, args.encoding)
    try:
        if args.n == -1:
            rows = list(matches)
        else:
            # islice stops right after the n-th match instead of scanning
            # the file until the next one; a negative value other than -1
            # produces no line, as it always did
            rows = list(islice(matches, max(args.n, 0)))
    finally:
        # releases the file the generator may still hold opened
        # when it was not consumed until the end
        matches.close()

    text = "".join(rows)
    if args.raw:
        return text
    # NOTE(review): the lines are inserted without HTML escaping,
    # same as ``%head`` does
    return HTML("<pre>\n{0}\n</pre>".format(text))

@staticmethod
def tail_parser():
"""
Oops, something went wrong.

0 comments on commit 099eb91

Please sign in to comment.
You can’t perform that action at this time.