Skip to content

Commit

Permalink
Do not extract text from JPEG and Gif #74
Browse files Browse the repository at this point in the history
Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
  • Loading branch information
pombredanne committed May 2, 2018
1 parent 5b5cca0 commit c57b5b6
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 5 deletions.
8 changes: 5 additions & 3 deletions src/typecode/contenttype.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,9 +454,11 @@ def is_media_with_meta(self):
"""
Return True if the file is a media file that may contain text metadata.
"""
# For now we only exclude PNGs, though there are likely several other
# mp(1,2,3,4), jpeg, gif all have support for metadata
if self.is_media and 'png image' in self.filetype_file.lower():
# For now we only exclude PNGs, JPEGs and GIFs, though there are likely
# several other
# mp(1,2,3,4), jpeg, gif all have support for metadata but we exclude some
if (self.is_media and self.filetype_file.lower().startswith(
('gif image', 'png image','jpeg image'))):
return False
else:
return True
Expand Down
5 changes: 3 additions & 2 deletions tests/typecode/test_contenttype.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,7 @@ def test_media_image_gif(self):
test_file = self.get_test_loc('contenttype/media/Image1.gif')
assert is_media(test_file)
assert is_binary(test_file)
assert not contains_text(test_file)

def test_media_image_ico(self):
test_file = self.get_test_loc('contenttype/media/Image1.ico')
Expand Down Expand Up @@ -840,13 +841,13 @@ def test_media_image_jpeg(self):
test_file = self.get_test_loc('contenttype/media/Image1.jpeg')
assert is_media(test_file)
assert is_binary(test_file)
assert contains_text(test_file)
assert not contains_text(test_file)

def test_media_image_jpg(self):
test_file = self.get_test_loc('contenttype/media/Image1.jpg')
assert is_media(test_file)
assert is_binary(test_file)
assert contains_text(test_file)
assert not contains_text(test_file)

def test_media_image_pbm(self):
test_file = self.get_test_loc('contenttype/media/Image1.pbm')
Expand Down

0 comments on commit c57b5b6

Please sign in to comment.