pdfminer · pietermarsman · Oct 15, 2019 · Mar 25, 2018 · Oct 15, 2019 · Oct 15, 2019
diff --git a/pdfminer/image.py b/pdfminer/image.py
@@ -74,7 +74,7 @@ def export_image(self, image):
         if len(filters) == 1 and filters[0][0] in LITERALS_DCT_DECODE:
             ext = '.jpg'
         elif (image.bits == 1 or
-              image.bits == 8 and image.colorspace in (LITERAL_DEVICE_RGB, LITERAL_DEVICE_GRAY)):
+              image.bits == 8 and (LITERAL_DEVICE_RGB in image.colorspace or LITERAL_DEVICE_GRAY in image.colorspace)):
             ext = '.%dx%d.bmp' % (width, height)
         else:
             ext = '.%d.%dx%d.img' % (image.bits, width, height)
@@ -101,15 +101,15 @@ def export_image(self, image):
             for y in range(height):
                 bmp.write_line(y, data[i:i+width])
                 i += width
-        elif image.bits == 8 and image.colorspace is LITERAL_DEVICE_RGB:
+        elif image.bits == 8 and LITERAL_DEVICE_RGB in image.colorspace:
             bmp = BMPWriter(fp, 24, width, height)
             data = stream.get_data()
             i = 0
             width = width*3
             for y in range(height):
                 bmp.write_line(y, data[i:i+width])
                 i += width
-        elif image.bits == 8 and image.colorspace is LITERAL_DEVICE_GRAY:
+        elif image.bits == 8 and LITERAL_DEVICE_GRAY in image.colorspace:
             bmp = BMPWriter(fp, 8, width, height)
             data = stream.get_data()
             i = 0

diff --git a/tests/test_tools_pdf2txt.py b/tests/test_tools_pdf2txt.py
@@ -1,45 +1,51 @@
-#!/usr/bin/env python
+import os
+from shutil import rmtree
+from tempfile import NamedTemporaryFile, mkdtemp
 
-# -*- coding: utf-8 -*-
-
-import nose, logging, os
+import nose
 
 import tools.pdf2txt as pdf2txt
 
-path=os.path.dirname(os.path.abspath(__file__))+'/'
 
-def run(datapath,filename,options=None):
-    i=path+datapath+filename+'.pdf'
-    o=path+filename+'.txt'
+def full_path(relative_path_to_this_file):
+    """Resolve a path given relative to this test file's directory to an absolute path."""
+    here = os.path.dirname(os.path.abspath(__file__))
+    return os.path.abspath(os.path.join(here, relative_path_to_this_file))
+
+
+def run(datapath, filename, options=None):
+    """Run pdf2txt on datapath + filename + '.pdf', passing filename + '.txt' via -o."""
+    i = full_path(datapath + filename + '.pdf')
+    o = full_path(filename + '.txt')
-    if options:
-        s='pdf2txt -o%s %s %s'%(o,options,i)
-    else:
-         s='pdf2txt -o%s %s'%(o,i)
-    pdf2txt.main(s.split(' ')[1:])
+    # Build argv as a list instead of splitting a formatted command line, so
+    # that paths containing spaces are passed to pdf2txt as single arguments.
+    extra = options.split(' ') if options else []
+    pdf2txt.main(['-o%s' % o] + extra + [i])
 
+
 class TestDumpPDF():
 
     def test_1(self):
-        run('../samples/','jo')
-        run('../samples/','simple1')
-        run('../samples/','simple2')
-        run('../samples/','simple3')
-        run('../samples/','sampleOneByteIdentityEncode')
+        run('../samples/', 'jo')
+        run('../samples/', 'simple1')
+        run('../samples/', 'simple2')
+        run('../samples/', 'simple3')
+        run('../samples/', 'sampleOneByteIdentityEncode')
 
     def test_2(self):
-        run('../samples/nonfree/','dmca')
+        run('../samples/nonfree/', 'dmca')
 
     def test_3(self):
-        run('../samples/nonfree/','f1040nr')
+        run('../samples/nonfree/', 'f1040nr')
 
     def test_4(self):
-        run('../samples/nonfree/','i1040nr')
+        run('../samples/nonfree/', 'i1040nr')
 
     def test_5(self):
-        run('../samples/nonfree/','kampo')
+        run('../samples/nonfree/', 'kampo')
 
     def test_6(self):
-        run('../samples/nonfree/','naacl06-shinyama')
+        run('../samples/nonfree/', 'naacl06-shinyama')
 
     # this test works on Windows but on Linux & Travis-CI it says
     # PDFSyntaxError: No /Root object! - Is this really a PDF?
@@ -50,13 +56,47 @@ def test_7(self):
     """
 
     def test_8(self):
-        run('../samples/contrib/','2b','-A -t xml')
+        run('../samples/contrib/', '2b', '-A -t xml')
 
     def test_9(self):
-        run('../samples/nonfree/','175') # https://github.com/pdfminer/pdfminer.six/issues/65
+        run('../samples/nonfree/', '175')  # https://github.com/pdfminer/pdfminer.six/issues/65
 
     def test_10(self):
-        run('../samples/scancode/','patchelf') # https://github.com/euske/pdfminer/issues/96
+        run('../samples/scancode/', 'patchelf')  # https://github.com/euske/pdfminer/issues/96
+
+
+class TestDumpImages(object):
+    """Tests that run pdf2txt with --output-dir and inspect the files it writes there."""
+
+    def extract_images(self, input_file):
+        """Run pdf2txt on input_file and return the names of the files it extracted.
+
+        The files are written to a scratch directory that is removed again
+        before returning, so only the file names are available to callers.
+        """
+        output_dir = mkdtemp()
+        try:
+            with NamedTemporaryFile() as output_file:
+                commands = ['-o', output_file.name, '--output-dir', output_dir, input_file]
+                pdf2txt.main(commands)
+            return os.listdir(output_dir)
+        finally:
+            # Clean up even when pdf2txt raises, so failed runs do not leak temp dirs.
+            rmtree(output_dir)
+
+    def test_nonfree_dmca(self):
+        """Extract images of pdf containing bmp images
+
+        Regression test for: https://github.com/pdfminer/pdfminer.six/issues/131
+        """
+        image_files = self.extract_images(full_path('../samples/nonfree/dmca.pdf'))
+        assert image_files, 'no images were extracted'
+        assert image_files[0].endswith('bmp')
+
+    def test_nonfree_175(self):
+        """Extract images of pdf containing jpg images"""
+        self.extract_images(full_path('../samples/nonfree/175.pdf'))
+
 
 if __name__ == '__main__':
     nose.runmodule()