# Convert PDF and CBR files to CBZ

In [None]:
import glob
import os
import subprocess
import sys
import tempfile

In [None]:
# Full path of the 7-Zip command-line executable that the conversion
# functions launch through subprocess.
exe = r"C:\Program Files\7-Zip\7z.exe"

In [None]:
def _find_jpeg_spans(pdf):
    """Return (start, end) byte offsets of the JPEG images found in raw PDF bytes.

    The scan looks for a ``stream`` keyword, expects the JPEG start marker
    (FF D8) within the next 20 bytes, and takes the JPEG end marker (FF D9)
    found from 20 bytes before the matching ``endstream`` keyword onwards.
    Streams that do not start with a JPEG marker are skipped.

    Raises:
        ValueError: if a JPEG stream has no ``endstream`` keyword or no
            JPEG end marker.
    """
    startmark = b'\xff\xd8'
    endmark = b'\xff\xd9'
    spans = []
    i = 0
    while True:
        istream = pdf.find(b'stream', i)
        if istream < 0:
            break
        istart = pdf.find(startmark, istream, istream + 20)
        if istart < 0:
            # Not a JPEG stream; continue scanning after this keyword.
            i = istream + 20
            continue
        iend = pdf.find(b'endstream', istart)
        if iend < 0:
            raise ValueError('   ERROR: Didn\'t find end of JPG stream!')
        # Never start the search before the image itself: a negative start
        # offset would be interpreted by bytes.find() relative to the end.
        iend = pdf.find(endmark, max(istart, iend - 20))
        if iend < 0:
            raise ValueError('   ERROR: Didn\'t find end of JPG!')
        iend += len(endmark)  # include the end marker in the slice
        spans.append((istart, iend))
        i = iend
    return spans


def pdf_to_cbz(input_fn, out_path, silent=False):
    """Extract the JPEG images embedded in a PDF and pack them into a CBZ.

    The images are written as ``page_NN.jpg`` into a fresh temporary folder
    inside ``out_path``, compressed with the 7-Zip executable named by the
    module-level ``exe``, and the resulting archive is stored as
    ``<out_path>/<input base name>.cbz`` (replacing an existing file).

    Args:
        input_fn: path of the PDF file to read.
        out_path: folder that receives the ``.cbz``; created if missing.
            A trailing path separator is optional.
        silent: when true, suppress the progress messages.

    Raises:
        ValueError: if an embedded JPEG stream is not terminated.
        OSError: if the PDF cannot be read or 7-Zip produced no archive.
    """
    if not silent:
        print(' Convert PDF to CBZ with JPGs:')

    # Output base name: file name without folders and without its extension.
    base_name = input_fn.split('/')[-1].split('\\')[-1]
    out_file = os.path.splitext(base_name)[0]
    target_dir = out_path or '.'
    zip_fn = os.path.join(target_dir, out_file + '.zip')
    cbz_fn = os.path.join(target_dir, out_file + '.cbz')

    # Open the input pdf file
    with open(input_fn, 'rb') as file:
        pdf = file.read()

    if not silent:
        print(' - Reading JPGs')
    spans = _find_jpeg_spans(pdf)
    # Pad page numbers to a common width (at least two digits) so that the
    # archive entries keep their page order when sorted by name.
    width = max(2, len(str(len(spans) - 1)))

    os.makedirs(target_dir, exist_ok=True)
    # A unique temporary folder avoids picking up leftovers of an earlier,
    # interrupted run and is removed even when a step below fails.
    with tempfile.TemporaryDirectory(dir=target_dir) as temp_path:
        for njpg, (istart, iend) in enumerate(spans):
            temp_fn = os.path.join(
                temp_path, 'page_{:0{}d}.jpg'.format(njpg, width))
            with open(temp_fn, 'wb') as jpgfile:
                jpgfile.write(pdf[istart:iend])

        if not silent:
            print(' - Converting to CBZ (compressing)')
        # "7z a" adds to an existing archive, so drop any stale zip first.
        try:
            os.remove(zip_fn)
        except FileNotFoundError:
            pass
        # Compress to a zip file
        subprocess.call([exe, 'a', zip_fn, os.path.join(temp_path, '*.*')])
        # Rename zip to cbz, overwriting a previously existing output file
        os.replace(zip_fn, cbz_fn)

        if not silent:
            print(' - Removing temporary files')

    if not silent:
        print('Done...')

# Process all pdf files in a directory
def convert_pdf_folder(input_path, output_path, report_every=1):
    """Convert every PDF below ``input_path`` to a CBZ in ``output_path``.

    The folder is searched recursively; files are processed in sorted order
    and each one is handed to ``pdf_to_cbz`` with ``silent=True``.

    Args:
        input_path: folder to search. A trailing path separator is optional.
        output_path: folder passed to ``pdf_to_cbz`` as its output folder.
        report_every: print a progress line for every n-th file (default 1,
            i.e. every file). Values below 1 are treated as 1.
    """
    # Join with the path API so the pattern is correct with or without a
    # trailing separator on input_path.
    pattern = os.path.join(input_path, '**', '*.pdf')
    fn_list = sorted(glob.glob(pattern, recursive=True))
    step = max(1, int(report_every))

    for fn_i, fn in enumerate(fn_list):
        # Progress message: percentage done and the bare file name
        if fn_i % step == 0:
            print('{:<07}'.format(str(round(fn_i * 100 / len(fn_list), 4))) + "%\t" + fn.split('/')[-1].split('\\')[-1])

        # Process files
        pdf_to_cbz(fn, output_path, silent=True)
    print('100.0000%')

def convert_cbr_folder(input_path, report_every=1):
    """Re-pack every CBR below ``input_path`` as a CBZ in ``<input_path>/cbz``.

    The folder is searched recursively and files are processed in sorted
    order. Each archive is extracted (flattened) into a fresh temporary
    folder with the 7-Zip executable named by the module-level ``exe``, its
    ``*.jpg`` files are compressed into a zip, and the zip is stored as
    ``<base name>.cbz`` in the output folder (replacing an existing file).

    Args:
        input_path: folder to search. A trailing path separator is optional.
        report_every: print a progress line for every n-th file (default 1,
            i.e. every file). Values below 1 are treated as 1.

    Raises:
        OSError: if 7-Zip produced no archive for a file.
    """
    # Join with the path API so the paths are correct with or without a
    # trailing separator on input_path.
    pattern = os.path.join(input_path, '**', '*.cbr')
    fn_list = sorted(glob.glob(pattern, recursive=True))

    # Make the output folder if it doesn't exist
    output_path = os.path.join(input_path, 'cbz')
    os.makedirs(output_path, exist_ok=True)

    step = max(1, int(report_every))
    for fn_i, fn in enumerate(fn_list):
        # Progress message: percentage done and the bare file name
        if fn_i % step == 0:
            print('{:<07}'.format(str(round(fn_i * 100 / len(fn_list), 4))) + "%\t" + fn.split('/')[-1].split('\\')[-1])

        # Output base name: file name without folders and without extension.
        # NOTE: archives with the same base name in different sub-folders
        # map to the same output file; the later one wins.
        base_name = fn.split('/')[-1].split('\\')[-1]
        out_file = os.path.splitext(base_name)[0]
        zip_fn = os.path.join(output_path, out_file + '.zip')
        cbz_fn = os.path.join(output_path, out_file + '.cbz')

        # "7z a" adds to an existing archive, so drop any stale zip first.
        try:
            os.remove(zip_fn)
        except FileNotFoundError:
            pass

        # A unique temporary folder per archive keeps pages of different
        # comics apart and is removed even when a step below fails.
        with tempfile.TemporaryDirectory(dir=output_path) as temp_path:
            # De-compress rar
            subprocess.call([exe, 'e', fn, '*.*', '-y', '-r', '-o' + temp_path])

            # Re-compress as zip (only the .jpg pages are kept)
            subprocess.call([exe, 'a', zip_fn, os.path.join(temp_path, '*.jpg')])

        # Rename zip to cbz, overwriting a previously existing output file
        os.replace(zip_fn, cbz_fn)

    print('100.0000%')

In [None]:
# Folder holding the .cbr files of the series to convert
in_dir = '/Some comic book series/'

# Print a small banner, then convert everything found below in_dir
print('Some comic book series', '------------------', sep='\n')
convert_cbr_folder(in_dir)