In [None]:
import csv
import os

from PyPDF2 import PdfFileReader, PdfFileWriter

In [None]:
def extract_information(pdf_path):
    """Read the document metadata and the page count of a PDF file.

    Args:
        pdf_path: path of the PDF file; it is opened in binary mode.

    Returns:
        A tuple ``(information, nbr_pages)`` holding the result of
        ``getDocumentInfo()`` and of ``getNumPages()`` on the reader.
    """
    with open(pdf_path, 'rb') as pdf_stream:
        reader = PdfFileReader(pdf_stream)
        # Query the metadata first and the page count second, both while
        # the underlying file is still open.
        metadata = reader.getDocumentInfo()
        page_count = reader.getNumPages()
        return metadata, page_count

In [None]:
def print_extracted_information (pdf_path, information, nbr_pages):
    """Print a short, human-readable summary of a PDF's metadata.

    Args:
        pdf_path: path of the PDF, used only as a label in the output.
        information: metadata object exposing ``author``, ``creator``,
            ``producer``, ``subject`` and ``title`` attributes. ``None``
            (or an object lacking some of these attributes) is accepted;
            every missing field is printed as ``None``.
        nbr_pages: number of pages to report.

    Returns:
        None. The summary is written to standard output.
    """
    def _field(name):
        # getattr with a default keeps the report usable when the document
        # carries no metadata object at all (information is None).
        return getattr(information, name, None)

    txt = f"""
    Information about {pdf_path}: 

    Author: {_field('author')}
    Creator: {_field('creator')}
    Producer: {_field('producer')}
    Subject: {_field('subject')}
    Title: {_field('title')}
    Number of pages: {nbr_pages}
    """
    print(txt)
    return

In [None]:
def extract_page_ranges (input_pdf, p_ranges, output_pdfs):
    """Copy page ranges of one PDF into separate output PDF files.

    Args:
        input_pdf: the source PDF; passed unchanged to ``PdfFileReader``.
        p_ranges: list of page ranges. Items ``[0]`` and ``[1]`` of each
            entry are the first and last page to copy, numbered from 1 and
            both inclusive.
        output_pdfs: list of output file paths, one per entry of p_ranges
            (both lists must have the same length).

    Returns:
        False if the two lists differ in length (nothing is read or
        written in that case), True otherwise. Ranges that are skipped
        with a warning do not change the return value.
    """
    if len (p_ranges) != len (output_pdfs):
        print ("Error! Lists with p_ranges and output_pdfs don't have the same length!")
        return False

    # open the input pdf
    pdf = PdfFileReader (input_pdf)
    nbr_pages = pdf.getNumPages()

    # iterate through the inputs
    for p_range, output_pdf in zip(p_ranges, output_pdfs):
        first_page, last_page = p_range[0], p_range[1]

        # verify if the pages exist
        if first_page<1 or first_page>nbr_pages or last_page<1 or last_page>nbr_pages:
            print ("Warning! Page range out of documents pages: {0} != [1..{1}]".format(p_range,nbr_pages))
            continue
        # A reversed range would yield no pages at all and silently produce
        # a page-less output file, so it is reported and skipped instead.
        if first_page > last_page:
            print ("Warning! Page range starts after it ends, skipping: {0}".format(p_range))
            continue

        pdf_writer = PdfFileWriter()
        print ('Pages {0} .. {1}: '.format(first_page, last_page), end='', flush=True)
        # Page numbers are 1-based and inclusive, getPage() is 0-based:
        # range(first-1, last) visits exactly the requested pages.
        for page in range (first_page-1, last_page):
            print ('.',end='',flush=True)
            pdf_writer.addPage (pdf.getPage(page))

        # write into the file
        print (' -> {0} - '.format(output_pdf),end='',flush=True)
        with open(output_pdf,"wb") as out:
            pdf_writer.write (out)
        print ('OK!',flush=True)
    return True
            

In [None]:
##  main code

# Directory names are built with os.path.join (with a trailing separator)
# so the same cell works on hosts whose separator is not a backslash.
path = os.path.join("2003", "")
out_path = os.path.join("2003", "SeparatePDFs", "")

pdf_fileName = '2003_10_08_EPCG'
csv_fileName = '2003EPCG'

pdf_path = path + pdf_fileName + '.pdf'
csv_path = path + csv_fileName + '.csv'

pdf_information, nbr_pages = extract_information(pdf_path)
print_extracted_information (pdf_path, pdf_information, nbr_pages)

### ATTENTION
#
#  NUMBERS in page_ranges must range from 1 to nbr_pages
#
#  in extract_page_ranges these are offset to the PyPDF2 representation: 0 .. nbr_pages-1

page_ranges = []
output_pdfs = []
# newline="" lets the csv module do its own line-ending handling.
with open(csv_path, "rt", newline="") as csv_in:

    # read one row at a time
    for rownum, row in enumerate(csv.reader(csv_in, delimiter=";")):

        # use row 0 (which holds the column titles) to
        # identify the columns containing the relevant fields
        if rownum==0:
            p_range_s_col = row.index ('PageStart')
            p_range_f_col = row.index ('PageEnd')
            output_fileName_col = row.index ('File')

        elif not row:
            # csv.reader yields an empty list for a blank line (for example
            # a trailing one); there is nothing to collect from it.
            continue

        else:
            # convert each page number once and reuse it below
            page_start = int(row[p_range_s_col])
            page_end = int(row[p_range_f_col])
            print ("Collecting data: File:{0} from page {1} to {2}".format(row[output_fileName_col],
                                                                           page_start,
                                                                           page_end),
                   flush=True)
            page_ranges.append((page_start, page_end))
            output_pdfs.append(out_path+row[output_fileName_col])
    print ("All data collected!", flush=True)

if extract_page_ranges (pdf_path,page_ranges, output_pdfs):
    print ('\nThat\'s all, folks!')
else:
    print ('\nNOT OK! Something went wrong :-(')
    