## Install the required libraries

In [1]:
!pip install PyPDF2
!pip install pycryptodome 
!pip install pycryptodomex



## Import the Package

In [1]:
import PyPDF2

## Listing out the files

In [3]:
!ls

0_Image21.jpg
1_Image24.jpg
Image11.jpg
Image14.jpg
PyPDF2 Module.ipynb
TheDesert.pdf
TheDesert-protected.pdf
TheOcean.pdf


## Extract Text from PDF

In [4]:
# Open the PDF in binary mode; the context manager closes the handle once all
# pages have been printed.
with open('TheOcean.pdf', 'rb') as pdf_file_obj:
    pdf_file_reader = PyPDF2.PdfReader(pdf_file_obj)

    num_of_pages = len(pdf_file_reader.pages)
    print('No. of pages : ', num_of_pages)

    # Iterate over the page objects directly instead of rebinding an integer
    # loop index to a page object inside the loop body.
    for page in pdf_file_reader.pages:
        print(page.extract_text())

No. of pages :  3
The Oceans: Our Beautiful Blue Planet  
 
1 | P a g e  
  
 
The Ocean: Our Beautiful Blue Planet  
 
The ocean covers more than 70% of the Earth's surface and is home to millions of living 
organisms. It is an essential part of our planet, and without it, life as we know it would not 
exist. The ocean is not just a big body of water; it is a complex ecosystem that provides us 
with many resources, including food, energy, and medicine. It is also a source of inspiration 
and beauty.  
 
The ocean is an immense body of saltwater that is constantly moving. It is made up of five 
main regions: the Atlantic, Pacific, Indian, Southern, and Arctic Oceans. Each region has its 
unique features, such as water temperature, depth, and currents. The ocean's water is salty 
because of the minerals that are dissolved in it. Saltwater  is heavier than freshwater, which 
is why the ocean is deeper than most lakes and rivers.  
 
The ocean is home to millions of living organisms, incl

In [5]:
# Hand PdfReader the path (the same call form the "Encrypt a PDF File" section
# uses) instead of an open() handle that no later cell ever closes.
pdf_file_obj = PyPDF2.PdfReader('TheOcean.pdf')

In [6]:
# Number of pages in the opened document.
len(pdf_file_obj.pages)

3

## Extracting Images from PDF

In [7]:
# Take the first page (index 0) and display the images found on it.
first_page = pdf_file_obj.pages[0]

first_page.images

[File(name=Image11.jpg, data: 67.1 kB, hash: -343772436863801620)]

In [8]:
# The first image entry on the page.
first_page.images[0]

File(name=Image11.jpg, data: 67.1 kB, hash: -343772436863801620)

In [9]:
# File name recorded for that image; reused below as the output file name.
first_page.images[0].name

'Image11.jpg'

In [10]:
# Raw bytes of the image. Presumably left commented out because displaying
# them would flood the cell output (the repr above reports 67.1 kB).
# first_page.images[0].data

In [11]:
# Save the first image of the first page to disk under the name stored in the PDF.
first_image = first_page.images[0]

with open(first_image.name, 'wb') as image_out:
    image_out.write(first_image.data)

In [12]:
# Walk every page and write each image found on it to disk, using the image's
# own name as the file name. The page index is printed next to each image.
for page, particular_page in enumerate(pdf_file_obj.pages):
    for image in particular_page.images:
        print(page, image)

        with open(image.name, 'wb') as file:
            file.write(image.data)

0 File(name=Image11.jpg, data: 67.1 kB)
1 File(name=Image14.jpg, data: 70.2 kB)


## Read an encrypted PDF

In [13]:
import PyPDF2

In [14]:
# Hand PdfReader the path (the same call form the "Encrypt a PDF File" section
# uses) instead of an open() handle that no later cell ever closes.
pdf_obj = PyPDF2.PdfReader('TheDesert-protected.pdf')

In [15]:
# Check whether the document is password-protected (True in the output below).
pdf_obj.is_encrypted

True

In [16]:
# Unlock the document. The return value reports which password matched
# (OWNER_PASSWORD in the output below).
# NOTE(review): the password is hard-coded for this demo; avoid that for real documents.
pdf_obj.decrypt('password')

<PasswordType.OWNER_PASSWORD: 2>

In [17]:
# Page count, read after the decrypt call above.
len(pdf_obj.pages)

3

In [18]:
# Print the text of every page and save each image found on it. A running
# counter shared across all pages is prefixed to every output file name.
count = 0
for page in pdf_obj.pages:
    print(page.extract_text())
    for image in page.images:
        print(image.name)
        output_name = f'{count}_{image.name}'
        with open(output_name, 'wb') as image_out:
            image_out.write(image.data)
        count += 1

The Desert: An Enigmatic Landscape  
 
1 | P a g e  
  
The Desert: An Enigmatic Landscape  
 
The desert is a unique and enigmatic landscape that covers about 20% of the Earth's surface. 
It is a harsh and unforgiving environment that is characterized by extreme temperatures, low 
rainfall, and sparse vegetation. Despite its challenging conditions, the desert is home to a 
wide variety of plant and animal life, as well as diverse human cultures that have adapted to 
its unique conditions.  
 
Deserts are found in every continent on Earth, and each has its un ique features and 
characteristics. Some of the largest deserts in the world include the Sahara in Africa, the 
Arabian Desert in the Middle East, the Gobi in Asia, and the Mojave in North America. Each 
of these deserts is characterized by its unique landsca pe, climate, and biodiversity.  
 
The desert landscape is often characterized by vast expanses of sand dunes, rocky 
mountains, and dry riverbeds. The sand dunes are formed 

## Encrypt a PDF File

In [1]:
from PyPDF2 import PdfReader, PdfWriter

In [2]:
# Open the source document by path; the bare name on the last line only
# displays the reader object's repr.
pdf_reader = PdfReader('TheOcean.pdf')
pdf_reader

<PyPDF2._reader.PdfReader at 0x257a1d41190>

In [3]:
# Create the writer that the following cells append to, encrypt and write out.
pdf_writer = PdfWriter()
pdf_writer

<PyPDF2._writer.PdfWriter at 0x257a24e8590>

In [6]:
# Append the reader's document to the writer.
pdf_writer.append(pdf_reader)
pdf_writer

<PyPDF2._writer.PdfWriter at 0x1f150430590>

In [7]:
# Encrypt the writer's output with the given password.
# NOTE(review): the password is hard-coded for this demo; avoid that for real documents.
pdf_writer.encrypt('password')
pdf_writer

<PyPDF2._writer.PdfWriter at 0x1f150430590>

In [8]:
# Write the encrypted document to disk. The output below shows that write()
# returns a tuple whose second element is the (already closed) file object.
pdf_writer.write('TheOcean-encrypted.pdf')

(True, <_io.FileIO [closed]>)

## Merge two PDFs

In [1]:
import PyPDF2

In [2]:
!ls

0_Image21.jpg
1_Image24.jpg
Image11.jpg
Image14.jpg
PyPDF2 Module.ipynb
TheDesert.docx
TheDesert.pdf
TheDesert-protected.pdf
TheOcean.docx
TheOcean.pdf


In [3]:
# Append the source documents to one writer, in this order, and save the result.
sources = ('TheOcean.pdf', 'TheDesert.pdf')

pdf_merger_obj = PyPDF2.PdfWriter()
for source_path in sources:
    pdf_merger_obj.append(source_path)

pdf_merger_obj.write('merged-pdf.pdf')
pdf_merger_obj.close()

## Inserting pages

In [4]:
from PyPDF2 import PdfWriter

In [5]:
!ls

0_Image21.jpg
1_Image24.jpg
Image11.jpg
Image14.jpg
merged-pdf.pdf
PyPDF2 Module.ipynb
TheDesert.docx
TheDesert.pdf
TheDesert-protected.pdf
TheOcean.docx
TheOcean.pdf


### One Way

In [23]:
# Open both source documents in binary mode. These handles are closed at the
# end of the cell that writes 'page_insert.pdf'.
ocean_pdf = open('TheOcean.pdf', 'rb')
desert_pdf = open('TheDesert.pdf', 'rb')

In [27]:
# NOTE(review): `output_pdf` is not referenced by any other cell visible in this
# notebook (the cells below use `final_doc_obj`) — candidate for removal.
output_pdf = PyPDF2.PdfWriter()

In [25]:
# Build the output in a new writer: merge all of TheOcean.pdf at position 0,
# then merge only page 0 of TheDesert.pdf at position 1.
final_doc_obj = PdfWriter()

try:
    final_doc_obj.merge(position=0, fileobj=ocean_pdf)
    final_doc_obj.merge(position=1, fileobj=desert_pdf, pages=[0])

    final_doc_obj.write('page_insert.pdf')
finally:
    # Close the handles opened in the previous cell even if merging or
    # writing fails, so they are not left open in the kernel.
    ocean_pdf.close()
    desert_pdf.close()

### Another Way

In [28]:
# Start from a fresh writer: reusing `final_doc_obj` from the "One Way" cell
# would add these pages on top of the ones already merged there, so
# 'page_insert_2.pdf' would contain duplicated content.
final_doc_obj = PdfWriter()

# The context manager closes both source handles even if a step below fails.
with open('TheOcean.pdf', 'rb') as ocean_pdf, open('TheDesert.pdf', 'rb') as desert_pdf:
    final_doc_obj.append(ocean_pdf)
    final_doc_obj.merge(position=1, fileobj=desert_pdf, pages=[0])
    # Write while the source handles are still open.
    final_doc_obj.write('page_insert_2.pdf')

## Remove Alternate Pages

In [None]:
import os

# Print the name of every entry in the current working directory that ends in '.pdf'.
pdf_names = [entry for entry in os.listdir() if entry.endswith('.pdf')]
for pdf_name in pdf_names:
    print(pdf_name)

### One way

In [None]:
# Keep only pages 0 and 2 of TheOcean.pdf (a 3-page file per the earlier
# output), which drops the middle page. TheDesert.pdf is not needed for this
# step, so it is no longer opened.
final_pdf_writer = PyPDF2.PdfWriter()

# The context manager closes the source handle even if merge or write fails.
with open('TheOcean.pdf', 'rb') as ocean_pdf:
    final_pdf_writer.merge(position=0, fileobj=ocean_pdf, pages=[0, 2])
    # Write while the source handle is still open.
    final_pdf_writer.write('ocean_without_page_2.pdf')

### Another way 

In [None]:
final_pdf_writer = PyPDF2.PdfWriter()

# NOTE(review): 'Mountains.pdf' does not appear in the directory listings shown
# earlier in this notebook — confirm the file exists before running this cell.
# The context manager closes the source handle even if merge or write fails.
with open('Mountains.pdf', 'rb') as mountain_pdf:
    # A tuple passed as `pages` is a (start, stop, step) range, so (0, 5, 2)
    # selects pages 0, 2 and 4, i.e. every other page among the first five.
    final_pdf_writer.merge(position=0, fileobj=mountain_pdf, pages=(0, 5, 2))
    # Write while the source handle is still open.
    final_pdf_writer.write('Mountains_distilled.pdf')