In [None]:
# 1. In what modes should the File objects for PdfFileReader() and 
# PdfFileWriter() be opened?


'''
 With the PyPDF2 library, which is commonly used for working with PDF files,
 you pass PdfFileReader a file opened in read-binary mode ('rb') and pass 
 the write() method of PdfFileWriter a file opened in write-binary mode ('wb').
 This is because PDF files are binary files.


'''

from PyPDF2 import PdfFileReader, PdfFileWriter

# Reading: hand PdfFileReader a file object opened in read-binary mode.
with open('example.pdf', mode='rb') as file:
    pdf_reader = PdfFileReader(file)

    # Work with pdf_reader here.

# Writing: PdfFileWriter is created without a file; the write-binary file
# object is only needed when the document is written out.
with open('output.pdf', mode='wb') as file:
    pdf_writer = PdfFileWriter()

    # Build up the output document with pdf_writer here.

    # Write the document held by pdf_writer into the opened file.
    pdf_writer.write(file)


'''
 = PdfFileReader is used to read from an existing PDF file, so it's opened in 
   read-binary mode ('rb').
 
 = PdfFileWriter is used to write to a new PDF file, so the file passed to 
   its write() method is opened in write-binary mode ('wb').

'''


In [None]:
# 2. From a PdfFileReader object, how do you get a Page object for page 5?


'''
To get a Page object for page 5 from a PdfFileReader object in the PyPDF2 
library, you can use the getPage method. The indexing for pages starts from
0, so if you want the Page object for page 5, you would use an index of 4.

'''
from PyPDF2 import PdfFileReader

# Open the PDF file in read-binary mode.
# Adjust the file path ('example.pdf') to the actual location of your PDF file.
with open('example.pdf', 'rb') as file:
    # Create a PdfFileReader object
    pdf_reader = PdfFileReader(file)

    # getPage takes a zero-based page index and returns a Page object,
    # so page 5 is index 4.
    page_5 = pdf_reader.getPage(4)

    # Extract the text of the page. extractText() is the legacy camelCase
    # name that goes with PdfFileReader/getPage used in this notebook; the
    # snake_case extract_text() only exists in newer PyPDF2 releases.
    page_5_text = page_5.extractText()
    print(page_5_text)


In [None]:
# 3. What PdfFileReader variable stores the number of pages in the PDF document?
'''
The PdfFileReader variable that stores the number of pages in the PDF document 
is numPages. You can access it as an attribute of a PdfFileReader object

'''


from PyPDF2 import PdfFileReader

# Read the page count of 'example.pdf' and report it.
with open('example.pdf', mode='rb') as file:
    pdf_reader = PdfFileReader(file)

    # numPages is an attribute of the reader, not a method call.
    num_pages = pdf_reader.numPages

    print(f'The PDF document has {num_pages} pages.')
    
    
    
'''
Adjust the file path ('example.pdf') based on the actual location of your
PDF file. The numPages attribute holds the total number of pages in the PDF 
document.

'''


In [None]:
# 4. If a PdfFileReader object’s PDF is encrypted with the password swordfish,
# what must you do before you can obtain Page objects from it?



'''
If a PdfFileReader object's PDF is encrypted with the password "swordfish," 
you need to decrypt the PDF by providing the correct password before you can 
obtain Page objects from it. You can use the decrypt method of the 
PdfFileReader object and pass the password as an argument.

'''

from PyPDF2 import PdfFileReader

# Open the encrypted PDF file in read-binary mode
with open('encrypted_document.pdf', 'rb') as file:
    # Create a PdfFileReader object
    pdf_reader = PdfFileReader(file)

    # Decrypt the PDF with the password 'swordfish'. decrypt() returns 0 when
    # the password matches neither the user nor the owner password, so stop
    # here with a clear error instead of failing later inside getPage().
    if pdf_reader.isEncrypted and pdf_reader.decrypt('swordfish') == 0:
        raise ValueError(
            "Could not decrypt 'encrypted_document.pdf': wrong password"
        )

    # Now the pages of the PDF can be accessed
    num_pages = pdf_reader.numPages
    for page_number in range(num_pages):
        page = pdf_reader.getPage(page_number)
        # Perform operations with the 'page' object as needed,
        # for example extracting the text of the page:
        page_text = page.extractText()
        print(f'Text from Page {page_number + 1}:\n{page_text}')
        
        

'''
Make sure to replace 'encrypted_document.pdf' with the actual file path of 
your encrypted PDF. If the PDF is not encrypted, the isEncrypted check skips
the decrypt call, and you can proceed to obtain Page objects without 
providing a password.

'''



In [None]:
# 5. What methods do you use to rotate a page?


'''
To rotate a page in PyPDF2, you can use the rotateClockwise and 
rotateCounterClockwise methods of the PageObject class. Each method takes the
rotation angle in degrees as its argument, which must be a multiple of 90.

'''

from PyPDF2 import PdfFileReader, PdfFileWriter

# Copy every page of 'example.pdf' into a new PDF, turned 90 degrees clockwise.
with open('example.pdf', mode='rb') as file:
    pdf_reader = PdfFileReader(file)

    # Collects the rotated pages until they are written out below.
    pdf_writer = PdfFileWriter()

    for page_number in range(pdf_reader.numPages):
        page = pdf_reader.getPage(page_number)

        # The rotation is applied to the page object itself before it is
        # handed to the writer.
        page.rotateClockwise(90)
        pdf_writer.addPage(page)

    # The output is written while the input file is still open.
    with open('output_rotated.pdf', mode='wb') as output_file:
        pdf_writer.write(output_file)



In [8]:
# 6. What is the difference between a Run object and a Paragraph object?

'''

In the context of document processing, particularly in libraries like Python's
python-docx (used for working with Microsoft Word documents), a Run object 
and a Paragraph object represent different levels of text formatting.

Paragraph Object:
 = A Paragraph object represents a paragraph in a document.
 = It can contain multiple Run objects.
 = A paragraph in a Word document typically represents a block of text with a 
   common set of formatting properties, such as font style, size, and 
   alignment.
 = You can apply overall paragraph formatting, like setting indentation or 
 line spacing, to a Paragraph object.

'''

from docx import Document

# A new, empty document with one paragraph created directly from its text.
doc = Document()
paragraph = doc.add_paragraph(text="This is a paragraph.")



'''
Run Object:

 = A Run object represents a contiguous run of text with the same set of 
   formatting properties within a paragraph.
 = A paragraph can contain multiple runs, each with its own formatting.
 = You can apply specific formatting to individual runs, such as making a
   part of the text bold, italic, or changing the color.
 = Runs are useful when you have different formatting requirements within
   the same paragraph.
   Example (using python-docx):

'''

from docx import Document

# One paragraph assembled from three runs, each with its own formatting.
doc = Document()
paragraph = doc.add_paragraph()

# Plain run: no character formatting is set on it.
run1 = paragraph.add_run("This is ")

# The run whose text says "a bold " is the one that is made bold, so the
# formatting matches what the text announces.
run2 = paragraph.add_run("a bold ")
run2.bold = True

# The last run shows a second, different kind of formatting in the same paragraph.
run3 = paragraph.add_run("text.")
run3.italic = True


In [9]:
# 7. How do you obtain a list of Paragraph objects for a Document object that’s
# stored in a variable named doc?

'''

To obtain a list of Paragraph objects from a Document object in the python-docx
library (assuming doc is your Document object), you can use the paragraphs 
attribute. The paragraphs attribute contains a list of all the paragraphs
in the document.

'''

from docx import Document

# Start from an empty document and give it three paragraphs to list.
doc = Document()
doc.add_paragraph(text="First paragraph.")
doc.add_paragraph(text="Second paragraph.")
doc.add_paragraph(text="Third paragraph.")

# The paragraphs attribute gives the Paragraph objects of the document
# stored in doc.
paragraphs_list = doc.paragraphs

# Walk the list and show the text of each individual paragraph.
for paragraph in paragraphs_list:
    print(paragraph.text)



First paragraph.
Second paragraph.
Third paragraph.


In [10]:
# 8. What type of object has bold, underline, italic, 
# strike, and outline variables?

'''

The attributes bold, underline, italic, strike, and outline are properties 
of the Run object in the python-docx library. A Run object represents a 
contiguous run of text within a paragraph in a Word document.

'''


'''

bold: It controls the bold formatting of the text in the run.
underline: It controls whether the text is underlined.
italic: It controls the italic formatting of the text in the run.
strike: It controls whether the text has a strikethrough.
outline: It controls the outline formatting of the text.


'''

from docx import Document

# Empty document with a single, initially empty paragraph.
doc = Document()
paragraph = doc.add_paragraph()

# One run carrying all five formatting flags from the question.
run = paragraph.add_run(text="This is formatted text.")
run.bold = True
run.underline = True
run.italic = True
run.strike = True
run.outline = True

# Write the result to disk.
doc.save('formatted_document.docx')



'''

In this example, a Run object is created, and various formatting properties 
are applied to it. You can customize these properties based on your
specific requirements for text formatting in your Word document.

'''

'\n\nIn this example, a Run object is created, and various formatting properties \nare applied to it. You can customize these properties based on your\nspecific requirements for text formatting in your Word document.\n\n'

In [11]:
# 9. What is the difference between False, True, and None for the bold variable?

'''

In the context of the python-docx library, the bold variable for a Run object 
can be set to True, False, or None to control the bold formatting of the 
text. Let's explore the meanings of each value:



'''

from docx import Document

# Build a fresh document and paragraph here so this cell runs on its own
# instead of relying on a `paragraph` variable left over from an earlier cell.
doc = Document()
paragraph = doc.add_paragraph()

'''
True:

Setting bold to True makes the text bold.

'''

run = paragraph.add_run("This is bold text.")
run.bold = True


'''
False:

Setting bold to False removes the bold formatting from the text (i.e.,
the text is not bold).
Example:

'''

run = paragraph.add_run("This is not bold text.")
run.bold = False

'''

None:

Setting bold to None means the run does not specify bold itself. Its 
effective value is then inherited from the style hierarchy (the style applied
to the run or to its paragraph). If that style is bold, the text will be 
bold; if it's not bold, the text won't be bold.
Example:

'''

run = paragraph.add_run("This inherits boldness from its style.")
run.bold = None





In [12]:
# 10. How do you create a Document object for a new Word document?

'''

To create a Document object for a new Word document using the python-docx
library, you can use the Document class. Here's an example:

'''

from docx import Document

# Calling Document() with no arguments gives a new document object.
doc = Document()

# Give it a level-1 heading followed by one paragraph.
doc.add_heading('My First Document', 1)
doc.add_paragraph(text='Hello, this is a new Word document.')

# Write the document to disk.
doc.save('new_document.docx')



'''
In this example:

We import the Document class from the docx module.
We create a new Document object by calling the Document() constructor.
We add content to the document using methods like add_heading and add_paragraph.
Finally, we save the document to a file using the save method.

'''


'\nIn this example:\n\nWe import the Document class from the docx module.\nWe create a new Document object by calling the Document() constructor.\nWe add content to the document using methods like add_heading and add_paragraph.\nFinally, we save the document to a file using the save method.\n\n'

In [13]:
#  11. How do you add a paragraph with the text 'Hello, there!' to a Document
# object stored in a variable named doc?

'''

To add a paragraph with the text 'Hello, there!' to a Document object stored
in a variable named doc using the python-docx library, you can use the
add_paragraph method. Here's an example:

'''

from docx import Document

# Stand-in for the Document object the question calls `doc`.
doc = Document()

# add_paragraph appends a paragraph with the given text; the result is kept
# in `paragraph`.
paragraph = doc.add_paragraph(text='Hello, there!')

# Optional: write the document to a file.
# doc.save('your_document_filename.docx')



'''
We create a new Document object (assuming 'doc' is your Document object).
We use the add_paragraph method to add a paragraph with the specified text 'Hello, there!'.
You can optionally save the document to a file using the save method.

'''


"\nWe create a new Document object (assuming 'doc' is your Document object).\nWe use the add_paragraph method to add a paragraph with the specified text 'Hello, there!'.\nYou can optionally save the document to a file using the save method.\n\n"

In [14]:
# 12. What integers represent the levels of headings available in Word documents?

'''

In Word documents, headings are typically organized into levels, and each level
is assigned a specific integer value. The add_heading method of python-docx 
accepts the integers 0 through 9:



Title:     Level 0 (integer: 0)
Heading 1: Level 1 (integer: 1)
Heading 2: Level 2 (integer: 2)
Heading 3: Level 3 (integer: 3)
Heading 4: Level 4 (integer: 4)
Heading 5: Level 5 (integer: 5)
Heading 6: Level 6 (integer: 6)
Heading 7: Level 7 (integer: 7)
Heading 8: Level 8 (integer: 8)
Heading 9: Level 9 (integer: 9)


When using the python-docx library, you can use these integer values to 
specify the level of a heading when using the add_heading method.
For example:

'''

from docx import Document

doc = Document()

# One heading per level; the second argument of add_heading is the level.
doc.add_heading('Heading 1', 1)
doc.add_heading('Heading 2', 2)
doc.add_heading('Heading 3', 3)

# Write the document to disk.
doc.save('headings_document.docx')


'''
In this example, we use the level parameter of the add_heading method to 
specify the level of each heading. Adjust the level according to the 
desired hierarchy in your document.

'''



'\nIn this example, we use the level parameter of the add_heading method to \nspecify the level of each heading. Adjust the level according to the \ndesired hierarchy in your document.\n\n'