# Step 1 User Uploads the File

# Step 2.1 Extractive Text Summarization

## Install/Import Required Packages/Libraries

In [1]:
! pip install pdfminer3k
! pip install sumy



In [2]:
from pdfminer.pdfinterp import PDFResourceManager, process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from io import StringIO
from io import open  
import re 
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer # In this script, we use one summarizer to summarize the document as the example.
from summa import keywords  

## Convert PDF File into TXT File

In [3]:
# Create a Function to Read PDF File
def readPDF(pdfFile):
    """Extract the text of a PDF as a single string.

    Parameters
    ----------
    pdfFile : file object
        The PDF opened in binary mode (e.g. ``open(path, 'rb')``). The caller
        keeps ownership of this handle; it is not closed here.

    Returns
    -------
    str
        Everything the text converter wrote into the in-memory buffer.
    """
    rsrcmgr = PDFResourceManager()

    # The buffer is managed by `with` and the converter by `finally`, so both
    # are released even when process_pdf raises on a malformed file.
    with StringIO() as retstr:
        device = TextConverter(rsrcmgr, retstr, laparams=LAParams())
        try:
            process_pdf(rsrcmgr, device, pdfFile)
        finally:
            device.close()

        # Read the buffer before the `with` block closes it.
        return retstr.getvalue()

# Create a Function to Save Text into a TXT File
def saveTxt(txt, path="USER MANUAL.txt"):
    """Write ``txt`` to a text file, replacing any existing content.

    Parameters
    ----------
    txt : str
        The text to store.
    path : str, optional
        Destination file. Defaults to "USER MANUAL.txt" in the current working
        directory, which is the file the later cells read back.
    """
    with open(path, "w") as f:
        f.write(txt)
        
# Convert PDF file into TXT File and Save it in the Current Working Directory
# The `with` block closes the PDF handle as soon as the text has been extracted.
with open('USER MANUAL.pdf', 'rb') as pdf_file:
    txt = readPDF(pdf_file)
saveTxt(txt)

In [4]:
# Preview the extracted text (already in memory as `txt`).
# Only the first 1000 characters are shown so the cell output stays readable;
# the full text is available in `txt` and in "USER MANUAL.txt".
txt[:1000]

"USER MANUAL \n\nChapter 1 Getting Started \n\nWelcome to your new Kindle. This short guide will familiarize you with all of the \n\nfeatures and functionality of your Kindle. Some Kindle features may not be available \n\nbased on your country of residence or location. Please visit \n\nwww.amazon.com/devicesupport for more information about specific features. \n\nSetting up your Kindle \n\nSetting up your Kindle takes a few simple steps: \n\n Select the device language. \n\n Connect to a wireless network. \n\n Register your Kindle to your Amazon account. \n\n Link your Kindle to your social network accounts. (Optional) \n\nIf you haven't done these steps yet, go to the Home screen now, select Set Up Your \n\nKindle, and follow the onscreen prompts. If you bought your Kindle online using your \n\nAmazon account, it is already registered to you. To verify, tap the Home icon and look \n\nfor your Amazon user name in the upper left corner of the Home screen. If it says My \n\nKindle or sho

## Summarize the Entire Document

In [5]:
# Parse the saved text file into a document object, tokenizing it as English.
# NOTE(review): the recorded summary below contains fragments such as
# "Please visit" — presumably because the extracted text keeps the PDF's
# hard line breaks; confirm before relying on sentence boundaries.
english_tokenizer = Tokenizer("english")
parser = PlaintextParser.from_file("USER MANUAL.txt", english_tokenizer)

In [6]:
# Summarize the Document with 30 Sentences
# LexRank is the one summarizer used here as the example.
summarizer = LexRankSummarizer()

# Ask the summarizer for 30 sentences from the parsed document.
summary = summarizer(parser.document, 30)

# Print each selected sentence on its own line.
for picked_sentence in summary:
    print(picked_sentence)

features and functionality of your Kindle.
Some Kindle features may not be available
Please visit
Setting up your Kindle
Select the device language.
Register your Kindle to your Amazon account.
Link your Kindle to your social network accounts.
If you haven't done these steps yet, go to the Home screen now, select Set Up Your
To verify, tap the Home icon and look
for your Amazon user name in the upper left corner of the Home screen.
Kindle or shows the name of a previous owner instead of your own Amazon account
name, you'll need to register it.
To register a Kindle that has been previously registered, tap the Quick Actions icon on
On the Settings page, tap My Account, then
If you have enabled Sign in Approval, you will need to
If you have difficulty connecting to your wireless network or need more information,
Your Kindle has an onscreen keyboard.
other actions that require you to type in information, the keyboard automatically
based upon your entries display above the keyboard.
Tap the

## Extract Keywords from the Document

In [7]:
# Read the saved text back and print the keywords summa extracts from it.
# The `with` block closes the file even if reading or extraction raises.
with open("USER MANUAL.txt", "r") as text_file:
    print(keywords.keywords(text_file.read()))

tap
tapping
tapped
kindle
displays
display
network
setting
set
settings
option displayed
chapter
amazon
select
selection
selecting
account
accounts
features
feature
screen
keyboard
page
pages
optional
options
register
registered
icon
left
mode
code
available


# Step 2.2 Text Splitting

In [8]:
# Split the Document by the Keyword: Chapter
# The lookahead keeps the word "Chapter" at the start of each piece; only the
# newline in front of it is consumed as the separator.
chapter_boundary = re.compile(r'\n(?=Chapter)')
text = chapter_boundary.split(txt)

In [9]:
# Show Chapter 1
# Index 0 of the split holds whatever precedes the first "Chapter" line
# (the document title here), so the chapters start at index 1.
text[1]

"Chapter 1 Getting Started \n\nWelcome to your new Kindle. This short guide will familiarize you with all of the \n\nfeatures and functionality of your Kindle. Some Kindle features may not be available \n\nbased on your country of residence or location. Please visit \n\nwww.amazon.com/devicesupport for more information about specific features. \n\nSetting up your Kindle \n\nSetting up your Kindle takes a few simple steps: \n\n Select the device language. \n\n Connect to a wireless network. \n\n Register your Kindle to your Amazon account. \n\n Link your Kindle to your social network accounts. (Optional) \n\nIf you haven't done these steps yet, go to the Home screen now, select Set Up Your \n\nKindle, and follow the onscreen prompts. If you bought your Kindle online using your \n\nAmazon account, it is already registered to you. To verify, tap the Home icon and look \n\nfor your Amazon user name in the upper left corner of the Home screen. If it says My \n\nKindle or shows the name of a

In [10]:
# Show Chapter 2
# Second piece of the split that begins with the word "Chapter".
text[2]

"Chapter 2 Navigating Your Kindle \n\nYour Kindle features a touchscreen interface that enables you to perform many tasks \n\nwith just a tap or swipe of a finger. To select an item, simply tap it. For example, tap a \n\nbook cover or title to open the book, or tap the Menu icon on the item's cover to \n\nview available options. \n\nTap zones \n\nThe EasyReach feature of your Kindle lets you effortlessly turn pages in a book or \n\nperiodical while holding the device with just one hand. Tapping almost anywhere in \n\nthe display area will take you to the next page; the area is large enough to be tapped \n\nwhether you hold the device with your left or right hand. Tap the left side of the \n\nscreen to go to the previous page. The EasyReach tap zones differ depending on \n\nwhether your Kindle is in portrait or landscape mode. When reading , you can change \n\nthe screen orientation by tapping the Menu icon and selecting Portrait or Landscape \n\nMode. Note that the option displayed dep

In [11]:
# List the fonts referenced by each page of the document, then report how many
# distinct fonts (by xref) were found across all pages.
from __future__ import print_function
import fitz
doc = fitz.open("USER MANUAL.pdf")
xref_visited = [] # memorize already processed font xrefs here

for i in range(len(doc)):
    fontlist = doc.getPageFontList(i)
    if fontlist:
        print("Page", i+1)
    for font in fontlist:
        # NOTE(review): the recorded output shows "ttf" in the second field,
        # which looks like a file extension rather than a generation number —
        # confirm the tuple layout against the PyMuPDF documentation.
        print("xref=%s, gen=%s, type=%s, basefont=%s, name=%s" % (font[0], font[1], font[2], font[3], font[4]))
        # The same font can appear on several pages; record each xref once so
        # the final total counts distinct fonts instead of the last page only.
        if font[0] not in xref_visited:
            xref_visited.append(font[0])

print()
# Counting xref_visited also works for a document with no pages, where
# `fontlist` would never have been assigned.
print("Extracted", len(xref_visited), "font(s) from", doc.name)

Page 1
xref=5, gen=ttf, type=Type0, basefont=BCDEEE+Calibri-Bold, name=F1
xref=12, gen=ttf, type=TrueType, basefont=BCDFEE+Calibri-Bold, name=F2
xref=14, gen=ttf, type=Type0, basefont=BCDGEE+Calibri, name=F3
xref=19, gen=ttf, type=TrueType, basefont=BCDHEE+Calibri, name=F4
Page 2
xref=14, gen=ttf, type=Type0, basefont=BCDGEE+Calibri, name=F3
xref=19, gen=ttf, type=TrueType, basefont=BCDHEE+Calibri, name=F4
xref=5, gen=ttf, type=Type0, basefont=BCDEEE+Calibri-Bold, name=F1
xref=12, gen=ttf, type=TrueType, basefont=BCDFEE+Calibri-Bold, name=F2

Extracted 4 font(s) from USER MANUAL.pdf
