In [0]:
!pip list

### **Formatted strings** are a great alternative to regular format() method used since antiquity, providing you more powerful padding techniques among many other new features

In [0]:
name = "Rifat"
print(f"Hi {name}, how are you")

#older alternative: print("Hi {}, how are you".format(name))

# demo of f-string having worked with different data types
listA= ["Rifat", "Male", 23]
print(f"{listA[0]} is a {listA[1]} and with an age of {listA[2]}")



Hi Rifat, how are you
Rifat is a Male and with an age of 23


In [0]:
data = [("Name", "Age"), ("Rifat Bin Masud", 23), ("Sarah Dilnaaz", 22)]
for name, age in data:
  #next line shows how we can pad using an inside curlybrace with a colon
  # the number tells us the minimum spaces for the column, so regardless of the size
  # of the text the column will be 20 spaces.. if any text inside the column takes more than this
  # padding issue will occur
  print(f"{name:{50}} {age:.>{5}}")

Name                                               ..Age
Rifat Bin Masud                                    ...23
Sarah Dilnaaz                                      ...22


### Formatting date and time remains another issue. We can fix this with strf time-code coupled with regular f-strings  [link for strf time codes](http://strftime.org/)

In [0]:
from datetime import datetime

mydate = datetime(year = 2019, month = 9, day = 8, hour=00, minute=47,)
print(f"{mydate:%I:%M %p}")
print(f"{mydate:%A}, {mydate:%m} {mydate:%B}")

12:47 AM
Sunday, 09 September


### For writing textfiles in jupyter, you can use magic commands, with some assorted knowledges

In [0]:
%%writefile test.txt
Hello World

Writing test.txt


In [0]:
%%writefile -a test.txt

This is a second line from test.txt

Appending to test.txt


In [0]:
testText = open('test.txt'); testText

<_io.TextIOWrapper name='test.txt' mode='r' encoding='UTF-8'>

In [0]:
testText.read()

'Hello World\nThis is a second line from test.txt'

In [0]:
# why second read failed? since the cursor is at the end of the text file for one readcall,
# there is simply no characters remaining to read, hence it returned blank
testText.read()

''

In [0]:
# we reset the cursor by using seek method to set it at the beginning by setting 0
# as an argument. 1 and 2 can also be set to change the cursor at various positions
testText.seek(0); testText.read()

'Hello World\nThis is a second line from test.txt'

In [0]:
testText.seek(0)

0

In [0]:
# we can save the strign into a variable for later use case
content = testText.read(); content

'Hello World\nThis is a second line from test.txt'

In [0]:
# as you saw content var returned the raw string without any formatting hence it is unreadable to 
# humans, but by printing it we can get the true result save in test.txt
print(content);

Hello World
This is a second line from test.txt


In [0]:
# after closing a file we cant work with it in specially the I/O opeartions
testText.close();

In [0]:
# the readlines() give us the lines of the file as a list of strings/Lines
testText = open('test.txt'); lines = testText.readlines()

In [0]:
for line in lines:
  print(line)

Hello World

This is a second line from test.txt


In [0]:
writeOpen = open('test.txt', mode = 'w+')

In [0]:
# so you've now seen that using this mode, we have completely overwriitein
# what the contents was, the seek() even didnt help
# also the diff between w/w+ is just w+ gives extra read capacity
writeOpen.seek(0); writeOpen.read()

''

In [0]:
writeOpen.write("New text here, ")
writeOpen.seek(0)
writeOpen.read()

'New text here, '

In [0]:
writeOpen.write("New text here, ")
writeOpen.seek(0)
writeOpen.read()

'New text here, New text here, '

In [0]:
# if you've seen the pattern you may have understood it already but if you didnt
# the open() command w/ w+ lets the file to start over, not the actual writing in 
# ze files, so if the script contains a w+ mode open(), re-running the script
# will completely write over the file. proceed with caution!
writeOpen = open('test.txt', mode = 'w+')
writeOpen.write("New text here, ")
writeOpen.seek(0)
writeOpen.read()
writeOpen.close()

'New text here, '

In [0]:
# the append mode clearly lets us to append the files, notice you cant
# view the contents without closing it!
new = open('next.txt', mode='a+')

In [0]:
new.write('Try out ze betshit!\nHello'); new.close()

In [0]:
# so now the close() is getting really boring, its also unsafe because 
# you might forget to close
# fix this with context manager, the block ends -> auto close

with open('next.txt') as openedFile:
  lines = openedFile.readlines()

In [0]:
for i in lines:
  print(i, end = '')

Try out ze betshit!
Try out ze betshit!
Hello

## Working with pdf files::::
### we worked with text files before but thats not the only situation here for data analysis, pdf is another data source and we need to master data acqusution from there as-well

In [0]:
!pip install pypdf2

Collecting pypdf2
[?25l  Downloading https://files.pythonhosted.org/packages/b4/01/68fcc0d43daf4c6bdbc6b33cc3f77bda531c86b174cac56ef0ffdb96faab/PyPDF2-1.26.0.tar.gz (77kB)
[K     |████████████████████████████████| 81kB 3.1MB/s 
[?25hBuilding wheels for collected packages: pypdf2
  Building wheel for pypdf2 (setup.py) ... [?25l[?25hdone
  Created wheel for pypdf2: filename=PyPDF2-1.26.0-cp36-none-any.whl size=61085 sha256=2dde36c2b443b02e902d3c23f2b701258bc2ceab821df119f1d9c93c4b5e9137
  Stored in directory: /root/.cache/pip/wheels/53/84/19/35bc977c8bf5f0c23a8a011aa958acd4da4bbd7a229315c1b7
Successfully built pypdf2
Installing collected packages: pypdf2
Successfully installed pypdf2-1.26.0


In [0]:
import PyPDF2

In [0]:
# we are picking the mode as 'rb' = read binary for it being a pdf and not a text file with encodings
myfile = open('US_Declaration.pdf', mode = 'rb')

In [0]:
pdf_reader = PyPDF2.PdfFileReader(myfile); pdf_reader

<PyPDF2.pdf.PdfFileReader at 0x7fac291b6630>

In [0]:
# check page_count
pdf_reader.numPages

5

In [0]:
# return page one of the pdf, indexing 0 = 1
page_one = pdf_reader.getPage(0)

In [0]:
# we needed to print that out since it was a unicode string returned by extracText()
print(page_one.extractText())

In [0]:
# to create a new blank pdf and add a page from this pdf, lets say page_one
# open a new blank pdf, remember with write permission it auto creates one
# since its pdf, use wb
# we need a writerObject to write as well

blank_output_tobepdf = open('custom.pdf', mode = 'wb')
pdf_writer = PyPDF2.PdfFileWriter()
pdf_writer.addPage(page_one)
pdf_writer.write(blank_output_tobepdf)
blank_output_tobepdf.close()

In [0]:
text = []
for numOfpage in range(pdf_reader.numPages):
  page = pdf_reader.getPage(numOfpage)
  text.append(page.extractText())

# nice little script for taking all the page text, remember getPage returns text for a single page
# text as one large unicode string. hence 5 sized final text string 
print(len(text)) 

5


In [0]:
for page in text:
  print(page)