### Pattern Matching, Regular Expressions

In [0]:
import re
pattern = r"spam"

# re.match only reports a hit when the pattern matches at the very
# beginning of the string, so the candidate with a leading "4" fails.
for candidate in ("spamspamspam", "4spamspamspam"):
    print("Match" if re.match(pattern, candidate) else "No match")

Match
No match


In [0]:
pattern = r"spam"
subject = "eggspamsausagespam"

# match is anchored at the start of the string; search scans all of it.
for finder in (re.match, re.search):
    print("Match" if finder(pattern, subject) else "No match")

# findall collects every non-overlapping occurrence into a list of strings.
print(re.findall(pattern, subject))
#re.search(pattern, "eggspamsausagespam")

No match
Match
['spam', 'spam']


<_sre.SRE_Match object; span=(3, 7), match='spam'>

In [0]:

pattern = r"pam"

match = re.search(pattern, "eggspamsausage") # returns a match object (falsy result when nothing matches)
if match:
   print(match.group()) # the substring that matched the pattern
   print(match.start()) # index of the first matched character
   print(match.end())   # index one past the last matched character
   print(match.span())  # the (start, end) pair as a tuple

pam
4
7
(4, 7)


In [0]:


text = "My name is David. Hi David."
pattern = r"David"

# sub replaces every non-overlapping occurrence of the pattern in text.
newstr = re.compile(pattern).sub("Amy", text)
print(newstr)

My name is Amy. Hi Amy.


#### Metacharacter 

Metacharacters are what make regular expressions more powerful than normal string methods.

In [0]:
# . (dot) matches any single character other than a new line.
pattern = r"gr.y"

# Only the words that fit the pattern print their 1-based position.
for number, word in enumerate(("grey", "gray", "blue"), start=1):
    if re.match(pattern, word):
        print(f"Match {number}")

Match 1
Match 2


In [0]:
# ^ anchors the pattern at the start of the string and $ at its end.
pattern = r"^(g)r.y$"

# "stingray" ends in "gray" but does not start with "g", so it is rejected.
for number, word in enumerate(("grey", "gray", "stingray"), start=1):
    if re.match(pattern, word):
        print(f"Match {number}")

Match 1
Match 2


#### Grouping ()
A group (a sub-pattern wrapped in parentheses) can be given as an argument to metacharacters such as `*` and `+`.

In [0]:
# * means zero or more occurrences of the preceding item (here the group "spam").
pattern = r"egg(spam)*"

for number, word in enumerate(("egg", "eggspamspamspamegg", "spam"), start=1):
    if re.match(pattern, word):
        print(f"Match {number}")

Match 1
Match 2


In [0]:
# Special Sequences 
# \d, \s, \w and \D match digits, whitespace, word characters and anything that isn't a digit, respectively.

In [0]:
# + means one or more occurrences of the preceding item.
pattern = r"(\D+\d)"

# The pattern needs at least one non-digit followed by a digit at the start.
for number, sample in enumerate(("Hi 999!", "1, 23, 456!", " ! $?"), start=1):
    match = re.match(pattern, sample)
    if match:
        print(f"Match {number}")

Match 1


In [0]:
# Three groups: the local part, the domain name, and the dotted suffix.
pattern = r"([\w\.-]+)@([\w\.-]+)(\.[\w\.]+)"
text = "Please contact intro@python.com for assistance"

match = re.compile(pattern).search(text)
if match is not None:
    print(match.group(0))

intro@python.com


Regex Cheat Sheet https://www.rexegg.com/regex-quickstart.html

More Regex https://docs.python.org/3.7/library/re.html

## Files

Before a file can be manipulated, it needs to be opened.


In [0]:
ls 

[0m[01;34msample_data[0m/


In [0]:
myfile = open("filename.txt", "w") 

`w` means that the file is opened in write mode; if the file already has content, opening it in `w` mode will overwrite it.

This created a filename.txt file in our working directory; the file is still empty.

In [0]:
ls

filename.txt  [0m[01;34msample_data[0m/


In [0]:
# let's write something to the file
myfile.write("Hello World\n") # write returns the number of characters written (12 for this string)


12

In [0]:
myfile.close() # after the operation you should close the file


In [0]:
# let's read the content
# The with block closes the file as soon as the read is finished, so no
# handle is left open when the name myfile is rebound by a later open().
with open("filename.txt", "r") as myfile: # open in reading mode
    content = myfile.read()
content

'Hello World\n'

In [0]:
myfile = open("filename.txt", "a") # open in append mode, it won't overwrite content
myfile.write("Hello Students\n")
myfile.close()


In [0]:
myfile = open("filename.txt", "r") # open in reading mode


In [0]:
myfile.readline(1) # the size argument caps the read: at most 1 character of the current line is returned

'H'

In [0]:
myfile.readline(  ) # read file line by line


'Hello Students\n'

In [0]:
myfile.close()

In [0]:
myfile = open("filename.txt", "r") # open in reading mode

In [0]:
myfile.readlines() # will return each line in a list

['Hello World\n', 'Hello Students\n', 'Hello Students\n']

In [0]:
myfile.close()

In [0]:
# or: a file object can be iterated directly, yielding one line at a time
myfile = open("filename.txt")
for line in myfile:
  # each line keeps its own trailing newline and print adds another one
  print(line)

Hello World

Hello Students

Hello Students



__Never forget to close the file__

In [0]:
myfile.close()  

For lazy people like us, developers added the **with** statement, which closes the file automatically so we don't have to close it by hand after every operation.

In [0]:
# The file is closed automatically when the with block ends.
with open("filename.txt", "r") as file:
  for line in file:
    print(line)


Hello World

Hello Students

Hello Students



In [0]:
# add some more lines to file 
with open("filename.txt", "a") as file:
  file.write("This is the 3th line")

In [0]:
# Read the file back to see the line that was just appended.
with open("filename.txt", "r") as file:
  for line in file:
    print(line)

Hello World

Hello Students

Hello Students

This is the 3th line


In [0]:
# something combined 
import os

In [0]:
# Every file gets the same header line followed by the numbers 0..99
# written back to back with no separator.
digits = "".join(map(str, range(100)))
for i in range(100):
  with open(f"file_#_{i}.txt", "w") as f:
    f.write("This file contains text and some numbers\n")
    f.write(digits)

In [0]:
ls

file_#_0.txt   file_#_29.txt  file_#_48.txt  file_#_67.txt  file_#_86.txt
file_#_10.txt  file_#_2.txt   file_#_49.txt  file_#_68.txt  file_#_87.txt
file_#_11.txt  file_#_30.txt  file_#_4.txt   file_#_69.txt  file_#_88.txt
file_#_12.txt  file_#_31.txt  file_#_50.txt  file_#_6.txt   file_#_89.txt
file_#_13.txt  file_#_32.txt  file_#_51.txt  file_#_70.txt  file_#_8.txt
file_#_14.txt  file_#_33.txt  file_#_52.txt  file_#_71.txt  file_#_90.txt
file_#_15.txt  file_#_34.txt  file_#_53.txt  file_#_72.txt  file_#_91.txt
file_#_16.txt  file_#_35.txt  file_#_54.txt  file_#_73.txt  file_#_92.txt
file_#_17.txt  file_#_36.txt  file_#_55.txt  file_#_74.txt  file_#_93.txt
file_#_18.txt  file_#_37.txt  file_#_56.txt  file_#_75.txt  file_#_94.txt
file_#_19.txt  file_#_38.txt  file_#_57.txt  file_#_76.txt  file_#_95.txt
file_#_1.txt   file_#_39.txt  file_#_58.txt  file_#_77.txt  file_#_96.txt
file_#_20.txt  file_#_3.txt   file_#_59.txt  file_#_78.txt  file_#_97.txt
file_#_21.txt  file_#_40.txt  file_#_5.

In [0]:
# let's read one of this
with open("file_#_0.txt", "r") as f:
  lines_0 = f.readlines()
  print(lines_0) 

['This file contains text and some numbers\n', '0123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899']


In [0]:
# let's make from it something like this 
# ['This file contains text and some numbers\n', '0']

In [0]:
lines_0[1] = lines_0[1][:1]

In [0]:
lines_0

['This file contains text and some numbers\n', '0']

In [0]:
with open("file_#_0.txt", "w") as f:
  for line in lines_0:
    f.write(line)

In [0]:
with open("file_#_0.txt", "r") as f:

  print(f.readlines()) 

['This file contains text and some numbers\n', '0']


In [0]:
ls -t

filename.txt  [0m[01;34msample_data[0m/


In [0]:
import os
#for i in range(100):  os.remove(f"file_#_{i}.txt")
os.remove("filename.txt")

In [0]:
ls


[0m[01;34msample_data[0m/


In [0]:
# Recreate the 100 sample files: a header line followed by the numbers
# 0..99 written back to back with no separator.
digits = "".join(map(str, range(100)))
for i in range(100):
  with open(f"file_#_{i}.txt", "w") as f:
    f.write("This file contains text and some numbers\n")
    f.write(digits)

In [0]:
ls

file_#_0.txt   file_#_29.txt  file_#_48.txt  file_#_67.txt  file_#_86.txt
file_#_10.txt  file_#_2.txt   file_#_49.txt  file_#_68.txt  file_#_87.txt
file_#_11.txt  file_#_30.txt  file_#_4.txt   file_#_69.txt  file_#_88.txt
file_#_12.txt  file_#_31.txt  file_#_50.txt  file_#_6.txt   file_#_89.txt
file_#_13.txt  file_#_32.txt  file_#_51.txt  file_#_70.txt  file_#_8.txt
file_#_14.txt  file_#_33.txt  file_#_52.txt  file_#_71.txt  file_#_90.txt
file_#_15.txt  file_#_34.txt  file_#_53.txt  file_#_72.txt  file_#_91.txt
file_#_16.txt  file_#_35.txt  file_#_54.txt  file_#_73.txt  file_#_92.txt
file_#_17.txt  file_#_36.txt  file_#_55.txt  file_#_74.txt  file_#_93.txt
file_#_18.txt  file_#_37.txt  file_#_56.txt  file_#_75.txt  file_#_94.txt
file_#_19.txt  file_#_38.txt  file_#_57.txt  file_#_76.txt  file_#_95.txt
file_#_1.txt   file_#_39.txt  file_#_58.txt  file_#_77.txt  file_#_96.txt
file_#_20.txt  file_#_3.txt   file_#_59.txt  file_#_78.txt  file_#_97.txt
file_#_21.txt  file_#_40.txt  file_#_5.

In [0]:
import glob

text_files = glob.glob("file_#_*txt" ) # list all file_#_{i}.txt files
#text_files

In [0]:
# Truncate the digits line of every generated file: the file at position n
# in text_files keeps only the first n characters of its second line.
for file_number, path in enumerate(text_files):
  with open(path, "r") as f:
    lines = f.readlines()

  # A file whose digits line was already cut down to nothing has only the
  # header line left, so guard the index to keep this cell safe to re-run.
  if len(lines) > 1:
    lines[1] = lines[1][:file_number]

  with open(path, "w") as g:
    g.writelines(lines)

  with open(path, "r") as k: # plain read mode, to show what ended up on disk
    print(k.readlines())
   

['This file contains text and some numbers\n']
['This file contains text and some numbers\n', '0']
['This file contains text and some numbers\n', '01']
['This file contains text and some numbers\n', '012']
['This file contains text and some numbers\n', '0123']
['This file contains text and some numbers\n', '01234']
['This file contains text and some numbers\n', '012345']
['This file contains text and some numbers\n', '0123456']
['This file contains text and some numbers\n', '01234567']
['This file contains text and some numbers\n', '012345678']
['This file contains text and some numbers\n', '0123456789']
['This file contains text and some numbers\n', '01234567891']
['This file contains text and some numbers\n', '012345678910']
['This file contains text and some numbers\n', '0123456789101']
['This file contains text and some numbers\n', '01234567891011']
['This file contains text and some numbers\n', '012345678910111']
['This file contains text and some numbers\n', '0123456789101112']
[

In [0]:
with open("file_#_28.txt", "r") as f:
  print(f.readlines())

['This file contains text and some numbers\n', '012345678910111213141516171819202122232425262728293031323334353637383940414243444546']


In [0]:
import os
os.path.isfile("file_#_28.txt")

False

In [0]:
# A different printing experience: emit the text one character at a time.
import sys
import time
# Create a small sample file to read back.
with open("somefile.txt", "w") as file:
  file.write("This is sample text written in sample file")
  
with open("somefile.txt", "r") as file:
  outtext = file.readlines()  # list of lines; only the first one is used below


# Write each character of the first line separately, flushing stdout every
# time so it appears immediately, and pause 0.3 seconds between characters.
for i in outtext[0]:
    sys.stdout.write(i)
    sys.stdout.flush()
    time.sleep(0.3)

This is sample text written in sample file

# Exceptions 

In short: if your code has run into errors, or is likely to, it's a smart move to use exception handling. 
Let's discuss it with examples. 

In [0]:
# the function just returns the item in given index
def fetcher(obj, index):
  """Return the item of ``obj`` found at ``index``.

  The lookup is a plain subscription (``obj[index]``), so any exception it
  raises, such as IndexError for an out-of-range string index, propagates
  to the caller.
  """
  item = obj[index]
  return item

In [0]:
# works really well 
fetcher("spam", 2)

'a'

In [0]:
# but 
fetcher("spam", 15)

IndexError: ignored

To catch errors like this, so that the code can keep running instead of stopping with an error message, we use exception handling. Server programs, for instance, typically
need to remain active even after internal errors. 

### Exception Syntax 

```
try:
  statement
except:
  print("Error message")
else: # optional 
  statement
finally: # optional
  statement 

```

In [0]:
try:
  print(fetcher("Spa", 15))
except IndexError:
  print('got exception')

got exception


In [0]:
try:
  fetcher("Spa", 1)
except IndexError:
  print('got exception')
print("Continuing the code")

Continuing the code


In [0]:
try:
  fetcher("Spa", 4)
except:  # a bare except catches every exception, including SystemExit and KeyboardInterrupt
  print('got exception')

got exception


In [0]:
try:
  fetcher("Spa", 4)  # index 4 is out of range for a 3-character string, so IndexError is raised
except ZeroDivisionError:  # does not match IndexError, so the error is not handled here
  print('got exception')

IndexError: ignored

### Some error types
```
ImportError: an import fails;
IndexError: a list is indexed with an out-of-range number;
NameError: an unknown variable is used;
SyntaxError: the code can't be parsed properly;
TypeError: a function is called on a value of an inappropriate type;
ValueError: a function is called on a value of the correct type, but with an inappropriate value.

```
There are also other built-in exceptions, as well as exceptions defined in third-party libraries. 

In [0]:
try:
    variable = 10
    # int + str raises TypeError, so the division below is never reached
    print(variable + "hello")
    print(variable /0)
except (TypeError, ValueError):
    print("Error occurred")
except ZeroDivisionError:
    print("Divided by zero")

Error occurred


### Finally Clause

In [0]:
# finally is executed no matter what, and is generally used to release external resources.
try:
  fetcher("Spa", 4)
except IndexError:
  print('got exception')
finally:
  print("Continuing the code")

got exception
Continuing the code


In [0]:
try:
  fetcher("Spa", 4)
finally:  # runs even though no except clause handles the IndexError
  print("Continuing the code")
print("Some text")  # not reached: the IndexError propagates once finally has run

Continuing the code


IndexError: ignored

In [0]:
try:
  fetcher("Spa", 2)
finally:
  print("Continuing the code")

Continuing the code


In [0]:
try:
  fetcher("Spa", 4)
except Exception as e:
  print(e)
finally:
  print("Continuing the code")
print("Some text")

string index out of range
Continuing the code
Some text


In [0]:
# if you want your code to stop after exception
import sys

try:
    f = open('test_file.txt') # raises FileNotFoundError if the file is missing
    var = wrong_var # wrong_var is not defined in this cell, so a NameError is expected here
except Exception:
    print('Some kind of exception occured')
    sys.exit(1) # raises SystemExit, so the print after the try block is skipped
print('some code')

Some kind of exception occured


SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


### Raise 

In [0]:
# We can raise exceptions by ourself
print(1)
raise ValueError
print(2)

1


ValueError: ignored

ERROR! Session/line number was not unique in database. History logging moved to new session 60


In [0]:
try:
  raise IndexError
except IndexError:
  print('propagating')
  raise

propagating


IndexError: ignored

### Else Clause

In [1]:
try:
    f = open('testfile.txt')
except FileNotFoundError as e:
    print(e)
else:
    # else runs only when the try body raised nothing, i.e. the file was opened
    print('Executing the else clause.')
    f.close()  # release the handle returned by the successful open()

[Errno 2] No such file or directory: 'testfile.txt'


In [0]:
try:
    f = open('testfile.txt')
except FileNotFoundError as e:
    print(e)
else:
    # else runs only when the try body raised nothing, i.e. the file was opened
    print('Executing the else clause.')
    f.close()  # release the handle returned by the successful open()
# Code after the whole try statement runs whether or not the open succeeded.
print("This part is executed anyways.")

[Errno 2] No such file or directory: 'testfile.txt'
This part is executed anyways.


In [0]:
# Part 1: create test_file.txt, then try to open a file that may not exist.
f = None  # stays None if open() fails, so the finally clause can tell
try:
    f = open('test_file.txt', "w")
except FileNotFoundError as e:
    print(e)
else:
    try:
        # a separate name keeps the handle held in f from being overwritten
        other = open('newfile.txt')
    except FileNotFoundError as e:
        print(e)
    else:
        other.close()
finally:
    # close only what was actually opened; calling close() on a name that
    # was never bound would raise NameError and hide the real error
    if f is not None:
        f.close()

# Part 2: reopen the file for writing and store some text in it.
f = None
try:
    f = open('test_file.txt', "w")
    f.write("text")
except Exception as e:
    print(e)
finally:
    if f is not None:
        f.close()
  


[Errno 2] No such file or directory: 'newfile.txt'


As you saw here, ``finally`` can be used to clean after our code, closing file, closing the server, etc. 

# Assertions

 An assertion checks that a condition is True and raises an error if it is False. Assertions are mainly used for checking the types and values of arguments and the output of functions, and also as a debugging tool.

 If the condition is not satisfied, the program is stopped and gives an AssertionError.
 
 ``assert <condition> ``

If the condition is not satisfied, the program is stopped and gives an AssertionError + prints the specified error message.

`` assert <condition>,<error message>``

In [0]:
def avg(marks):
    """Return the arithmetic mean of ``marks``.

    An empty ``marks`` fails the assertion, which raises AssertionError.
    """
    count = len(marks)
    assert count != 0
    return sum(marks)/count

mark1 = []
print("Average of mark1:",avg(mark1))

AssertionError: ignored

In [0]:
mark1 = [1,2,4]
print("Average of mark1:",avg(mark1))

Average of mark1: 2.3333333333333335


In [0]:
def avg(marks):
    """Return the arithmetic mean of ``marks``.

    An empty ``marks`` fails the assertion, which raises AssertionError
    carrying the message "List is empty.".
    """
    count = len(marks)
    assert count != 0,"List is empty."
    return sum(marks)/count

mark2 = [55,88,78,90,79]
print("Average of mark2:",avg(mark2))

mark1 = []
print("Average of mark1:",avg(mark1))

Average of mark2: 78.0


AssertionError: ignored