# re standard libary   <a href="https://docs.python.org/3.7/library/re.html">python doc</a>

Here are the basic functions from the re library:

* findall()
* search()
* split()
* sub()

In [2]:
import re

### findall - returns a list containing all the matches

In [77]:
s = "The train runs in the rain"
x = re.findall("in", s)
print(x)


['in', 'in', 'in']


In [3]:
x = re.findall("[Tt]he", s)
print(x)


['The', 'the']


In [4]:
x = re.findall("\S*in", s)
print(x)
# s= one non white space character
# *= 0 or more
# + = 1 or more

['train', 'in', 'rain']


In [5]:
x = re.findall("\S+in", s)
print(x)

['train', 'rain']


In [6]:
x = re.findall("\S+in$", s)
print(x)

['rain']


In [7]:
x = re.findall("\S+in|$", s)
print(x)

['train', 'rain', '']


#### Greedy vs Non-greedy

In [8]:
x = re.findall(".*he", s)
print(x)

['The train runs in the']


In [9]:
x = re.findall(".*?he", s)
print(x)

['The', ' train runs in the']


### search() - returns a Match object if there's a match in the string

In [55]:
s = "The train runs in the rain"
x = re.search("in", s)
print(x)

<re.Match object; span=(7, 9), match='in'>


#### Match object has the following methods:

* span()
* group()
* start()
* end()

In [None]:
x.span()

In [None]:
x.group()

In [None]:
x.start()

In [None]:
x.end()

### split() - returns a list of strings split at each match

In [79]:
s = "The train runs in the rain"
x = re.split("ai", s)
print(x)

['The tr', 'n runs in the r', 'n']


In [80]:
x = re.split("ai", s, 2)  # only first 2 occurence
print(x)

['The tr', 'n runs in the r', 'n']


In [78]:
x = re.split("\S+in\S*", s)
print(x)

['The ', ' runs in the ', '']


### sub() - replaces one or more matches with a substitution string

In [None]:
s = "The train runs in the rain"
x = re.sub("ai", "AI", s)
print(x)

In [None]:
x = re.subn("ai", "AI", s)
print(x)

### More complicated example

In [42]:
s = "From admin123@sjsu.edu to students.456@gmail.com"

y = re.findall('[a-zA-Z0-9]+@[a-zA-Z0-9]+\.[a-zA-Z0-9]+', s)
print(y)  

['admin123@sjsu.edu', 'students.456@gmail.com']


In [83]:
y = re.findall('[a-zA-Z0-9]+@[a-zA-Z0-9.]+', s)
print(y)

['admin123@sjsu.edu', 'students456@gmail.com']


### Example on searching a specific number pattern (i.e. phone number or SSN)

In [21]:
s  = "Hello my cell number is 408-123-4567 and my SSN is 123-45-6789"
y = re.findall('\d+-\d+-\d+', s)
print(y)

['408-123-4567', '123-45-6789']


In [22]:
y = re.findall('\d{3}.\d{3}.\d{4}', s)  # find pattern ###-###-##### d{3} means 3 digits
print(y)

['408-123-4567']


In [23]:
y = re.findall('\d{3}.\d{2}.\d{4}', s)  # find pattern ###-##-####
print(y)

['123-45-6789']


### Extracting part of a regex pattern

In [37]:
s = "From admin123@sjsu.edu to students456@gmail.com"

y = re.findall('@([^ ]*)\.', s)
print(y)

['sjsu', 'gmail']


In [15]:
y = re.sub('@([^ ]*)\.', "@abcde.", s)
print(y)

From admin123@abcde.edu to students456@abcde.com


### A more complicated search example

In [29]:
m = re.search("([a-zA-Z]+) (\d+)", "Thanksgiving Day this year is on November 28!")
print(m)

<re.Match object; span=(33, 44), match='November 28'>


In [30]:
m.group()

'November 28'

In [31]:
print(m.group(0))
print(m.group(1))
print(m.group(2))

November 28
November
28


In [68]:
import re
s = 'My name is Sukriti Mishra my Dream Big is this blishful'
x = re.findall('is',s)
print(x)

['is', 'is', 'is', 'is', 'is']


In [30]:
x= re.findall('[Mm]y',s)
print(x)

['My', 'my']


In [66]:
x = re.findall("\S*is", s)
print(x)

['is', 'Mis', 'is', 'this', 'blis']


In [70]:
x = re.findall("\S+ul$", s)
print(x)

['blishful']


In [73]:
x = re.findall("\S+ul", s)
print(x)

['blishful']


In [74]:
x = re.findall("\S+ri", s)
print(x)

['Sukri']


In [75]:
x = re.findall(".*is", s)
print(x)

['My name is Sukriti Mishra my Dream Big is this blis']


In [49]:
x = re.findall(".*is+.*by$", s)
print(x)

['My name is Sukriti Mishra my Dream Big my hobby']


In [52]:
x = re.findall(".*?is", s)
print(x)

['My name is', ' Sukriti Mis', 'hra my Dream Big is', ' this', ' blis']


In [54]:
x = re.search('is', s)
print(x)

<re.Match object; span=(8, 10), match='is'>


In [56]:
x.span()

(7, 9)

In [57]:
x.group()


'in'

In [58]:
x.start()

7

In [59]:
x.end()

9

In [64]:
x = re.split('is',s, 4)
print(x)

['My name ', ' Sukriti M', 'hra my Dream Big ', ' th', ' blishful']


In [4]:
import subprocess
cmd = ['ls', '-l']
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

o, e = proc.communicate()

print('Output: ' + o.decode('ascii'))
print('Error: '  + e.decode('ascii'))
print('code: ' + str(proc.returncode))

Output: total 9928
-rw-r--r--@ 1 sukritimishra  staff     5976 Oct 20 21:04 CSV.ipynb
-rw-r--r--@ 1 sukritimishra  staff  1393460 Sep 11 15:26 DATA 200 Lecture 1.pdf
-rw-r--r--@ 1 sukritimishra  staff   512871 Sep 11 15:26 DATA 200 Lecture 2.pdf
-rw-r--r--@ 1 sukritimishra  staff   204862 Sep 21 10:16 DATA 200 Lecture 3+.pdf
-rw-r--r--@ 1 sukritimishra  staff   651848 Sep 20 21:48 DATA 200 Lecture 3.pdf
-rw-r--r--@ 1 sukritimishra  staff   229809 Oct 24 13:38 DATA 200 Lecture 6 (OOP).pdf
-rw-r--r--@ 1 sukritimishra  staff     1671 Sep  7 00:45 DATA 200- PYTHON
-rw-r--r--@ 1 sukritimishra  staff   163483 Sep  9 12:50 DATA200 HW1.pdf
-rw-r--r--@ 1 sukritimishra  staff    78995 Sep 16 08:16 DATA200 HW2.pdf
-rw-r--r--@ 1 sukritimishra  staff   114964 Sep 29 20:43 DATA200 HW3.pdf
-rw-r--r--@ 1 sukritimishra  staff   108137 Oct 11 18:55 DATA200 HW4 (updated 2).pdf
-rw-r--r--@ 1 sukritimishra  staff    82301 Oct 18 10:16 DATA200 HW5.pdf
-rw-r--r--@ 1 sukritimishra  staff    90553 Oct 21 10:22

In [8]:
time = re.split(':', '2018')
print(time)

['2018']


In [1]:
import re

In [12]:
s = 'python his this risk bliss in the kiss'
x = re.findall(".*?is", s)
print(x)


['python his', ' this', ' ris', 'k blis', 's in the kis']


In [13]:
x = re.findall(".*is", s)
print(x)

['python his this risk bliss in the kis']


In [29]:
re.findall('<[/]?[a-z|0-9]*>', '<H1>HELLO, world</H1>'.lower())

['<h1>', '</h1>']