### Expressions :
***
`\d`                         Any numeric digit from `0` to `9`.

`\D`                         Matches any character which is not a decimal digit.
                           This is the opposite of `\d`.
                           
`\w`                         Any letter, numeric digit, or the underscore
                           character.  (Think of this as matching
                           "word" characters.)
                           
`\W`                         Any character that is not a letter,
                           numeric digit, or the underscore character.
                           
`\s`                         Any whitespace character: space, tab, newline,
                           and similar.  (Think of this as matching
                           white-space characters.)
                           
`\S`                         Any character that is not a space, tab,
                           or newline.
***

In [4]:
import pandas as pd
import numpy as np
import re

# search 

In [6]:
# Sample string that mixes letters and digits.
text = "A78L41K"

In [21]:
# Locate the first pair of consecutive digits in `text`.
# The pattern is a raw string so the backslash reaches the regex engine
# untouched; a plain '\d' literal is an invalid string escape and triggers a
# warning on recent Python versions.
num = re.search(r'\d\d', text)
num

<re.Match object; span=(1, 3), match='78'>

In [22]:
# group(0) is the entire text matched by the pattern.
num.group(0)

'78'

In [23]:
# New sample string: digits interleaved with letters.
text = "8PM19MIN"

In [29]:
# \D matches a character that is not a decimal digit; search() returns the
# first such occurrence. Raw string avoids the invalid '\D' string escape.
nondigi = re.search(r"\D", text)
print(nondigi.group())

P


In [30]:
# Sentence containing a ten-digit number with no separators.
text = 'My phone number is 5556667777'

In [46]:
# Match ten consecutive digits, spelled out one \d per digit.
# Raw string keeps every backslash literal for the regex engine.
telno = re.search(r"\d\d\d\d\d\d\d\d\d\d", text)
print(telno.group())

5556667777


In [49]:
# Sentence containing a number written as 3-3-4 digits separated by hyphens.
text = 'My phone number is 415-555-1212'

In [58]:
# Match the 3-3-4 hyphenated digit layout. The hyphens are literal characters.
# Raw string avoids the invalid '\d' string escape.
telno = re.search(r"\d\d\d-\d\d\d-\d\d\d\d", text)
print(telno.group())

415-555-1212


In [60]:
# Build the same 3-3-4 pattern by repeating the r"\d" token with string
# multiplication. Each piece is a raw string so the backslash stays literal.
telno = re.search(r"\d" * 3 + "-" + r"\d" * 3 + "-" + r"\d" * 4, text)
print(telno.group())

415-555-1212


In [67]:
# Two capture groups: group 1 is the first three digits, group 2 is the
# remaining "ddd-dddd" part. Raw string avoids invalid '\d' string escapes.
telno = re.search(r"(\d\d\d)-(\d\d\d-\d\d\d\d)", text)
print(telno.group(2))

555-1212


In [68]:
# Same two-group pattern; this time print group 1 (the first three digits).
# Raw string avoids invalid '\d' string escapes.
telno = re.search(r"(\d\d\d)-(\d\d\d-\d\d\d\d)", text)
print(telno.group(1))

415


In [69]:
# Persist the sample sentence to text.txt in the working directory.
# An explicit encoding makes the bytes on disk independent of the OS locale.
with open("text.txt", "w", encoding="utf-8") as file:
    file.write(text)

In [77]:
# Read text.txt back into `txt` and display it.
# An explicit encoding makes decoding independent of the OS locale.
with open("text.txt", "r", encoding="utf-8") as file:
    txt = file.read()
print(txt)

My phone number is 415-555-1212


In [85]:
# Apply the two-group pattern to the text read from the file.
# Raw string avoids invalid '\d' string escapes.
output = re.search(r"(\d\d\d)-(\d\d\d-\d\d\d\d)", txt)

print(output.group(1))  # first three digits
print(output.group(2))  # remaining "ddd-dddd" part

415
555-1212


In [86]:
# Sample string with digit runs of different lengths.
value = "O 1, t 10, o 100. 100000"

In [90]:
# findall() returns every single-digit match; set() keeps the distinct ones.
# Raw string avoids the invalid '\d' string escape.
output = set(re.findall(r"\d", value))
output

{'0', '1'}

In [91]:
# {1} repeats the preceding token exactly once, so this matches one digit at
# a time. Raw string avoids the invalid '\d' string escape.
output = re.findall(r"\d{1}", value)
output

['1', '1', '0', '1', '0', '0', '1', '0', '0', '0', '0', '0']

In [92]:
# {1,6} matches runs of one to six digits, taking as many as possible.
# Raw string avoids the invalid '\d' string escape.
output = re.findall(r"\d{1,6}", value)
output

['1', '10', '100', '100000']

In [93]:
# Sample string: hyphen-separated digits followed by trailing text.
phone = "2004-959-559 # This is Phone Number"

In [98]:
# Replace every non-digit character with a space; only the digits survive.
# Raw string avoids the invalid '\D' string escape.
output = re.sub(r"\D", " ", phone)
print(output)

2004 959 559                       


In [100]:
# Replace every digit with "+", leaving all other characters as they are.
# Raw string avoids the invalid '\d' string escape.
output = re.sub(r"\d", "+", phone)
print(output)

++++-+++-+++ # This is Phone Number


### Special Characters
___
``"[]"``	  A set of characters	``"[a-m]"``

``"\"``	      Signals a special sequence (can also be used to escape special characters)

``"."``	      Any character (except newline character)

``"^"``	      Starts with	``"^hello"``

``"$"``	      Ends with	``"world$"``

``"*"``	      Zero or more occurrences

`"+"`	      One or more occurrences

`"{}"`	  Exactly the specified number of occurrences	`"\d{3}"` (a range such as `"\d{1,6}"` matches between 1 and 6 occurrences)

`"|"`	      Either or	`"falls|stays"`

`"()"`	  Capture and group
___


In [101]:
# Sample sentence containing two numbers of different lengths.
txt = "1 person against 100 people"

In [105]:
# "+" means one or more, so each whole run of digits is returned as one match.
# Raw string avoids the invalid '\d' string escape.
output = re.findall(r"\d+", txt)
print(output)

['1', '100']


In [106]:
# Sample string for the start/end anchor examples.
txt = "hello world"

In [110]:
# "^" anchors the pattern to the start of the string.
starts_with_hello = re.compile("^hello")
output = starts_with_hello.findall(txt)
print(output)

['hello']


In [114]:
# "$" anchors the pattern to the end of the string.
ends_with_world = re.compile("world$")
output = ends_with_world.findall(txt)
print(output)

['world']


In [115]:
# Small Series of strings; the last element contains no digit.
s = pd.Series(['a3', 'b4', 'c5', 'd'])

In [117]:
# Extract the first digit of each element into a one-column DataFrame;
# elements with no match yield NaN.
# Raw string avoids the invalid '\d' string escape.
s.str.extract(r'(\d)')

Unnamed: 0,0
0,3.0
1,4.0
2,5.0
3,


In [118]:
# Series of four-character strings: letter, digit, letter, letter.
s = pd.Series(['a3aa', 'b4aa', 'c5aa'])

In [121]:
# Three capture groups around an uncaptured digit: each group becomes its own
# column in the result.
# Raw string avoids invalid '\w' / '\d' string escapes.
s.str.extract(r'(\w)\d(\w)(\w)')

Unnamed: 0,0,1,2
0,a,a,a
1,b,a,a
2,c,a,a


In [122]:
# Sample strings of the form "<number> <unit>/100 km (comb)", where the number
# is an integer or a one-decimal value.
consumption_labels = [
    '40 l/100 km (comb)',
    '38 l/100 km (comb)',
    '6.4 l/100 km (comb)',
    '8.3 kg/100 km (comb)',
    '5.1 kg/100 km (comb)',
    '5.4 l/100 km (comb)',
    '6.7 l/100 km (comb)',
    '6.2 l/100 km (comb)',
    '7.3 l/100 km (comb)',
    '6.3 l/100 km (comb)',
    '5.7 l/100 km (comb)',
    '6.1 l/100 km (comb)',
    '6.8 l/100 km (comb)',
    '7.5 l/100 km (comb)',
    '7.4 l/100 km (comb)',
    '3.6 kg/100 km (comb)',
    '0 l/100 km (comb)',
    '7.8 l/100 km (comb)',
]
s = pd.Series(consumption_labels)

In [123]:
# Display the Series to inspect the raw strings before extracting from them.
s

0       40 l/100 km (comb)
1       38 l/100 km (comb)
2      6.4 l/100 km (comb)
3     8.3 kg/100 km (comb)
4     5.1 kg/100 km (comb)
5      5.4 l/100 km (comb)
6      6.7 l/100 km (comb)
7      6.2 l/100 km (comb)
8      7.3 l/100 km (comb)
9      6.3 l/100 km (comb)
10     5.7 l/100 km (comb)
11     6.1 l/100 km (comb)
12     6.8 l/100 km (comb)
13     7.5 l/100 km (comb)
14     7.4 l/100 km (comb)
15    3.6 kg/100 km (comb)
16       0 l/100 km (comb)
17     7.8 l/100 km (comb)
dtype: object

In [128]:
# Capture the leading number: two digits, OR digit-point-digit, OR one digit.
# Alternatives are tried left to right. The decimal point is escaped (\.)
# because a bare "." matches any character, so "\d.\d" would also accept
# text such as "1 2". Raw string keeps the backslashes literal.
s.str.extract(r'(\d\d|\d\.\d|\d)')

Unnamed: 0,0
0,40.0
1,38.0
2,6.4
3,8.3
4,5.1
5,5.4
6,6.7
7,6.2
8,7.3
9,6.3


In [129]:
# Group 1: the leading number (same alternation as before, with the decimal
# point escaped so it only matches a literal ".").
# ".+" is an intentional wildcard that skips ahead to group 2, a run of
# three digits. Raw string keeps the backslashes literal.
s.str.extract(r'(\d\d|\d\.\d|\d).+(\d\d\d)')

Unnamed: 0,0,1
0,40.0,100
1,38.0,100
2,6.4,100
3,8.3,100
4,5.1,100
5,5.4,100
6,6.7,100
7,6.2,100
8,7.3,100
9,6.3,100


In [130]:
# Group 1: number at the start of the string, made of one or more digits, an
# optional literal decimal point, then any further digits.
# The original bare "." matched any character and only worked on integer
# values through backtracking; "\.?" states the intent directly.
# Then a space, the unit word, "/", and group 2 (the digits after the slash).
# Raw string keeps the backslashes literal.
s.str.extract(r'(^\d+\.?\d*) \w*/(\d*)')

Unnamed: 0,0,1
0,40.0,100
1,38.0,100
2,6.4,100
3,8.3,100
4,5.1,100
5,5.4,100
6,6.7,100
7,6.2,100
8,7.3,100
9,6.3,100


In [131]:
# Sample strings: an "NN/NNNN" prefix, two newlines, then a measurement
# string ending in "(comb)".
dated_labels = [
    '06/2020\n\n4.9 l/100 km (comb)',
    '11/2020\n\n166 g CO2/km (comb)',
    '10/2019\n\n5.3 l/100 km (comb)',
    '05/2022\n\n6.3 l/100 km (comb)',
    '07/2019\n\n128 g CO2/km (comb)',
    '06/2022\n\n112 g CO2/km (comb)',
    '01/2022\n\n5.8 l/100 km (comb)',
    '11/2020\n\n106 g CO2/km (comb)',
    '04/2019\n\n105 g CO2/km (comb)',
    '08/2020\n\n133 g CO2/km (comb)',
    '04/2022\n\n133 g CO2/km (comb)',
]
s = pd.Series(dated_labels)

In [148]:
# Split the leading "NN/NNNN" prefix into its two digit runs.
# The separator is written as a literal "/" instead of ".", which would
# match any character. Raw string keeps the backslashes literal.
s.str.extract(r'(\d+)/(\d+)')

Unnamed: 0,0,1
0,6,2020
1,11,2020
2,10,2019
3,5,2022
4,7,2019
5,6,2022
6,1,2022
7,11,2020
8,4,2019
9,8,2020


In [149]:
# Same split with fixed widths: exactly two digits, "/", exactly four digits.
# The separator is a literal "/" instead of the any-character ".".
# Raw string keeps the backslashes literal.
s.str.extract(r'(\d{2})/(\d{4})')

Unnamed: 0,0,1
0,6,2020
1,11,2020
2,10,2019
3,5,2022
4,7,2019
5,6,2022
6,1,2022
7,11,2020
8,4,2019
9,8,2020
