### Expressions :
***
`\d`                         Any numeric digit from `0` to `9`.

`\D`                         Matches any character which is not a decimal digit.
                           This is the opposite of `\d`.

`\w`                         Any letter, numeric digit, or the underscore
                           character.  (Think of this as matching
                           "word" characters.)

`\W`                         Any character that is not a letter,
                           numeric digit, or the underscore character.

`\s`                         Any whitespace character: space, tab, newline,
                           and similar (carriage return, form feed, vertical tab).
                           (Think of this as matching white-space characters.)

`\S`                         Any character that is not a space, tab,
                           or newline.
***

In [1]:
import re
import pandas as pd


### > search

In [2]:
# Sample input: two digits immediately followed by letters.
text = "12October"

In [3]:
# r"\d" matches a single digit, so r"\d\d" finds the first two consecutive digits.
# The pattern is a raw string so the backslashes reach `re` untouched instead
# of being flagged by Python as an invalid "\d" escape sequence.
num = re.search(r'\d\d', text)
num

<re.Match object; span=(0, 2), match='12'>

In [4]:
# Group 0 is the whole matched substring (the same as calling group() with no argument).
num.group(0)

'12'

In [5]:
# Sample input mixing digits and uppercase letters: one digit, "PM", two digits, "MIN".
text = "8PM19MIN"

In [6]:
# Two digits followed by three non-digit characters.
# Raw string: keeps "\d" / "\D" literal and avoids the invalid-escape warning.
nondigit = re.search(r"\d\d\D\D\D", text)
print(nondigit.group())


19MIN


In [7]:
# Sample sentence ending in a 10-digit number written without separators.
text = "My phone number is 5556667777"

In [8]:
# Repeating the one-digit pattern ten times builds a pattern for ten digits in a row.
# Raw string: keeps "\d" literal and avoids the invalid-escape warning.
telno = re.search(r"\d" * 10, text)
print(telno.group())


5556667777


In [9]:
# Sample sentence with the number written in dash-separated 3-3-4 digit groups.
text = "My phone number is 555-666-7777"

In [10]:
# Build "3 digits - 3 digits - 4 digits" by repeating and joining pattern pieces.
# Each digit piece is a raw string so "\d" is not treated as a (invalid) string escape.
telno = re.search(r"\d" * 3 + "-" + r"\d" * 3 + "-" + r"\d" * 4, text)
print(telno.group())


555-666-7777


In [11]:
# Parentheses create capture groups, one per block of digits.
# Raw string: keeps "\d" literal and avoids the invalid-escape warning.
telno = re.search(r"(\d\d\d)-(\d\d\d)-(\d\d\d\d)", text)
print(telno.group())   # whole match
print(telno.group(1))  # first parenthesised group
print(telno.group(2))  # second parenthesised group
print(telno.group(3))  # third parenthesised group

555-666-7777
555
666
7777


In [12]:
# Save the current sample sentence to text.txt (overwriting any existing file).
with open("text.txt", mode="w") as file:
    file.write(text)
    

In [13]:
# Read the contents of text.txt back and show them.
with open("text.txt", "r") as file:
    txt = file.read()
print(txt)

# Two capture groups: the first three digits, then the remaining
# "ddd-dddd" part (the inner dash sits inside the second group).
# Raw string: keeps "\d" literal and avoids the invalid-escape warning.
output = re.search(r"(\d\d\d)-(\d\d\d-\d\d\d\d)", txt)

print(output.group(1))
print(output.group(2))


My phone number is 555-666-7777
555
666-7777


In [14]:
# Sample input with digit runs of different lengths separated by spaces, letters and punctuation.
value = "0 1, t 10, o 100. 100000"


In [15]:
# {1} means "exactly one", so every digit is returned as its own match.
# Raw string: keeps "\d" literal and avoids the invalid-escape warning.
output = re.findall(r"\d{1}", value)
print(output)


['0', '1', '1', '0', '1', '0', '0', '1', '0', '0', '0', '0', '0']


In [16]:
# {1,6} means "between one and six"; matching is greedy, so each run of
# digits is returned whole rather than digit by digit.
# Raw string: keeps "\d" literal and avoids the invalid-escape warning.
output = re.findall(r"\d{1,6}", value)
print(output)


['0', '1', '10', '100', '100000']


In [17]:
# Sample input: dash-separated digits followed by trailing text inside the same string.
phone = "2004-959-959 # This is Phone Number"


In [18]:
# Replace every non-digit character with "*"; digits are left in place.
# Raw string: keeps "\D" literal and avoids the invalid-escape warning.
output = re.sub(r"\D", "*", phone)
print(output)

2004*959*959***********************


In [19]:
# Input for the next substitution: digits, dashes, and trailing text.
phone = "2004-959-959 # This is Phone Number"

In [20]:
# Replace every non-digit character with the empty string, keeping only the digits.
# Raw string: keeps "\D" literal and avoids the invalid-escape warning.
output = re.sub(r"\D", "", phone)
print(output)

2004959959


### Special Characters
___
``"[]"``	  A set of characters	``"[a-m]"``

``"\"``	      Signals a special sequence (can also be used to escape special characters)

``"."``	      Any character (except newline character)

``"^"``	      Starts with	``"^hello"``

``"$"``	      Ends with	``"world$"``

``"*"``	      Zero or more occurrences

`"+"`	      One or more occurrences

`"{}"`	  Exactly the specified number of occurrences, or a range of counts	`"\d{3}"`, `"\d{1,6}"`

`"|"`	      Either or	`"falls|stays"`

`"()"`	  Capture and group
___

In [21]:
# Sample sentence containing a one-digit number and a three-digit number.
txt = '1 person against 100 people'


In [22]:
# Without a quantifier, r"\d" matches one digit at a time, so "100" comes back as three items.
# Raw string: keeps "\d" literal and avoids the invalid-escape warning.
output = re.findall(r'\d', txt)
print(output)


['1', '1', '0', '0']


In [23]:
# Sample string for the "^" (starts with) and "$" (ends with) anchor examples.
txt = "hello world"


In [24]:
# "^" anchors the pattern at the start of the string.
output = re.compile('^hello').findall(txt)
print(output)

['hello']


In [25]:
# Same anchor with a prefix the string does not start with: the result is an empty list.
output = re.compile('^tello').findall(txt)
print(output)


[]


In [26]:
# "$" anchors the pattern at the end of the string.
output = re.compile("world$").findall(txt)
output

['world']

In [27]:
# Small Series for the pandas string-method examples; the last entry contains no digit.
s = pd.Series(
    data=["a3", "b4", "c5", "d"],
)


In [28]:
# True where the element contains at least one digit.
# No capture group here: `str.contains` only reports whether the pattern
# matches, and pandas warns when the pattern has groups. The raw string also
# avoids the invalid-escape warning for "\d".
s.str.contains(r"\d")


  return func(self, *args, **kwargs)


0     True
1     True
2     True
3    False
dtype: bool

In [29]:
# `str.extract` returns the capture group of the first match in each element;
# elements with no match yield a missing value.
# Raw string: keeps "\d" literal and avoids the invalid-escape warning.
s.str.extract(r'(\d)')


Unnamed: 0,0
0,3.0
1,4.0
2,5.0
3,


In [30]:
# Capture the first "word" character (letter, digit or underscore) of each element.
# Raw string: keeps "\w" literal and avoids the invalid-escape warning.
s.str.extract(r'(\w)')

Unnamed: 0,0
0,a
1,b
2,c
3,d


In [31]:
# Longer elements: each one holds several word characters.
s = pd.Series(
    data=["a3aa", "b4aa", "c5aa"],
)


In [32]:
# Only the first match per element is extracted, so a single character
# comes back even though each element has several word characters.
# Raw string: keeps "\w" literal and avoids the invalid-escape warning.
s.str.extract(r'(\w)')

Unnamed: 0,0
0,a
1,b
2,c


In [33]:
# Consumption strings: a leading number (integer or one decimal place),
# a unit ("l" or "kg"), then "/100 km (comb)".
s = pd.Series(
    [
        "40 l/100 km (comb)",
        "38 l/100 km (comb)",
        "6.4 l/100 km (comb)",
        "8.3 kg/100 km (comb)",
        "5.1 kg/100 km (comb)",
        "5.4 l/100 km (comb)",
        "6.7 l/100 km (comb)",
        "6.2 l/100 km (comb)",
        "7.3 l/100 km (comb)",
        "6.3 l/100 km (comb)",
        "5.7 l/100 km (comb)",
        "6.1 l/100 km (comb)",
        "6.8 l/100 km (comb)",
        "7.5 l/100 km (comb)",
        "7.4 l/100 km (comb)",
        "3.6 kg/100 km (comb)",
        "0 l/100 km (comb)",
        "7.8 l/100 km (comb)",
    ]
)


In [34]:
# Naive first attempt: capture the first two consecutive digits.
# This only works for values that start with two digits; for the others the
# first two-digit run is the "10" of "100".
# Raw string: keeps "\d" literal and avoids the invalid-escape warning.
s.str.extract(r'(\d\d)').T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,40,38,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10


In [35]:
# Alternatives are tried left to right: two digits, a "d.d" decimal, or a single digit.
# The dot is escaped ("\.") so it matches only a literal decimal point; an
# unescaped "." would accept any character between the two digits.
# Raw string: keeps the backslashes literal and avoids invalid-escape warnings.
result = s.str.extract(r'(\d\d|\d\.\d|\d)').T
result


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,40,38,6.4,8.3,5.1,5.4,6.7,6.2,7.3,6.3,5.7,6.1,6.8,7.5,7.4,3.6,0,7.8


In [36]:
# Group 1: the leading number (two digits, a "d.d" decimal, or one digit).
# ".+" then skips over the text in between, and group 2 captures a
# three-digit run further along the string.
# The decimal point is escaped ("\.") so it cannot match an arbitrary character.
# Raw string: keeps the backslashes literal and avoids invalid-escape warnings.
result = s.str.extract(r'(\d\d|\d\.\d|\d).+(\d\d\d)').T
result


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,40,38,6.4,8.3,5.1,5.4,6.7,6.2,7.3,6.3,5.7,6.1,6.8,7.5,7.4,3.6,0,7.8
1,100,100,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100,100.0


In [37]:
# Group 1, anchored at the start: either digits + literal "." + one digit, or digits only.
# Then a space, the unit as word characters, a "/", and group 2 with the digits after it.
# The decimal point is escaped ("\.") so it cannot match an arbitrary character.
# Raw string: keeps the backslashes literal and avoids invalid-escape warnings.
result = s.str.extract(r"(^\d*\.\d|^\d*) \w*/(\d*)").T
result


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,40,38,6.4,8.3,5.1,5.4,6.7,6.2,7.3,6.3,5.7,6.1,6.8,7.5,7.4,3.6,0,7.8
1,100,100,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100,100.0


In [38]:
# Simpler approach: capture the first run of non-whitespace characters that
# is followed by whitespace, i.e. the leading token of each string.
# Raw string: keeps "\S" / "\s" literal and avoids invalid-escape warnings.
result2 = s.str.extract(r'(\S+)\s+').T
result2


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,40,38,6.4,8.3,5.1,5.4,6.7,6.2,7.3,6.3,5.7,6.1,6.8,7.5,7.4,3.6,0,7.8


In [39]:
# Group 1: the leading non-whitespace token. ".*/" skips ahead to a "/",
# and group 2 captures the non-whitespace token right after it.
# Raw string: keeps "\S" literal and avoids invalid-escape warnings.
result2 = s.str.extract(r'(\S+).*/(\S+)').T
result2


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,40,38,6.4,8.3,5.1,5.4,6.7,6.2,7.3,6.3,5.7,6.1,6.8,7.5,7.4,3.6,0,7.8
1,100,100,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100,100.0


In [40]:
# Each element is "MM/YYYY", two newlines, then a consumption or CO2 figure.
s = pd.Series(
    [
        "06/2020\n\n4.9 l/100 km (comb)",
        "11/2020\n\n166 g CO2/km (comb)",
        "10/2019\n\n5.3 l/100 km (comb)",
        "05/2022\n\n6.3 l/100 km (comb)",
        "07/2019\n\n128 g CO2/km (comb)",
        "06/2022\n\n112 g CO2/km (comb)",
        "01/2022\n\n5.8 l/100 km (comb)",
        "11/2020\n\n106 g CO2/km (comb)",
        "04/2019\n\n105 g CO2/km (comb)",
        "08/2020\n\n133 g CO2/km (comb)",
        "04/2022\n\n133 g CO2/km (comb)",
    ]
)


In [41]:
# Group 1: the digits before the slash; group 2: the four digits after it.
# The separator is written as a literal "/" rather than ".", which would
# accept any character (including a digit) in that position.
# Raw string: keeps "\d" literal and avoids invalid-escape warnings.
result = s.str.extract(r'(\d+)/(\d\d\d\d)')
result


Unnamed: 0,0,1
0,6,2020
1,11,2020
2,10,2019
3,5,2022
4,7,2019
5,6,2022
6,1,2022
7,11,2020
8,4,2019
9,8,2020


In [42]:
# Same split using non-whitespace runs on both sides of the first usable "/".
# "\S+" stops at the newline, so group 2 ends with the four characters after the slash.
# Raw string: keeps "\S" literal and avoids invalid-escape warnings.
result = s.str.extract(r'(\S+)/(\S+)').T
result

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,6,11,10,5,7,6,1,11,4,8,4
1,2020,2020,2019,2022,2019,2022,2022,2020,2019,2020,2022


In [43]:
# Most explicit form: exactly two digits, a slash, exactly four digits.
# Raw string: keeps "\d" literal and avoids invalid-escape warnings.
result = s.str.extract(r'(\d{2})/(\d{4})').T
result

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,6,11,10,5,7,6,1,11,4,8,4
1,2020,2020,2019,2022,2019,2022,2022,2020,2019,2020,2022


### karaman