# Regular expressions

https://regex101.com/


## re.match()
- **Description:** Checks for a match **only at the beginning** of the string.
- **Syntax:** re.match(pattern, string, flags=0)
- **Returns:** A match object if the pattern is found; otherwise, None.



In [7]:
import re

matched = re.match(r"\[\d+\.\d+\]", "[1896295.009285]: LQM-WIFI: (2G) txRTSFrm=17 txRTSFail=0 rxCTSUcast=10 rxRTSUcast=0 txCTSFrm=0 txAMP")
if matched:
   print(matched)

<re.Match object; span=(10, 26), match='[1896295.009285]'>


## re.search()
- **Description:** Searches for a match **anywhere** in the string.
- **Syntax:** re.search(pattern, string, flags=0)
- **Returns:** A match object if the pattern is found; otherwise, None.

In [8]:

matched = re.search(r"\d", "[1896295.009285]: LQM-WIFI: (2G) txRTSFail=17 txRTSFail=0 rxCTSUcast=10 rxRTSUFail=0 txCTSFrm=0 txAMP")
if matched:
   print(matched)

<re.Match object; span=(0, 1), match='1'>


# re.findall()
- **Description:** Returns a **list of all matches** of the pattern in the string.
- **Syntax:** re.findall(pattern, string, flags=0)
- **Returns:** A list of matched strings.

In [22]:


matches = re.findall(r"([0-9a-zA-z]+Fail=\d+){1,3}", "[1896295.009285]: LQM-WIFI: (2G) tx01RTSFail=17 txRTSFail=0 rxCTSUcast=10 rxRTSUFail=0 rxRTSUFail=0 txCTSFrm=0 txAMP")
time = re.findall(r"\[(\d+\.\d+)\]", "[1896295.009285]: LQM-WIFI: (2G) txRTSFail=17 txRTSFail=0 rxCTSUcast=10 rxRTSUFail=0 rxRTSUFail=0 txCTSFrm=0 txAMP")

failed_mesgs = {}
for match in matches:
    print(match, time)
    

tx01RTSFail=17 ['1896295.009285']
txRTSFail=0 ['1896295.009285']
rxRTSUFail=0 ['1896295.009285']
rxRTSUFail=0 ['1896295.009285']


In [33]:
# Other than SSH
matches = re.findall(r"([^SSH][a-zA-Z]+Fail){1,3}", "[1896295.009285]: SSHFail BTFail LQM-WIFI: (2G) tx01RTSFail=17 txRTSFail=0 rxCTSUcast=10 rxRTSUFail=0 rxRTSUFail=0 txCTSFrm=0 txAMP")

failed_mesgs = {}
for match in matches:
    print(match)
    

 BTFail
1RTSFail
 txRTSFail
 rxRTSUFail
 rxRTSUFail


# re.finditer()
- **Description:** Returns an **iterator yielding match objects for all matches** of the pattern in the string.
- **Syntax:** re.finditer(pattern, string, flags=0)
- **Returns:** An iterator of match objects.

In [41]:
matches = re.finditer(r"\d", "112 abc \n123 xyz")
for match in matches:
   print(match)

<re.Match object; span=(0, 1), match='1'>
<re.Match object; span=(1, 2), match='1'>
<re.Match object; span=(2, 3), match='2'>
<re.Match object; span=(9, 10), match='1'>
<re.Match object; span=(10, 11), match='2'>
<re.Match object; span=(11, 12), match='3'>


# re.split()
- **Description:** **Splits** the string at each occurrence of the pattern.
- **Syntax:** re.split(pattern, string, maxsplit=0, flags=0)
- **Returns**: A list of substrings.

In [48]:
matches = re.split(r"\n|\t", "112 abc \n123 \txyz")
for match in matches:
   print(match)

112 abc 
123 
xyz


# re.sub()
- **Description:** **Replaces occurrences** of the pattern with a specified string.
- **Syntax:** re.sub(pattern, repl, string, count=0, flags=0)
- **Returns:** A string with the substitutions applied.

In [51]:
matches = re.sub(r"\n|\t", "**", "112 abc \n123 \txyz")
print(matches)
matches = re.subn(r"\n|\t", "**", "112 abc \n123 \txyz")
print(matches)

112 abc **123 **xyz
('112 abc **123 **xyz', 2)


# re.fullmatch()
- **Description:** Checks if the entire string matches the pattern.
- **Syntax:** re.fullmatch(pattern, string, flags=0)
- **Returns:** A match object if the pattern matches the whole string; otherwise, None.

In [63]:
matches = re.fullmatch(r"\d+.*", "112 \t")
print(matches)

<re.Match object; span=(0, 5), match='112 \t'>


# re.compile()
- **Description:** Compiles a regular expression pattern into a regex object for repeated use.
- **Syntax:** re.compile(pattern, flags=0)
- **Returns:** A regex object that can be reused with methods like .match() and .search().

In [73]:
expression = re.compile(r"\d+")
matches = expression.findall("112 abc \n123 \txyz")
print(matches)

['112', '123']


In [90]:
content = '''


^
kamal kumar mukiri (you)
    kamal-bec2004-@gmail.co.in
* *
^kamal 
Hariprasad. Tavarekere
tavarekere.hariprasad@gmail.com

mymail id kadiyalapavani04@gmailacom
       kadiyalapavani04@gmail.com

Kapil Tarani
kapil.tarani@gmail.com

Lakshmi Mocherla
lakshmi.mocherla@gmail.com

monikabm200@gmail.com
monikabm200@gmail.com

MOUNISHA KOMMURI
mounishakommuri@gmail.com

pavani kadiyala
pavanikadiyalachowdary@gmail.com

rajesh rama
rajeshramaraok@gmail.com

Ramana Pokala
ramana.pokala@gmail.com

Raviteja K
raviteja.kollapur@gmail.com

siddavatam salmafirdose
ssalmafirdose@gmail.com

mustafa123@outlook.com

'''
new_content = re.sub(r"([\.\-\w]+)@(\w+\.[comn\.i]*)", "***", content, count=2)
re.sub
print(new_content)




^
kamal kumar mukiri (you)
    ***
* *
^kamal 
Hariprasad. Tavarekere
***

mymail id kadiyalapavani04@gmailacom
       kadiyalapavani04@gmail.com

Kapil Tarani
kapil.tarani@gmail.com

Lakshmi Mocherla
lakshmi.mocherla@gmail.com

monikabm200@gmail.com
monikabm200@gmail.com

MOUNISHA KOMMURI
mounishakommuri@gmail.com

pavani kadiyala
pavanikadiyalachowdary@gmail.com

rajesh rama
rajeshramaraok@gmail.com

Ramana Pokala
ramana.pokala@gmail.com

Raviteja K
raviteja.kollapur@gmail.com

siddavatam salmafirdose
ssalmafirdose@gmail.com

mustafa123@outlook.com




In [82]:
import re

expression = r"([\.\-\w]+)@(\w+\.[comn\.i]*)" # \n\r\t\v
print(expression)

mailds = re.findall(expression, text)
for mailid in mailds:
    print(mailid)


([\.\-\w]+)@(\w+\.[comn\.i]*)
('kamal-bec2004-', 'gmail.co.in')
('tavarekere.hariprasad', 'gmail.com')
('kadiyalapavani04', 'gmail.com')
('kapil.tarani', 'gmail.com')
('lakshmi.mocherla', 'gmail.com')
('monikabm200', 'gmail.com')
('monikabm200', 'gmail.com')
('mounishakommuri', 'gmail.com')
('pavanikadiyalachowdary', 'gmail.com')
('rajeshramaraok', 'gmail.com')
('ramana.pokala', 'gmail.com')
('raviteja.kollapur', 'gmail.com')
('ssalmafirdose', 'gmail.com')
('mustafa123', 'outlook.com')


In [86]:
text = "Kamal, Phone: +91-0000011111 I am working in Harman international, "
#new_test = text.replace("+91-1234567890", "+91-1234567899")
new_text = re.sub(r"\s\+\d{2}\-\d{10}\s", " +91-1234567899 ", text)
print(new_text)

Kamal, Phone: +91-1234567899 I am working in Harman international, 


In [111]:
text = '''Wifi
Wifi: connection success
  fdsafds Wifi1234
Wifi: connection failed
fafdsafs
'''
matches = re.findall(r"^Wifi", text, re.VERBOSE)
print(matches)

['Wifi']


In [22]:
import re
text = '''Wifi 12 line
Wifi: connection line success
  fdsafds line +91-1234567890
Wifi: connection failed
fafdsafs
'''

result = re.search(r"\s(\+\d{2})\-(\d{10})\s", text)
print(result.groups())

('+91', '1234567890')


In [25]:
match = re.compile(r"\s(\+\d{2})\-(\d{10})\s")
print(type(match))

<class 're.Pattern'>


In [26]:
re.split(r":|line", text)

['Wifi 12\nWifi',
 ' connection success\n  fdsafds Wifi1234 +91-1234567890\nWifi',
 ' connection failed\nfafdsafs\n']

In [33]:
bill = '''
rice 1kg 100 per kg: 2kgs
oil 1kg 200 per kg: 2kgs
onion 1kg 20 per kg: 2kgs
wheat 1kg 120 per kg: 2kgs
'''

import re
bill = '''
rice 1kg 100.5 per kg: 2kgs
oil 1kg 200 per kg: 2kgs
onion 1kg 20 per kg: 2.5kgs
wheat 1kg 120 per kg: 2kgs
'''
reg= re.compile(r"(\w+)\s+(\d)kg\s([\d\.]+)\s\w+\skg:\s([\.\d])kgs")
items=re.findall(reg, bill)
bill=0
print(items)
for i in items:
    print(i)
    i=list(i)
    bill+=float(i[-2])*int(i[-1])
print(bill)

[('rice', '1', '100.5', '2'), ('oil', '1', '200', '2'), ('wheat', '1', '120', '2')]
('rice', '1', '100.5', '2')
('oil', '1', '200', '2')
('wheat', '1', '120', '2')
841.0


In [2]:
import re
content = '''
lease 192.168.0.196 {
  starts 5 2025/01/24 05:50:12;
  ends 5 2025/01/24 05:52:12;
  tstp 5 2025/01/24 05:52:12;
  cltt 5 2025/01/24 05:50:12;
  binding state free;
  hardware ethernet 08:00:27:7f:e8:6e;
  uid "\001\010\000'\177\350n";
  client-hostname "client1";
}
lease 192.168.0.197 {
  starts 5 2025/01/24 06:06:21;
  ends 5 2025/01/24 06:08:21;
  tstp 5 2025/01/24 06:08:21;
  cltt 5 2025/01/24 06:06:21;
  binding state free;
  hardware ethernet bc:09:1b:dd:f1:e9;
  uid "\001\274\011\033\335\361\351";
  client-hostname "kamal-ThinkPad-P15s-Gen-2i";
}
lease 192.168.0.158 {
  starts 5 2025/01/24 06:29:13;
  ends 5 2025/01/24 06:39:13;
  tstp 5 2025/01/24 06:39:13;
  cltt 5 2025/01/24 06:29:13;
  binding state free;
  hardware ethernet 08:00:27:7f:e8:6e;
  uid "\001\010\000'\177\350n";
}
lease 192.168.0.122 {
  starts 5 2025/01/24 13:33:10;
  ends 5 2025/01/24 15:33:10;
  tstp 5 2025/01/24 15:33:10;
  cltt 5 2025/01/24 13:41:00;
  binding state free;
  hardware ethernet 9e:10:d4:30:af:89;
  uid "\001\236\020\3240\257\211";
}
lease 192.168.0.179 {
  starts 6 2025/01/25 01:01:27;
  ends 6 2025/01/25 01:11:27;
  tstp 6 2025/01/25 01:11:27;
  cltt 6 2025/01/25 01:01:27;
  binding state free;
  hardware ethernet 5a:3d:79:1b:92:15;
  uid "\001Z=y\033\222\025";
}
lease 192.168.0.200 {
  starts 6 2025/01/25 02:13:39;
  ends 6 2025/01/25 02:15:39;
  tstp 6 2025/01/25 02:15:39;
  cltt 6 2025/01/25 02:13:39;
  binding state free;
  hardware ethernet 30:c9:ab:35:f0:e1;
  uid "\0010\311\2535\360\341";
  set vendor-class-identifier = "android-dhcp-9";
}
lease 192.168.0.173 {
  starts 6 2025/01/25 02:14:02;
  ends 6 2025/01/25 02:24:02;
  tstp 6 2025/01/25 02:24:02;
  cltt 6 2025/01/25 02:14:02;
  binding state free;
  hardware ethernet 74:a7:ea:9c:78:fa;
  set vendor-class-identifier = "dhcpcd-6.8.2:Linux-4.4.22+:armv7l:MT8167B";
}
lease 192.168.0.190 {
  starts 6 2025/01/25 06:53:19;
  ends 6 2025/01/25 07:03:19;
  tstp 6 2025/01/25 07:03:19;
  cltt 6 2025/01/25 06:53:19;
  binding state free;
  hardware ethernet 5a:3d:79:1b:92:15;
  uid "\001Z=y\033\222\025";
}
lease 192.168.0.191 {
  starts 6 2025/01/25 08:45:24;
  ends 6 2025/01/25 08:47:24;
  tstp 6 2025/01/25 08:47:24;
  cltt 6 2025/01/25 08:45:24;
  binding state free;
  hardware ethernet 14:13:0b:dc:21:7c;
}
lease 192.168.0.157 {
  starts 6 2025/01/25 08:45:24;
  ends 6 2025/01/25 08:55:24;
  tstp 6 2025/01/25 08:55:24;
  cltt 6 2025/01/25 08:45:24;
  binding state free;
  hardware ethernet 14:13:0b:dc:21:7c;
}
lease 192.168.0.189 {
  starts 6 2025/01/25 08:59:51;
  ends 6 2025/01/25 09:09:51;
  tstp 6 2025/01/25 09:09:51;
  cltt 6 2025/01/25 08:59:51;
  binding state free;
  hardware ethernet 44:65:0d:fd:94:42;
  uid "\001De\015\375\224B";
  set vendor-class-identifier = "udhcp 1.34.1";
}
lease 192.168.0.199 {
  starts 6 2025/01/25 09:45:31;
  ends 6 2025/01/25 09:47:31;
  tstp 6 2025/01/25 09:47:31;
  cltt 6 2025/01/25 09:45:31;
  binding state free;
  hardware ethernet 44:65:0d:fd:94:42;
  uid "\001De\015\375\224B";
  set vendor-class-identifier = "udhcp 1.34.1";
}
lease 192.168.0.167 {
  starts 6 2025/01/25 09:55:42;
  ends 6 2025/01/25 10:05:42;
  tstp 6 2025/01/25 10:05:42;
  cltt 6 2025/01/25 09:56:20;
  binding state free;
  hardware ethernet 50:2f:9b:f5:7c:60;
  uid "\001P/\233\365|`";
}
lease 192.168.0.192 {
  starts 6 2025/01/25 14:01:43;
  ends 6 2025/01/25 14:11:43;
  tstp 6 2025/01/25 14:11:43;
  cltt 6 2025/01/25 14:01:43;
  binding state free;
  hardware ethernet 38:37:8b:d3:48:ab;
  uid "\00187\213\323H\253";
  set vendor-class-identifier = "HUAWEI:android:BND-L22";
}
lease 192.168.0.195 {
  starts 6 2025/01/25 12:38:10;
  ends 6 2025/01/25 14:38:10;
  tstp 6 2025/01/25 14:38:10;
  cltt 6 2025/01/25 12:38:10;
  binding state free;
  hardware ethernet 26:93:d5:b1:6d:5f;
  uid "\001&\223\325\261m_";
}
lease 192.168.0.116 {
  starts 6 2025/01/25 15:53:44;
  ends 6 2025/01/25 16:03:44;
  tstp 6 2025/01/25 16:03:44;
  cltt 6 2025/01/25 15:53:44;
  binding state free;
  hardware ethernet bc:09:1b:dd:f1:e9;
  uid "\001\274\011\033\335\361\351";
}
lease 192.168.0.169 {
  starts 0 2025/01/26 00:58:35;
  ends 0 2025/01/26 02:58:35;
  tstp 0 2025/01/26 02:58:35;
  cltt 0 2025/01/26 00:58:35;
  binding state free;
  hardware ethernet 00:e0:4c:36:28:c0;
  uid "\001\000\340L6(\300";
}
'''
leases = re.findall(r"lease\s(?P<ip>\d+\.\d+\.\d+\.\d+)\s\{[\s\S]*?starts\s\d\s(?P<start_time>[\d\/\:\s]+);[\s\S]*?ends\s\d\s(?P<end_time>[\d\/\:\s]+);[\s\S]*?hardware\s\w+\s(?P<mac>[a-fA-F0-9:]+);(?:[\s\S]*?client-hostname\s\"(?P<hostname>.*?)\";)?", content, re.DOTALL)
print(leases)

[('192.168.0.196', '2025/01/24 05:50:12', '2025/01/24 05:52:12', '08:00:27:7f:e8:6e', 'client1'), ('192.168.0.197', '2025/01/24 06:06:21', '2025/01/24 06:08:21', 'bc:09:1b:dd:f1:e9', 'kamal-ThinkPad-P15s-Gen-2i'), ('192.168.0.158', '2025/01/24 06:29:13', '2025/01/24 06:39:13', '08:00:27:7f:e8:6e', ''), ('192.168.0.122', '2025/01/24 13:33:10', '2025/01/24 15:33:10', '9e:10:d4:30:af:89', ''), ('192.168.0.179', '2025/01/25 01:01:27', '2025/01/25 01:11:27', '5a:3d:79:1b:92:15', ''), ('192.168.0.200', '2025/01/25 02:13:39', '2025/01/25 02:15:39', '30:c9:ab:35:f0:e1', ''), ('192.168.0.173', '2025/01/25 02:14:02', '2025/01/25 02:24:02', '74:a7:ea:9c:78:fa', ''), ('192.168.0.190', '2025/01/25 06:53:19', '2025/01/25 07:03:19', '5a:3d:79:1b:92:15', ''), ('192.168.0.191', '2025/01/25 08:45:24', '2025/01/25 08:47:24', '14:13:0b:dc:21:7c', ''), ('192.168.0.157', '2025/01/25 08:45:24', '2025/01/25 08:55:24', '14:13:0b:dc:21:7c', ''), ('192.168.0.189', '2025/01/25 08:59:51', '2025/01/25 09:09:51', '4