In [1]:
import re

| **Special Character** | **Functionality**                                             |
|-----------------------|--------------------------------------------------------------|
| `.`                   | Matches any character except a newline                       |
| `\d`                  | Matches any digit (equivalent to `[0-9]` with `re.ASCII`; by default `str` patterns also match other Unicode decimal digits) |
| `\D`                  | Matches any non-digit character                               |
| `\w`                  | Matches any word character (equivalent to `[a-zA-Z0-9_]` with `re.ASCII`; by default `str` patterns also match other Unicode letters and digits) |
| `\W`                  | Matches any non-word character                                |
| `\s`                  | Matches any whitespace character (spaces, tabs, newlines)    |
| `\S`                  | Matches any non-whitespace character                          |
| `\b`                  | Matches a word boundary: the empty position between a word character and a non-word character (or the start/end of the string) |
| `\B`                  | Matches any position that is not a word boundary              |
| `^`                   | Matches the start of a string                                 |
| `$`                   | Matches the end of a string                                   |
| `*`                   | Matches 0 or more repetitions of the preceding element        |
| `+`                   | Matches 1 or more repetitions of the preceding element        |
| `?`                   | Matches 0 or 1 repetition of the preceding element            |
| `{n}`                 | Matches exactly `n` repetitions of the preceding element      |
| `{n,}`                | Matches `n` or more repetitions of the preceding element      |
| `{n,m}`               | Matches between `n` and `m` repetitions of the preceding element |
| `(...)`               | Captures the matched text for later use                      |
| `(?:...)`             | Non-capturing group                                          |
| `(?P<name>...)`      | Named capturing group                                        |
| `(?=...)`             | Positive lookahead assertion                                   |
| `(?!...)`             | Negative lookahead assertion                                   |
| `(?<=...)`            | Positive lookbehind assertion                                  |
| `(?<!...)`            | Negative lookbehind assertion                                  |
| `\`                   | Escapes the next character, allowing literal matching         |
|  `\|`                  | Acts as a logical OR operator                               |
| `[...]`               | Matches any single character within the brackets              |
| `[^...]`              | Matches any single character not within the brackets          |


In [34]:
# Sample contact record used as the search text for the practice regexes.
# The offsets in the recorded match spans depend on the exact characters of
# this literal, including the space that follows the opening triple quote.
data = """ 
Name: John Doe
Email: john.doe@example.com
Phone: (555) 123--4567
Date of Birth: 1990-05-21
Address: 123 Main St, Anytown, USA
Website: https://www.johndoe.com
Skills: Python, Java, SQL, HTML
Last Login: 2024-09-20 14:35:00
"""


Practice Questions
1. `Extract Names:` ->  Write a regex to extract the name "John Doe".
2. `Find Emails:` ->  Write a regex to find the email address "john.doe@example.com".
3. `Extract Phone Numbers:` ->  Write a regex to match the phone number "(555) 123-4567".
4. `Extract Date of Birth:` ->  Write a regex to find the date of birth in the format YYYY-MM-DD.
5. `Match Addresses:` ->  Write a regex to extract the address "123 Main St, Anytown, USA".
6. `Extract Websites:` -> Write a regex to find the website URL "https://www.johndoe.com".
7. `List Skills:` ->  Write a regex to extract the skills listed (e.g., "Python, Java, SQL, HTML").
8. `Find Last Login Date:` ->  Write a regex to extract the last login timestamp "2024-09-20 14:35:00".
9. `Validate Phone Numbers:` ->  Write a regex to validate if a string is a properly formatted phone number.  
10. `Check Date Format:` ->  Write a regex to check if a string matches the YYYY-MM-DD date format.

In [3]:
# Phone number in the form "(ddd) ddd-dddd".
# The parentheses are escaped so they match literally; an unescaped "." in
# their place would accept any character. The digit groups have fixed lengths.
# "-+" still tolerates the doubled hyphen present in the sample record.
pattern = re.compile(r'\(\d{3}\)\s\d{3}-+\d{4}')

matches = pattern.finditer(data)

for match in matches:
    print(match)

<re.Match object; span=(52, 67), match='(555) 123--4567'>


In [43]:
# Sample addresses for the e-mail regex: a plain name, a dotted name, and a
# name/domain pair that contains hyphens and digits.
emails = """
CoreyMSchafer@gmail.com
Corey.schafer@university.edu
Corey-321-schafer@my-work.net
"""

In [52]:
# Emails: local part (word characters, dots, hyphens) @ domain label . suffix
pattern = re.compile(r'[\w\.-]+@[\w-]+\.\w+')

for found in pattern.finditer(emails):
    print(found)

# Slice with the span reported for the second match to show the text it covers.
emails[25:53]

<re.Match object; span=(1, 24), match='CoreyMSchafer@gmail.com'>
<re.Match object; span=(25, 53), match='Corey.schafer@university.edu'>
<re.Match object; span=(54, 83), match='Corey-321-schafer@my-work.net'>


'Corey.schafer@university.edu'

In [7]:
# Date of birth: this finds every YYYY-MM-DD date in the record,
# so the date part of the last-login timestamp is reported as well.
pattern = re.compile(r'\d{4}-\d{2}-\d{2}')

for found in pattern.finditer(data):
    print(found)

<re.Match object; span=(83, 93), match='1990-05-21'>
<re.Match object; span=(206, 216), match='2024-09-20'>


In [10]:
# Extract websites: http(s) URLs whose host ends in ".com".
# Every dot is escaped so it matches only a literal "."; an unescaped "."
# would accept any character between the host labels. The trailing \b stops
# the match from ending in the middle of a longer suffix.
pattern = re.compile(r'https?://(?:[\w-]+\.)+com\b')

matches = pattern.finditer(data)

for match in matches:
    print(match)

<re.Match object; span=(138, 161), match='https://www.johndoe.com'>


In [40]:
# Skills: four comma-separated words; groups 1-3 keep their trailing comma.
data = '(e.g., "Python, Java, SQL, HTML")'
pattern = re.compile(r'(\w+,)\s(\w+,)\s(\w+,)\s(\w+)')

for found in pattern.finditer(data):
    # Index 0 is the whole match; indexes 1..pattern.groups are the captures.
    for group_index in range(pattern.groups + 1):
        print(found.group(group_index))

Python, Java, SQL, HTML
Python,
Java,
SQL,
HTML


In [41]:
# The quoted four-item skills list as a single match, quotes included.
pattern = re.compile(r'\"\w+,\s\w+,\s\w+,\s\w+\"')

for found in pattern.finditer(data):
    print(found)

<re.Match object; span=(7, 32), match='"Python, Java, SQL, HTML"'>


In [30]:
# Pull every run of digits out of a duration string.
data = "2 hours 38 mins"
pattern = re.compile(r'\d+')

for found in pattern.finditer(data):
    print(found)

<re.Match object; span=(0, 1), match='2'>
<re.Match object; span=(8, 10), match='38'>


In [11]:
# Read the whole sample file into memory, then echo it.
# Forward slashes are used for every separator: a backslash followed by "A"
# is an invalid escape sequence in a normal string literal, and Windows
# accepts "/" in paths.
# NOTE(review): this is a machine-specific absolute path; point it at your
# own copy of the file before running.
with open("C:/Users/Acer/Downloads/fileInput (1).txt", 'r') as f:
    Content = f.read()

# end="" avoids printing an extra newline after the file's own text.
print(Content, end="")

Hello 
my name is mohit kumar
I am creating this file in order to read it in python
and implement some python regular expression method on it
phone: 790-192-8521
Address: 118, New Kartar Nagar, Model House, Jalandhar
Age: 23
Qualification: B.tech

In [66]:
# Sample storage descriptions: one or two "<size><unit> <type>" entries, joined by "+" when there are two.
memory = ['256GB SSD + 1TB HDD','256GB SSD','512GB SSD +  512GB SSD','1.0TB HDD','64GB Flash Storage +  1TB HDD']

In [59]:
# Total capacity of each configuration in GB (1 TB is counted as 1024 GB).
# One pattern captures the size (integer or decimal) and its unit. The decimal
# point is escaped so it cannot match an arbitrary character, and a decimal
# size is read as a whole instead of only the digits after the point.
pattern = re.compile(r'(\d+(?:\.\d*)?)(GB|TB)')

for mem in memory:
    storage = 0
    for match in pattern.finditer(mem):
        size, unit = match.groups()
        storage += float(size) * (1024 if unit == 'TB' else 1)

    # Show whole totals without a trailing ".0".
    if storage == int(storage):
        storage = int(storage)

    # Print inside the loop so every entry gets its own line; a bare
    # expression after the loop would only display the last total.
    print(f"{mem}  ->  {storage}")
        

256GB SSD + 1TB HDD  ->  1280
256GB SSD  ->  256
512GB SSD +  512GB SSD  ->  1024
1.0TB HDD  ->  1024
64GB Flash Storage +  1TB HDD  ->  1088


In [67]:
def getSDDStorage(memory):
    """Return the total SSD capacity, in GB, described by a storage string.

    Every "<size>GB SSD" or "<size>TB SSD" entry is added up, with 1 TB
    counted as 1024 GB. Entries for other storage types (for example HDD or
    Flash Storage) are ignored.

    Args:
        memory: Storage description such as '256GB SSD + 1TB HDD'.

    Returns:
        int: Total SSD capacity in GB; 0 when the string has no SSD entry.
    """
    # Size may be an integer or a decimal; the unit decides the multiplier.
    pattern = re.compile(r'(\d+(?:\.\d+)?)(GB|TB)\s*SSD')

    storage = 0
    for match in pattern.finditer(memory):
        size, unit = match.groups()
        gigabytes = float(size) * (1024 if unit == 'TB' else 1)
        # Keep the result an integer number of GB.
        storage += int(gigabytes)

    return storage
    
# Report the SSD capacity of every configuration, one value per line.
for ssd_gb in map(getSDDStorage, memory):
    print(ssd_gb)

256
256
1024
0
0


In [None]:
# NOTE(review): getSDDStorage is also defined in an earlier cell; when the
# cells run in order this later definition is the one left in effect.
# Consider keeping a single copy.
def getSDDStorage(memory):
    """Return the total SSD capacity, in GB, described by a storage string.

    Every "<size>GB SSD" or "<size>TB SSD" entry is added up, with 1 TB
    counted as 1024 GB. Entries for other storage types (for example HDD or
    Flash Storage) are ignored.

    Args:
        memory: Storage description such as '256GB SSD + 1TB HDD'.

    Returns:
        int: Total SSD capacity in GB; 0 when the string has no SSD entry.
    """
    # Size may be an integer or a decimal; the unit decides the multiplier.
    pattern = re.compile(r'(\d+(?:\.\d+)?)(GB|TB)\s*SSD')

    storage = 0
    for match in pattern.finditer(memory):
        size, unit = match.groups()
        gigabytes = float(size) * (1024 if unit == 'TB' else 1)
        # Keep the result an integer number of GB.
        storage += int(gigabytes)

    return storage

In [12]:
topic = "['Two Pointers', 'String', '1+']"

# Raw string: "\d" and "\+" are regex escapes, not Python string escapes.
# The capture group returns all the digits in front of the "+", so a
# multi-digit count is kept whole.
# `pattern` holds the list of captured counts returned by findall.
pattern = re.findall(r'(\d+)\+', topic)

if pattern:
    print(pattern[0])

1


In [17]:
import re

name = "Keane, Miss. Nora A"

# Title: the word immediately before a period.
# Raw string so "\w" and "\." reach the regex engine as written instead of
# being treated as (invalid) Python string escapes.
pattern = r'(\w+)\.'

print(re.findall(pattern, name)[0])

Miss


In [7]:
class MyClass:
    """Demo of a static method and a class method sharing one class-level counter."""

    # Counter stored on the class; each method below adds one to it.
    val = 10

    @classmethod
    def class_method(cls):
        """Increment MyClass.val and report the class this was called on."""
        MyClass.val = MyClass.val + 1
        message = f"I work with {cls} {MyClass.val}!"
        return message

    @staticmethod
    def static_method():
        """Increment MyClass.val; receives neither an instance nor a class."""
        MyClass.val = MyClass.val + 1
        message = f"I don’t need an instance! {MyClass.val}"
        return message
# Call both methods on the class itself; no instance is created.
for demo in (MyClass.static_method, MyClass.class_method):
    print(demo())





I don’t need an instance! 11
I work with <class '__main__.MyClass'> 12!
