# REGULAR EXPRESSIONS (REGEX)

In [1]:
using System.Text.RegularExpressions;

## `IsMatch()` --> bool 

*kinda similar to checking whether Python's `re.search()` found anything, i.e. `bool(re.search(...))`*

In [5]:
// Check whether the sentence contains at least one run of ASCII digits.
string sentence = "this kitten has been convicted of 7 counts of tax fraud";

Regex pattern = new Regex("[0-9]+");

// IsMatch() only answers yes/no, so a conditional expression picks the message.
Console.WriteLine(
    pattern.IsMatch(sentence)
        ? "Sentence contains numbers"
        : "Sentence does NOT contain numbers");

Sentence contains numbers


## `Match()`

*kinda similar to Python's `re.search()` (returns the first match only)*

In [25]:
// Find the first comma (or run of consecutive commas) and report where it is.
string sentence2 = "breaking news, water has been found in the ocean, more at 11";

Regex pattern = new Regex(@"[,]+");
Match match = pattern.Match(sentence2);

// Success is checked before reading Index/Length; nothing is printed when no comma exists.
if (match.Success)
{
    Console.WriteLine($"First comma found at index {match.Index}. Length: {match.Length}");
}

First comma found at index 13. Length: 1


## Capture Groups (Multiple)

In [26]:
// Pull a name and an age out of a decorated string using two numbered capture groups.
string sentence3 = "**********Squiward Tentacles, 315 years old***********";

// Group 1: everything up to the comma that is neither '*' nor ','.
// Group 2: the digits that follow the comma and one whitespace character.
Regex pattern = new Regex(@"([^*,]+),\s([0-9]+)");
Match match = pattern.Match(sentence3);

if (match.Success)
{
    // Groups[0] is the whole match; the parenthesised groups start at index 1.
    GroupCollection captured = match.Groups;
    Console.WriteLine($"Name: {captured[1].Value}\nAge: {captured[2].Value}");
}

Name: Squiward Tentacles
Age: 315


### Named Capture Groups

syntax: `?<NAME>` right after left parenthesis

In [28]:
// Same extraction as with numbered groups, but the groups are addressed by name.
string sentence4 = "**********Squiward Tentacles, 315 years old***********";

// (?<name>...) and (?<age>...) label the two captures so they can be looked up by key.
Regex pattern = new Regex(@"(?<name>[^*,]+),\s(?<age>[0-9]+)");
Match match = pattern.Match(sentence4);

if (match.Success)
{
    string capturedName = match.Groups["name"].Value;
    string capturedAge = match.Groups["age"].Value;
    Console.WriteLine($"Name: {capturedName}\nAge: {capturedAge}");
}

Name: Squiward Tentacles
Age: 315


## `Matches()` --> `MatchCollection` Class

*kinda similar to Python's `re.finditer()` (like `re.findall()`, but each result is a full match object with an index and a value)*

In [7]:
// List every occurrence of "wood" that is preceded by a whitespace character.
string stupidexample = "how much could a wood chuck wood if a wood chuck could chuck wood";

// NOTE: the pattern consumes the leading whitespace, so each reported Index points at
// that whitespace character (one before the 'w') and Value starts with it. A "wood"
// at the very start of the string would not be matched.
Regex pattern = new Regex(@"\swood");
MatchCollection matches = pattern.Matches(stupidexample);

for (int i = 0; i < matches.Count; i++)
{
    Match hit = matches[i];
    Console.WriteLine($"the word 'wood' found at index {hit.Index}: {hit.Value}");
}

the word 'wood' found at index 16:  wood
the word 'wood' found at index 27:  wood
the word 'wood' found at index 37:  wood
the word 'wood' found at index 60:  wood


In [15]:
// Scan a block of text for things shaped like e-mail addresses and print each one.
string emails = "cool emails: banana_apple@banana.com, word.some-thing@words.org, hfyurgyad2@ncfhdgdehjk.com \n \n bleh123@bleh.gov \n iamNOTanemail(AT).com";

// Local part: letters, digits, '_', '-', '.'; then a literal '@';
// domain part: letters, digits and '.' only.
Regex pattern = new Regex(@"[A-Za-z0-9_\-.]+[@][A-Za-z0-9.]+");
MatchCollection matches = pattern.Matches(emails);

for (int i = 0; i < matches.Count; i++)
{
    Match found = matches[i];
    Console.WriteLine($"an email was found at index {found.Index}: {found.Value}");
}

an email was found at index 13: banana_apple@banana.com
an email was found at index 38: word.some-thing@words.org
an email was found at index 65: hfyurgyad2@ncfhdgdehjk.com
an email was found at index 96: bleh123@bleh.gov


In [17]:
// Print every substring shaped like ddd-ddd-dddd (3 digits, 3 digits, 4 digits).
string favorite_social_security_numbers = "favorite social security #'s: 123-456-7890, 555-555-5555, 555-55-5555555, 098-765-4321, abc-xyz-lmno, 111-222-3333, 1-2-3-4-5-6-7-8-9-0";

// The pattern has no boundary anchors: it accepts the shape wherever it occurs in the text.
Regex regex = new Regex(@"([0-9]{3}[\-][0-9]{3}[\-][0-9]{4})");
MatchCollection matchCollection = regex.Matches(favorite_social_security_numbers);

foreach (Match hit in matchCollection)
{
    Console.WriteLine($"stolen social security found at index {hit.Index}: {hit.Value}");
}

stolen social security found at index 30: 123-456-7890
stolen social security found at index 44: 555-555-5555
stolen social security found at index 74: 098-765-4321
stolen social security found at index 102: 111-222-3333
