In [1]:
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Sample data: split one sentence on whitespace so that every word becomes
# its own element of a Series (with the default 0..n-1 integer index).
sentence = 'this is a bunch of words for my video all about using regular expressions to extract text and find patterns in Pandas'
s = Series(sentence.split())
s

0            this
1              is
2               a
3           bunch
4              of
5           words
6             for
7              my
8           video
9             all
10          about
11          using
12        regular
13    expressions
14             to
15        extract
16           text
17            and
18           find
19       patterns
20             in
21         Pandas
dtype: object

In [4]:
# Select every word that has the letter "i" somewhere in it.
# str.contains() produces a boolean Series (True where the pattern is found),
# and .loc uses that Series as a mask to keep only the True rows.
has_i = s.str.contains('i')
s.loc[has_i]

0            this
1              is
8           video
11          using
13    expressions
18           find
20             in
dtype: object

In [5]:
# Same technique, this time keeping only the words that have an "e" in them.
has_e = s.str.contains('e')
s.loc[has_e]

8           video
12        regular
13    expressions
15        extract
16           text
19       patterns
dtype: object

In [6]:
# Goal: keep the words that contain an "e", an "i", or both.

# Option 1: build one boolean Series per letter, combine them element-wise
# with | (logical OR), and use the combined Series as the mask for .loc.
has_e = s.str.contains('e')
has_i = s.str.contains('i')
s.loc[has_e | has_i]

0            this
1              is
8           video
11          using
12        regular
13    expressions
15        extract
16           text
18           find
19       patterns
20             in
dtype: object

In [7]:
# Option 2: let a regular expression do the work.
# A character class such as [ei] matches any ONE of the characters listed
# inside the brackets, so a word matches when it has an "e" or an "i" in it.
e_or_i = '[ei]'
s.loc[s.str.contains(e_or_i)]


0            this
1              is
8           video
11          using
12        regular
13    expressions
15        extract
16           text
18           find
19       patterns
20             in
dtype: object

In [8]:
# Same search with regex=True spelled out. str.contains() already treats the
# pattern as a regular expression by default (the previous cell relied on that),
# but stating it makes the intent obvious to the reader and keeps this cell
# working should that default ever switch to False.
e_or_i_mask = s.str.contains('[ei]', regex=True)
s.loc[e_or_i_mask]

0            this
1              is
8           video
11          using
12        regular
13    expressions
15        extract
16           text
18           find
19       patterns
20             in
dtype: object

In [10]:
# Pattern breakdown for '^...[aeiou]':
#   ^        anchors the match to the start of the word
#   ...      each dot matches one character, so this skips the first three
#   [aeiou]  requires the fourth character to be a lowercase vowel
fourth_is_vowel = s.str.contains('^...[aeiou]')
s.loc[fourth_is_vowel]

8       video
10      about
12    regular
dtype: object

In [11]:
# Redisplay the full Series for reference before the replacement examples.
s

0            this
1              is
2               a
3           bunch
4              of
5           words
6             for
7              my
8           video
9             all
10          about
11          using
12        regular
13    expressions
14             to
15        extract
16           text
17            and
18           find
19       patterns
20             in
21         Pandas
dtype: object

In [12]:
# Series.replace() compares whole values here: only the element that is exactly
# "is" becomes "IS"; "this" is left alone even though it contains "is".
s.replace(to_replace='is', value='IS')

0            this
1              IS
2               a
3           bunch
4              of
5           words
6             for
7              my
8           video
9             all
10          about
11          using
12        regular
13    expressions
14             to
15        extract
16           text
17            and
18           find
19       patterns
20             in
21         Pandas
dtype: object

In [13]:
# To affect every word that merely contains "is", switch replace() to regex mode.
# The pattern .*is.* covers the entire word (anything, then "is", then anything),
# so the whole word -- not just the "is" part -- is replaced by "IS".
s.replace(to_replace='.*is.*', value='IS', regex=True)

0              IS
1              IS
2               a
3           bunch
4              of
5           words
6             for
7              my
8           video
9             all
10          about
11          using
12        regular
13    expressions
14             to
15        extract
16           text
17            and
18           find
19       patterns
20             in
21         Pandas
dtype: object