In [10]:
import re

import numpy as np
import pandas as pd

# https://pandas.pydata.org/pandas-docs/stable/reference/series.html

In [2]:
# Sample strings: lowercase, uppercase, a sentence, mixed case, and one containing the German 'ß'.
s = pd.Series([
    'lower',
    'CAPITALS',
    'this is a sentence',
    'SwApCaSe',
    'caseßfold',
])
s

0                 lower
1              CAPITALS
2    this is a sentence
3              SwApCaSe
4             caseßfold
dtype: object

In [3]:
# Convert strings in the Series/Index to lowercase.
# 'ß' is already lowercase, so 'caseßfold' comes back unchanged.
s.str.lower()

0                 lower
1              capitals
2    this is a sentence
3              swapcase
4             caseßfold
dtype: object

In [4]:
# Convert strings in the Series/Index to uppercase.
# Note: 'ß' is uppercased to 'SS', so the result can be longer than the input.
s.str.upper()

0                 LOWER
1              CAPITALS
2    THIS IS A SENTENCE
3              SWAPCASE
4            CASESSFOLD
dtype: object

In [5]:
# Convert each string to title case: the first character of every word is
# uppercased and the remaining characters are lowercased.
s.str.title()

0                 Lower
1              Capitals
2    This Is A Sentence
3              Swapcase
4             Caseßfold
dtype: object

In [6]:
# Uppercase only the first character of each string and lowercase the rest
# (compare 'This is a sentence' here with 'This Is A Sentence' from title()).
s.str.capitalize()

0                 Lower
1              Capitals
2    This is a sentence
3              Swapcase
4             Caseßfold
dtype: object

In [7]:
# Convert uppercase characters to lowercase and lowercase characters to uppercase.
# As with upper(), 'ß' becomes 'SS' (see the last row).
s.str.swapcase()

0                 LOWER
1              capitals
2    THIS IS A SENTENCE
3              sWaPcAsE
4            CASESSFOLD
dtype: object

In [8]:
# Remove all case distinctions in each string.
# Returns a casefolded copy; casefolded strings can be used for caseless matching.
#
# Casefolding is similar to lowercasing but more aggressive, because it is intended
# to remove all case distinctions in a string. For example, the German lowercase
# letter 'ß' is equivalent to "ss": since it is already lowercase, lower() leaves
# 'ß' unchanged, whereas casefold() converts it to "ss".
#
# The casefolding algorithm is described in section 3.13 of the Unicode Standard.
s.str.casefold()

0                 lower
1              capitals
2    this is a sentence
3              swapcase
4            casessfold
dtype: object

Series.str.cat(others=None, sep=None, na_rep=None, join='left')  
    Concatenate strings in the Series/Index with given separator.

    If others is specified, this function concatenates the Series/Index and elements of others element-wise.   
    If others is not passed, then all values in the Series/Index are concatenated into a single string with a given sep.

In [13]:
# Series with one missing value (NaN), used to show how str.cat treats missing data.
s = pd.Series(['a', 'b', np.nan, 'd'])
s

0      a
1      b
2    NaN
3      d
dtype: object

In [12]:
# With no `others`, all values are joined into one string; the NaN entry is skipped.
s.str.cat(sep=' ')

'a b d'

In [14]:
# na_rep supplies a placeholder for the missing value instead of skipping it.
s.str.cat(sep=' ', na_rep='?')

'a b ? d'

In [15]:
# Element-wise concatenation with a list of the same length.
# Without na_rep, the row where s is NaN stays NaN.
s.str.cat(['A', 'B', 'C', 'D'], sep=',')

0    a,A
1    b,B
2    NaN
3    d,D
dtype: object

In [16]:
# na_rep='-' stands in for the missing value, so row 2 becomes '-,C' instead of NaN.
s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-')

0    a,A
1    b,B
2    -,C
3    d,D
dtype: object

In [17]:
# Without sep, the pieces are joined with no separator between them.
s.str.cat(['A', 'B', 'C', 'D'], na_rep='-')

0    aA
1    bB
2    -C
3    dD
dtype: object

In [20]:
# Second Series whose index differs from s (different order, no label 1, extra label 4),
# used to demonstrate index alignment in str.cat.
t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2])
t

3    d
0    a
4    e
2    c
dtype: object

In [19]:
# join='left': align t on the index of s; label 1 is missing from t and is filled with na_rep.
s.str.cat(t, join='left', na_rep='-')

0    aa
1    b-
2    -c
3    dd
dtype: object

In [21]:
# join='outer': use the union of both indexes; label 4 exists only in t.
s.str.cat(t, join='outer', na_rep='-')

0    aa
1    b-
2    -c
3    dd
4    -e
dtype: object

In [22]:
# join='right': use the index of t (in t's order); label 1, present only in s, is dropped.
s.str.cat(t, join='right', na_rep='-')

3    dd
0    aa
4    -e
2    -c
dtype: object

In [23]:
# Recreate the mixed-case sample Series for the padding example below.
s = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe', 'caseßfold'])
s

0                 lower
1              CAPITALS
2    this is a sentence
3              SwApCaSe
4             caseßfold
dtype: object

In [26]:
# Pad the left and right side of each string with '-' up to a total width of 10.
# Strings that are already at least that long (row 2) are returned unchanged.
s.str.center(10, '-')

0            --lower---
1            -CAPITALS-
2    this is a sentence
3            -SwApCaSe-
4            caseßfold-
dtype: object

Series.str.contains(pat, case=True, flags=0, na=None, regex=True)  
   Test if pattern or regex is contained within a string of a Series or Index.  
   Return boolean Series or Index based on whether a given pattern or regex is contained within a string of a Series or Index.

In [28]:
# Sample data with a numeric-looking string and a missing value.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.nan])
s1

0               Mouse
1                 dog
2    house and parrot
3                  23
4                 NaN
dtype: object

In [29]:
# regex=False treats 'og' as a literal substring.
# The NaN entry stays NaN, which is why the result has object dtype rather than bool.
s1.str.contains('og', regex=False)

0    False
1     True
2    False
3    False
4      NaN
dtype: object

In [30]:
# The .str accessor is also available on an Index.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.nan])
ind

Index(['Mouse', 'dog', 'house and parrot', '23.0', nan], dtype='object')

In [31]:
# '23' matches as a literal substring of '23.0'; the result is an Index, not a Series.
ind.str.contains('23', regex=False)

Index([False, False, False, True, nan], dtype='object')

Series.str.count(pat, flags=0)   
Count occurrences of pattern in each string of the Series/Index.  
This function is used to count the number of times a particular regex pattern is repeated in each of the string elements of the Series.  

In [32]:
# Mixed-case sample data with one missing value for the count examples.
s = pd.Series(['A', 'B', 'Aaba', 'Baca', np.nan, 'CABA', 'cat'])
s

0       A
1       B
2    Aaba
3    Baca
4     NaN
5    CABA
6     cat
dtype: object

In [33]:
# Count occurrences of lowercase 'a'; matching is case-sensitive ('A' and 'CABA' give 0).
# The NaN row stays NaN, so the result is float64 rather than int64.
s.str.count('a')

0    0.0
1    0.0
2    2.0
3    2.0
4    NaN
5    0.0
6    1.0
dtype: float64

In [34]:
# '$' is a regex metacharacter (end-of-string anchor), so it must be escaped
# to count literal dollar signs; a raw string keeps the escape readable.
s = pd.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat'])
s.str.count(r'\$')

0    1
1    0
2    1
3    2
4    2
5    0
dtype: int64

In [35]:
# count is also available on an Index via .str.
# NOTE: the recorded output shows Int64Index, which pandas 2.0 removed; newer versions display a plain Index.
pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a')

Int64Index([0, 0, 2, 1], dtype='int64')

In [36]:
# Sample data with a trailing uppercase 'T' and a missing value for startswith / endswith.
s = pd.Series(['bat', 'bear', 'caT', np.nan])
s

0     bat
1    bear
2     caT
3     NaN
dtype: object

In [37]:
# Test whether each string ends with 't'.
# The test is case-sensitive ('caT' is False) and the NaN entry stays NaN.
s.str.endswith('t')

0     True
1    False
2    False
3      NaN
dtype: object

In [38]:
# Same as endswith, but tests the start of each string; no entry here begins with 't'.
s.str.startswith('t')

0    False
1    False
2    False
3      NaN
dtype: object

In [39]:
# na=False: report the missing value as False instead of NaN, giving a plain bool result.
s.str.endswith('t', na=False)

0     True
1    False
2    False
3    False
dtype: bool

In [40]:
# na=True: report the missing value as True instead.
s.str.endswith('t', na=True)

0     True
1    False
2    False
3     True
dtype: bool

Series.str.extract(pat, flags=0, expand=True)   
    Extract capture groups in the regex pat as columns in a DataFrame.

    For each subject string in the Series, extract groups from the first match of regular expression pat.

In [41]:
# Sample data: a single letter followed by a single digit.
s = pd.Series(['a1', 'b2', 'c3'])
s

0    a1
1    b2
2    c3
dtype: object

In [42]:
# Two capture groups become two columns.
# 'c3' does not match [ab], so its whole row is NaN.
s.str.extract(r'([ab])(\d)')

Unnamed: 0,0,1
0,a,1.0
1,b,2.0
2,,


In [43]:
# Making the first group optional lets 'c3' match:
# its letter column is NaN but the digit is still captured.
s.str.extract(r'([ab])?(\d)')

Unnamed: 0,0,1
0,a,1
1,b,2
2,,3


In [44]:
# Named groups become the column names of the resulting DataFrame.
s.str.extract(r'(?P<letter>[ab])(?P<digit>\d)')

Unnamed: 0,letter,digit
0,a,1.0
1,b,2.0
2,,


In [45]:
# With a single group, expand=True still returns a one-column DataFrame.
s.str.extract(r'[ab](\d)', expand=True)

Unnamed: 0,0
0,1.0
1,2.0
2,


In [46]:
# With a single group, expand=False returns a Series instead of a DataFrame.
s.str.extract(r'[ab](\d)', expand=False)

0      1
1      2
2    NaN
dtype: object

In [47]:
# Labelled Series for extractall; 'a1a2' contains two matches of the patterns used below.
s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
s

A    a1a2
B      b1
C      c1
dtype: object

In [48]:
# extractall returns every match, not just the first one.
# Rows are indexed by (original label, match number); 'C' has no match and does not appear.
s.str.extractall(r"[ab](\d)")

Unnamed: 0_level_0,Unnamed: 1_level_0,0
Unnamed: 0_level_1,match,Unnamed: 2_level_1
A,0,1
A,1,2
B,0,1


In [49]:
# A named group sets the column name of the result.
s.str.extractall(r"[ab](?P<digit>\d)")

Unnamed: 0_level_0,Unnamed: 1_level_0,digit
Unnamed: 0_level_1,match,Unnamed: 2_level_1
A,0,1
A,1,2
B,0,1


In [50]:
# Several named groups produce one column per group.
s.str.extractall(r"(?P<letter>[ab])(?P<digit>\d)")

Unnamed: 0_level_0,Unnamed: 1_level_0,letter,digit
Unnamed: 0_level_1,match,Unnamed: 2_level_1,Unnamed: 3_level_1
A,0,a,1
A,1,a,2
B,0,b,1


In [51]:
# With the letter group optional, 'c1' also matches; its letter column is NaN.
s.str.extractall(r"(?P<letter>[ab])?(?P<digit>\d)")

Unnamed: 0_level_0,Unnamed: 1_level_0,letter,digit
Unnamed: 0_level_1,match,Unnamed: 2_level_1,Unnamed: 3_level_1
A,0,a,1
A,1,a,2
B,0,b,1
C,0,,1


In [52]:
# Same labelled Series again, used for the find / rfind examples.
s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"])
s

A    a1a2
B      b1
C      c1
dtype: object

In [53]:
# Lowest position at which '1' occurs in each string.
s.str.find('1')

A    1
B    1
C    1
dtype: int64

In [54]:
# find returns -1 for strings that do not contain the substring.
s.str.find('a')

A    0
B   -1
C   -1
dtype: int64

In [56]:
# rfind returns the highest position of the substring: the last 'a' in 'a1a2' is at position 2.
s.str.rfind('a')

A    2
B   -1
C   -1
dtype: int64

In [57]:
# Sample data for the findall examples.
s = pd.Series(['Lion', 'Monkey', 'Rabbit'])
s

0      Lion
1    Monkey
2    Rabbit
dtype: object

In [58]:
# findall returns a list of all matches for each element; an empty list means no match.
s.str.findall('Monkey')

0          []
1    [Monkey]
2          []
dtype: object

In [59]:
# Matching is case-sensitive by default, so 'MONKEY' finds nothing.
s.str.findall('MONKEY')

0    []
1    []
2    []
dtype: object

In [60]:
# Passing flags=re.IGNORECASE makes the match case-insensitive, so 'MONKEY' now finds 'Monkey'.
# (`re` is imported in the imports cell at the top of the notebook.)
s.str.findall('MONKEY', flags=re.IGNORECASE)

0          []
1    [Monkey]
2          []
dtype: object

In [61]:
# The pattern may match anywhere in the string: both 'Lion' and 'Monkey' contain 'on'.
s.str.findall('on')

0    [on]
1    [on]
2      []
dtype: object

In [62]:
# The pattern is a regex: '$' anchors the match to the end of the string, so only 'Lion' matches.
s.str.findall('on$')

0    [on]
1      []
2      []
dtype: object

In [63]:
# Every occurrence is returned, so 'Rabbit' yields two 'b' matches.
s.str.findall('b')

0        []
1        []
2    [b, b]
dtype: object

Series.str.join(sep)  
Join lists contained as elements in the Series/Index with passed delimiter.

In [64]:
# Elements of different types (str, tuple, list, int, dict) to show how str.get treats each one.
s = pd.Series(["String",
              (1, 2, 3),
              ["a", "b", "c"],
              123,
              -456,
              {1: "Hello", "2": "World"}])
s

0                        String
1                     (1, 2, 3)
2                     [a, b, c]
3                           123
4                          -456
5    {1: 'Hello', '2': 'World'}
dtype: object

In [65]:
# Extract the element at position 1 from each str/tuple/list, or the value under key 1 for the dict.
# The int entries give NaN.
s.str.get(1)

0        t
1        2
2        b
3      NaN
4      NaN
5    Hello
dtype: object

In [66]:
# Lists to be joined; only the first list consists solely of strings.
s = pd.Series([['lion', 'elephant', 'zebra'],
               [1.1, 2.2, 3.3],
               ['cat', np.nan, 'dog'],
               ['cow', 4.5, 'goat'],
               ['duck', ['swan', 'fish'], 'guppy']])
s

0        [lion, elephant, zebra]
1                [1.1, 2.2, 3.3]
2                [cat, nan, dog]
3               [cow, 4.5, goat]
4    [duck, [swan, fish], guppy]
dtype: object

In [67]:
# Join the items of each list with '-'.
# Any list containing a non-string element (float, NaN, nested list) yields NaN.
s.str.join('-')

0    lion-elephant-zebra
1                    NaN
2                    NaN
3                    NaN
4                    NaN
dtype: object

In [68]:
# Sample names containing spaces and a hyphen for the partition examples.
s = pd.Series(['Linda van der Berg', 'George Pitt-Rivers'])
s

0    Linda van der Berg
1    George Pitt-Rivers
dtype: object

In [69]:
# Split each string at the first occurrence of the separator (a space by default)
# into three parts: text before, the separator itself, text after.
s.str.partition()

Unnamed: 0,0,1,2
0,Linda,,van der Berg
1,George,,Pitt-Rivers


In [70]:
# rpartition splits at the last occurrence of the separator instead of the first.
s.str.rpartition()

Unnamed: 0,0,1,2
0,Linda van der,,Berg
1,George,,Pitt-Rivers


In [71]:
# Split on an explicit separator. When it is not found, the whole string lands
# in the first part and the other two parts are empty.
s.str.partition('-')

Unnamed: 0,0,1,2
0,Linda van der Berg,,
1,George Pitt,-,Rivers


In [72]:
# expand=False returns a Series of 3-tuples rather than a DataFrame.
s.str.partition('-', expand=False)

0    (Linda van der Berg, , )
1    (George Pitt, -, Rivers)
dtype: object

In [73]:
# expand=True returns a DataFrame with one column per part
# (same result as the call without `expand` above).
s.str.partition('-', expand=True)

Unnamed: 0,0,1,2
0,Linda van der Berg,,
1,George Pitt,-,Rivers


In [74]:
# Two strings carrying the 'str_' prefix and one without it.
s = pd.Series(["str_foo", "str_bar", "no_prefix"])
s

0      str_foo
1      str_bar
2    no_prefix
dtype: object

In [75]:
# Remove the prefix where present; strings without it are left unchanged.
# NOTE: str.removeprefix requires pandas >= 1.4.
s.str.removeprefix("str_")

0          foo
1          bar
2    no_prefix
dtype: object

In [76]:
# Two strings carrying the '_str' suffix and one without it.
s = pd.Series(["foo_str", "bar_str", "no_suffix"])
s

0      foo_str
1      bar_str
2    no_suffix
dtype: object

In [77]:
# Remove the suffix where present; strings without it are left unchanged.
# NOTE: str.removesuffix requires pandas >= 1.4.
s.str.removesuffix("_str")

0          foo
1          bar
2    no_suffix
dtype: object

In [78]:
# Single-character sample data for the repeat examples.
s = pd.Series(['a', 'b', 'c'])
s

0    a
1    b
2    c
dtype: object

In [79]:
# A scalar repeats every string the same number of times.
s.str.repeat(repeats=2)

0    aa
1    bb
2    cc
dtype: object

In [80]:
# A sequence gives a separate repeat count for each element.
s.str.repeat(repeats=[1, 2, 3])

0      a
1     bb
2    ccc
dtype: object

In [82]:
# With regex=True, '.' matches any character, so 'f.' matches 'fo' and 'fu'.
# The NaN entry is left as NaN.
pd.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True)

0    bao
1    baz
2    NaN
dtype: object

In [83]:
# Sample data for the slice examples.
s = pd.Series(["koala", "dog", "chameleon"])
s

0        koala
1          dog
2    chameleon
dtype: object

In [84]:
# Keep everything from position 1 onward, i.e. drop the first character.
s.str.slice(start=1)

0        oala
1          og
2    hameleon
dtype: object

In [85]:
# A negative start counts from the end: keep only the last character.
s.str.slice(start=-1)

0    a
1    g
2    n
dtype: object

In [86]:
# Strings of increasing length for the slice_replace examples.
s = pd.Series(['a', 'ab', 'abc', 'abdc', 'abcde'])
s

0        a
1       ab
2      abc
3     abdc
4    abcde
dtype: object

In [87]:
# Replace everything from position 1 to the end of each string with 'X'.
s.str.slice_replace(1, repl='X')

0    aX
1    aX
2    aX
3    aX
4    aX
dtype: object

In [88]:
# Replace positions 1 up to (but not including) 3 with 'X';
# characters from position 3 onward are kept.
s.str.slice_replace(start=1, stop=3, repl='X')

0      aX
1      aX
2      aX
3     aXc
4    aXde
dtype: object

In [89]:
# Mix of strings, an int and NaN for the zfill example.
s = pd.Series(['-1', '1', '1000', 10, np.nan])
s

0      -1
1       1
2    1000
3      10
4     NaN
dtype: object

In [90]:
# Pad each string on the left with '0' up to width 3; longer strings are unchanged.
# In the recorded output the int entry becomes NaN and the sign gets no special
# treatment ('-1' -> '0-1').
s.str.zfill(3)

0     0-1
1     001
2    1000
3     NaN
4     NaN
dtype: object

In [91]:
# Two multi-word lines for the wrap example.
s = pd.Series(['line to be wrapped', 'another line to be wrapped'])
s

0            line to be wrapped
1    another line to be wrapped
dtype: object

In [92]:
# Wrap each string to a maximum line width of 12 by inserting newlines between words.
s.str.wrap(12)

0             line to be\nwrapped
1    another line\nto be\nwrapped
dtype: object

In [93]:
# Split each string on '|' (the default separator) and return one 0/1 indicator column per distinct token.
pd.Series(['a|b', 'a', 'a|c']).str.get_dummies()

Unnamed: 0,a,b,c
0,1,1,0
1,1,0,0
2,1,0,1


In [94]:
# A NaN entry produces a row of zeros.
pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies()

Unnamed: 0,a,b,c
0,1,1,0
1,0,0,0
2,1,0,1
