## pandasで使えるテキストデータのデータ型
- <m-b>object</m-b>: numpy.ndarrayのnumpy.object_型
- <m-b>string</m-b>: pandas.StringDtype拡張型

In [1]:
# Without an explicit dtype, a Series of Python strings gets dtype object.
# (The string dtype is requested explicitly in the cells that follow.)
import pandas as pd

obj_ser = pd.Series(["spam", "ham"])
obj_ser

0    spam
1     ham
dtype: object

In [2]:
# Request the string extension dtype through its "string" alias.
str_ser = pd.Series(["spam", "ham"], dtype="string")
str_ser

0    spam
1     ham
dtype: string

In [4]:
# Same result as the "string" alias, but passing a StringDtype instance.
str_ser = pd.Series(["spam", "ham"], dtype=pd.StringDtype())
str_ser

0    spam
1     ham
dtype: string

In [5]:
# Convert an existing object-dtype Series to the string dtype.
obj_ser.astype("string")

0    spam
1     ham
dtype: string

In [7]:
# The .str accessor: build a string-dtype Series that includes a missing
# value (None) so the following examples show how missing data is handled.
s = pd.Series(
    ["spam", "ham", None, "egg"],
    dtype=pd.StringDtype(),
)
# Evaluating s.str on its own displays the StringMethods accessor object.
s.str

<pandas.core.strings.accessor.StringMethods at 0x27937de36e0>

In [8]:
# Index into each element: take the first character; missing stays <NA>.
s.str[0]

0       s
1       h
2    <NA>
3       e
dtype: string

In [9]:
# Slice each element: drop the first character; missing stays <NA>.
s.str[1:]

0     pam
1      am
2    <NA>
3      gg
dtype: string

In [10]:
# Upper-case every element; the missing value is passed through as <NA>.
s.str.upper()

0    SPAM
1     HAM
2    <NA>
3     EGG
dtype: string

In [11]:
# regex=True: "g+" is a regular expression, so the run "gg" in "egg" is
# replaced by a single "G".
s.str.replace(r"g+", "G", regex=True)

0    spam
1     ham
2    <NA>
3      eG
dtype: string

In [12]:
# regex=False: "g+" is matched as the literal two-character text "g+",
# which occurs nowhere in the data, so every element is left unchanged.
s.str.replace(r"g+", "G", regex=False)

0    spam
1     ham
2    <NA>
3     egg
dtype: string

In [13]:
# With no arguments, cat() joins all elements into one Python str;
# the missing value is skipped.
s.str.cat()

'spamhamegg'

In [14]:
# sep is inserted between elements; na_rep is substituted for the missing
# value instead of skipping it.
s.str.cat(sep="+", na_rep="***")

'spam+ham+***+egg'

In [17]:
# Element-wise concatenation with another Series, aligned on index.
# join="left" keeps all of s's index labels (0-3); the other Series has no
# labels 2 and 3, so those rows come out as <NA>.
s.str.cat(
    pd.Series(["hoge", "fuga"]),
    sep=",",
    join="left",
)

0    spam,hoge
1     ham,fuga
2         <NA>
3         <NA>
dtype: string

In [18]:
# join="right" aligns on the index of the Series passed in, so only its
# labels (0 and 1) appear in the result.
s.str.cat(
    pd.Series(["hoge", "fuga"]),
    sep=",",
    join="right",
)

0    spam,hoge
1     ham,fuga
dtype: string

In [19]:
# join="inner" keeps only the index labels present in both Series
# (0 and 1 here).
s.str.cat(
    pd.Series(["hoge", "fuga"]),
    sep=",",
    join="inner",
)

0    spam,hoge
1     ham,fuga
dtype: string

In [20]:
# Split each element on ","; the result holds a list per row. "c.C" has no
# comma, so it becomes a single-element list.
pd.Series(["a,A", "b,B", "c.C"]).str.split(",")

0    [a, A]
1    [b, B]
2     [c.C]
dtype: object

In [21]:
# expand=True returns a DataFrame with one column per piece; the row with
# no comma has no value in the second column.
pd.Series(["a,A", "b,B", "c.C"]).str.split(",", expand=True)

Unnamed: 0,0,1
0,a,A
1,b,B
2,c.C,


In [27]:
# regex=True: the pattern is a regular expression matching either "," or a
# literal ".", so "c.C" is split as well.
pd.Series(["a,A", "b,B", "c.C"]).str.split(r",|\.", expand=True, regex=True)

Unnamed: 0,0,1
0,a,A
1,b,B
2,c,C
