In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the sample data (the path is relative to the notebook's working directory)
# and preview the first rows.
df = pd.read_csv('data/sample_df.csv')
df.head()

Unnamed: 0,Name,Birth,email
0,Captain,2019-01-01 9:10,happy@gmail.com
1,Hulk,2019-01-08 9:20,1004@NAVER.COM
2,Iron,2019-02-01 10:20,Iron at yahoo.co.kr
3,Widow,2019-02-02 11:40,\tWidow@gmail.com
4,Thor,2019-02-28 15:10,thor@daum.net\t


In [3]:
# Pull the 'email' column out as a Series. As the output shows, the raw values are
# messy: stray tabs/whitespace, mixed case, and ' at ' written in place of '@'.
email = df['email']
email

0         happy@gmail.com 
1           1004@NAVER.COM
2     Iron at yahoo.co.kr 
3        \tWidow@gmail.com
4          thor@daum.net\t
5       HAWK@kookmin.ac.kr
6         loki88@gmail.com
7          fury@NAVER.COM 
8          FALCON@daum.net
9     strange at gmail.com
Name: email, dtype: object

In [4]:
# strip / lstrip / rstrip
# .str.strip() removes whitespace (spaces, tabs, ...) from BOTH ends of every string.
email = email.str.strip()
email
# One-sided variants:
# email.str.lstrip()   # leading (left) side only
# email.str.rstrip()   # trailing (right) side only

0         happy@gmail.com
1          1004@NAVER.COM
2     Iron at yahoo.co.kr
3         Widow@gmail.com
4           thor@daum.net
5      HAWK@kookmin.ac.kr
6        loki88@gmail.com
7          fury@NAVER.COM
8         FALCON@daum.net
9    strange at gmail.com
Name: email, dtype: object

In [5]:
# Element-wise comparison: returns a boolean Series, True only where the whole
# string equals 'happy@gmail.com' exactly (case-sensitive, no partial matches).
email == 'happy@gmail.com'

0     True
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
Name: email, dtype: bool

In [6]:
# Padding: bring every string up to a minimum width.
# Strings shorter than 20 characters get '#' added in front (left side is the default).
# Strings that are already 20 characters or longer are kept as they are - they are never truncated.
email.str.pad(width=20, fillchar='#')

0    #####happy@gmail.com
1    ######1004@NAVER.COM
2    #Iron at yahoo.co.kr
3    #####Widow@gmail.com
4    #######thor@daum.net
5    ##HAWK@kookmin.ac.kr
6    ####loki88@gmail.com
7    ######fury@NAVER.COM
8    #####FALCON@daum.net
9    strange at gmail.com
Name: email, dtype: object

In [7]:
# To add the fill characters on the right-hand side instead, pass side='right'.
email.str.pad(width=20, fillchar='#', side='right')

0    happy@gmail.com#####
1    1004@NAVER.COM######
2    Iron at yahoo.co.kr#
3    Widow@gmail.com#####
4    thor@daum.net#######
5    HAWK@kookmin.ac.kr##
6    loki88@gmail.com####
7    fury@NAVER.COM######
8    FALCON@daum.net#####
9    strange at gmail.com
Name: email, dtype: object

In [8]:
# To pad on both the left and the right (centering the text), pass side='both'.
# When the padding cannot be split evenly, the extra fill character ends up on the
# right in the output below (width=20).
# NOTE(review): for plain object-dtype strings this presumably follows Python's str.center,
# which puts the extra character on the LEFT when the width is odd - confirm before
# relying on "right side wins" for other widths.
email.str.pad(width=20, fillchar='#', side='both')

0    ##happy@gmail.com###
1    ###1004@NAVER.COM###
2    Iron at yahoo.co.kr#
3    ##Widow@gmail.com###
4    ###thor@daum.net####
5    #HAWK@kookmin.ac.kr#
6    ##loki88@gmail.com##
7    ###fury@NAVER.COM###
8    ##FALCON@daum.net###
9    strange at gmail.com
Name: email, dtype: object

# Replace

In [9]:
# Turn the obfuscated ' at ' separator into a real '@'.
# regex=False forces a plain literal replacement on every pandas version
# (the default of `regex` changed from True to False in pandas 2.0).
email = email.str.replace(' at ', '@', regex=False)
email

0       happy@gmail.com
1        1004@NAVER.COM
2      Iron@yahoo.co.kr
3       Widow@gmail.com
4         thor@daum.net
5    HAWK@kookmin.ac.kr
6      loki88@gmail.com
7        fury@NAVER.COM
8       FALCON@daum.net
9     strange@gmail.com
Name: email, dtype: object

Split

In [10]:
# Split each string on '@'; the result is a Series whose values are lists of pieces.
email.str.split('@')

0       [happy, gmail.com]
1        [1004, NAVER.COM]
2      [Iron, yahoo.co.kr]
3       [Widow, gmail.com]
4         [thor, daum.net]
5    [HAWK, kookmin.ac.kr]
6      [loki88, gmail.com]
7        [fury, NAVER.COM]
8       [FALCON, daum.net]
9     [strange, gmail.com]
Name: email, dtype: object

In [11]:
# expand=True returns a NEW DataFrame with one column per piece (instead of a Series of lists).
# Nothing is added to an existing DataFrame; assign the result if you want to keep it.
email.str.split('@', expand=True)

Unnamed: 0,0,1
0,happy,gmail.com
1,1004,NAVER.COM
2,Iron,yahoo.co.kr
3,Widow,gmail.com
4,thor,daum.net
5,HAWK,kookmin.ac.kr
6,loki88,gmail.com
7,fury,NAVER.COM
8,FALCON,daum.net
9,strange,gmail.com


In [12]:
# When rows split into different numbers of pieces, the shorter rows are filled
# with None in the columns they have no piece for.
email.str.split('.', expand=True)

Unnamed: 0,0,1,2
0,happy@gmail,com,
1,1004@NAVER,COM,
2,Iron@yahoo,co,kr
3,Widow@gmail,com,
4,thor@daum,net,
5,HAWK@kookmin,ac,kr
6,loki88@gmail,com,
7,fury@NAVER,COM,
8,FALCON@daum,net,
9,strange@gmail,com,


In [13]:
# n limits the number of splits, counting from the left:
# n=1 splits only at the first '.', so every row yields at most two pieces.
email.str.split('.', expand=True, n=1)

Unnamed: 0,0,1
0,happy@gmail,com
1,1004@NAVER,COM
2,Iron@yahoo,co.kr
3,Widow@gmail,com
4,thor@daum,net
5,HAWK@kookmin,ac.kr
6,loki88@gmail,com
7,fury@NAVER,COM
8,FALCON@daum,net
9,strange@gmail,com


In [14]:
# rsplit counts splits from the right: with n=1 only the LAST '.' is used,
# so the final piece is separated from everything before it.
email.str.rsplit('.', expand=True, n=1)

Unnamed: 0,0,1
0,happy@gmail,com
1,1004@NAVER,COM
2,Iron@yahoo.co,kr
3,Widow@gmail,com
4,thor@daum,net
5,HAWK@kookmin.ac,kr
6,loki88@gmail,com
7,fury@NAVER,COM
8,FALCON@daum,net
9,strange@gmail,com


Length of String

In [15]:
# Number of characters in each string.
email.str.len()

0    15
1    14
2    16
3    15
4    13
5    18
6    16
7    14
8    15
9    17
Name: email, dtype: int64

Count

In [16]:
# Count how many times 'a' occurs in each string.
# The match is case-sensitive (an upper-case 'A' is not counted), and the pattern is
# interpreted as a regular expression, so escape special characters such as '.'.
email.str.count('a')

0    2
1    0
2    1
3    1
4    1
5    1
6    1
7    0
8    1
9    2
Name: email, dtype: int64

Find the position (index) of a substring!

In [17]:
# Zero-based position of the first '@' in each string (-1 when it is not found).
email.str.find('@')

0    5
1    4
2    4
3    5
4    4
5    4
6    6
7    4
8    6
9    7
Name: email, dtype: int64

In [18]:
# rfind searches from the right: it returns the zero-based position of the LAST '.'
# in each string (-1 when it is not found). The position is still counted from the left.
email.str.rfind('.')

0    11
1    10
2    13
3    11
4     9
5    15
6    12
7    10
8    11
9    13
Name: email, dtype: int64

Indexing and Slicing of Strings!

In [19]:
# First character of every string, using bracket indexing on the .str accessor.
email.str[0]
# Equivalent method form: email.str.get(0)

0    h
1    1
2    I
3    W
4    t
5    H
6    l
7    f
8    F
9    s
Name: email, dtype: object

In [21]:
# Slice every string: characters at positions 1 to 3 (start=1, stop=4, step=1).
email.str[1:4:1]
# Equivalent method form: email.str.slice(1, 4, 1)

0    app
1    004
2    ron
3    ido
4    hor
5    AWK
6    oki
7    ury
8    ALC
9    tra
Name: email, dtype: object