In [58]:
import numpy as np
import pandas as pd

In [2]:
# Sample address used below to demonstrate plain Python string methods.
email = 'subhendu@email.com'

In [3]:
# Splitting on '@' returns a list with the text before and after it.
email.split('@')

['subhendu', 'email.com']

In [5]:
# Small Series of strings; the last entry is the digit string '5'.
names  = pd.Series(['andrew','bobo','claire','david','5'])

In [6]:
# Echo the Series to inspect its values and dtype.
names

0    andrew
1      bobo
2    claire
3     david
4         5
dtype: object

In [8]:
# The .str accessor applies upper() to every element of the Series.
names.str.upper()

0    ANDREW
1      BOBO
2    CLAIRE
3     DAVID
4         5
dtype: object

In [10]:
# Built-in str.isdigit on a single Python string (False for the address).
email.isdigit()

False

In [11]:
# A string made only of digit characters returns True.
'5'.isdigit()

True

In [12]:
# The same check applied element-wise through .str; yields a boolean Series.
names.str.isdigit()

0    False
1    False
2    False
3    False
4     True
dtype: bool

In [13]:
# Two comma-separated ticker strings, one string per group.
# NOTE(review): 'APPL' may be a typo for 'AAPL' — confirm the intended ticker.
tech_finance = ['GOOG,APPL,AMZN','JPM,BAC,GS']

In [14]:
# The list holds two strings; the individual tickers are not separate items yet.
len(tech_finance)

2

In [16]:
# Wrap the list in a Series so the .str accessor can be used on it.
tickers = pd.Series(tech_finance)

In [17]:
# Echo the Series: one comma-separated string per row.
tickers

0    GOOG,APPL,AMZN
1        JPM,BAC,GS
dtype: object

In [18]:
# Split each string on ','; every element becomes a list of tickers.
tickers.str.split(',')

0    [GOOG, APPL, AMZN]
1        [JPM, BAC, GS]
dtype: object

In [21]:
# Chaining .str[0] indexes into each list and keeps its first ticker.
tickers.str.split(',').str[0]

0    GOOG
1     JPM
dtype: object

In [19]:
# A single plain Python string with the same content as the first group above.
tech = 'GOOG,APPL,AMZN'

In [20]:
# Plain-string equivalent of the Series chain: split on ',' and take the first piece.
tech.split(',')[0]

'GOOG'

In [22]:
# expand=True spreads the split pieces across DataFrame columns instead of lists.
tickers.str.split(',',expand=True)

Unnamed: 0,0,1,2
0,GOOG,APPL,AMZN
1,JPM,BAC,GS


In [38]:
# Names with stray whitespace and a ';' to practise cleaning on.
# NOTE(review): "clarie" differs from the 'claire' spelling used in `names` —
# possibly a typo; confirm before changing, since the shown outputs depend on it.
messy_names = pd.Series(["andrew ","bo;bo","  clarie  "])

In [39]:
# Echo the raw values; the surrounding whitespace shows up in the alignment.
messy_names

0       andrew 
1         bo;bo
2      clarie  
dtype: object

In [40]:
# Remove the stray ';' characters. regex=False makes the literal match
# explicit, because the default of `regex` differs between pandas versions.
messy_names.str.replace(';', '', regex=False)

0       andrew 
1          bobo
2      clarie  
dtype: object

In [41]:
# Remove ';' (literal match, independent of the pandas default for `regex`),
# then trim leading and trailing whitespace from every element.
messy_names.str.replace(';', '', regex=False).str.strip()

0    andrew
1      bobo
2    clarie
dtype: object

In [42]:
# First cleaned value. .iloc[0] selects by position, so the result does not
# depend on the Series happening to carry the index label 0; regex=False keeps
# the ';' removal a literal match on every pandas version.
messy_names.str.replace(';', '', regex=False).str.strip().iloc[0]

'andrew'

In [43]:
# Full vectorised clean-up: drop ';' (literal match), trim whitespace, then
# capitalise each name.
messy_names.str.replace(';', '', regex=False).str.strip().str.capitalize()

0    Andrew
1      Bobo
2    Clarie
dtype: object

In [44]:
def cleanup(name):
    """Return ``name`` with ';' removed, outer whitespace stripped, and capitalized.

    Parameters
    ----------
    name : str
        Raw value, for example one element handed over by ``Series.apply``.

    Returns
    -------
    str
        The cleaned string. A value that is not a ``str`` (``None``, a float
        NaN standing in for a missing entry, ...) is returned unchanged
        instead of raising ``AttributeError``.
    """
    if not isinstance(name, str):
        # Missing or non-text entries have no string methods; pass them through.
        return name
    # str.capitalize upper-cases the first character and lower-cases the rest.
    return name.replace(";", "").strip().capitalize()

In [45]:
# Run the plain-Python cleanup function on each element of the Series.
messy_names.apply(cleanup)

0    Andrew
1      Bobo
2    Clarie
dtype: object

In [59]:
import timeit

# Code snippet to be executed only once, before the timed runs. It rebuilds
# messy_names and cleanup inside a string because timeit does not see the
# notebook's variables unless a globals= namespace is passed.
setup = '''
import pandas as pd
import numpy as np
messy_names = pd.Series(['andrew ','bo;bo',"  clarie  "])
def cleanup(name):
    name = name.replace(";","")
    name = name.strip()
    name = name.capitalize()
    return name
'''
# Code snippets whose execution time is to be measured.
# NOTE(review): messy_names has only three elements, so these timings likely
# reflect fixed per-call overhead more than per-element cost — confirm on a
# larger Series before drawing conclusions.
# Variant 1: chained pandas .str accessor methods.
stmt_pandas_str = '''
messy_names.str.replace(';','').str.strip().str.capitalize()
'''

# Variant 2: Series.apply with the Python-level cleanup function.
stmt_pandas_apply = '''
messy_names.apply(cleanup)
'''

# Variant 3: cleanup wrapped in np.vectorize and called on the Series.
# NOTE(review): per the NumPy docs np.vectorize returns an ndarray, so this
# variant presumably yields an array rather than a Series — confirm.
stmt_pandas_vectorize = '''
np.vectorize(cleanup)(messy_names)
'''

In [66]:
# Total seconds for 10000 executions of the chained .str statement.
timeit.timeit(stmt=stmt_pandas_str, setup=setup, number=10000)

1.790559300003224

In [67]:
# Total seconds for 10000 executions of the Series.apply statement.
timeit.timeit(stmt=stmt_pandas_apply, setup=setup, number=10000)

0.6280716999899596

In [68]:
# Total seconds for 10000 executions of the np.vectorize statement.
timeit.timeit(stmt=stmt_pandas_vectorize, setup=setup, number=10000)

0.1684464999998454