In [1]:
import pandas as pd
import numpy as np

In [3]:
email = "vazgenosipov@gmail.com"

In [4]:
email.split('@')

['vazgenosipov', 'gmail.com']

In [5]:
name = pd.Series(["Andrew", "Bobo", "Vazgen", "Vartan", "6"])

In [6]:
name

0    Andrew
1      Bobo
2    Vazgen
3    Vartan
4         6
dtype: object

# STRING METHODS

In [8]:
name.str.upper()

# call the string method through the Series .str accessor
# look up the method you need in the list of available string methods

0    ANDREW
1      BOBO
2    VAZGEN
3    VARTAN
4         6
dtype: object

In [9]:
name.str.isdigit()

0    False
1    False
2    False
3    False
4     True
dtype: bool

In [10]:
# expand a Series into a DataFrame

tech_finance = ["Goog,Appl,Amzn","JPM,BAC,GS"]

In [11]:
tickers = pd.Series(tech_finance)

In [12]:
tickers

0    Goog,Appl,Amzn
1        JPM,BAC,GS
dtype: object

In [13]:
# split each string on commas; every row becomes a list of tickers

tickers.str.split(",")

0    [Goog, Appl, Amzn]
1        [JPM, BAC, GS]
dtype: object

In [16]:
# element-wise indexing: .str[0] takes the first item of every row's list
tickers.str.split(",").str[0]

0    Goog
1     JPM
dtype: object

In [18]:
# .str[0] picks the first item of every list, then [1] selects the row labelled 1
tickers.str.split(",").str[0][1]

'JPM'

In [19]:
### Make a DataFrame

# expand=True spreads the split pieces across columns instead of returning lists
tickers.str.split(",", expand=True)

Unnamed: 0,0,1,2
0,Goog,Appl,Amzn
1,JPM,BAC,GS


In [20]:
### Stacking string calls

# let's create a list of names with typos

messy_names = pd.Series(["andrew  ","var;tan", "  vazgen  "])

In [21]:
messy_names

0      andrew  
1       var;tan
2      vazgen  
dtype: object

In [22]:
messy_names[0]

'andrew  '

In [23]:
### Clean up the data

# Step 1
# remove the stray semicolon

# regex=False matches ";" as a literal string; the default of `regex`
# changed from True to False in pandas 2.0, so it is spelled out here
messy_names.str.replace(";", "", regex=False)

# replaces every occurrence of the first argument
# with the second argument

0      andrew  
1        vartan
2      vazgen  
dtype: object

In [24]:
# Step 2
# strip the leading and trailing whitespace
# (regex=False: ";" is a literal, independent of the pandas version's default)

messy_names.str.replace(";", "", regex=False).str.strip()

0    andrew
1    vartan
2    vazgen
dtype: object

In [25]:
# Step 3
# capitalize the first letter
# (regex=False: ";" is a literal, independent of the pandas version's default)

messy_names.str.replace(";", "", regex=False).str.strip().str.capitalize()

0    Andrew
1    Vartan
2    Vazgen
dtype: object

In [None]:
# basically every string method
# is available on a Series
# use the syntax Series.str.method()


In [28]:
# as a side note, we can also apply our own function to a Series

def cleanup(name):
    """Return a tidied copy of the string ``name``.

    Semicolons are removed, surrounding whitespace is trimmed, and the
    result is passed through ``str.capitalize`` (first character upper-cased,
    the remainder lower-cased).
    """
    # Drop ";" before stripping so whitespace exposed by the removal is trimmed too.
    return name.replace(";", "").strip().capitalize()

In [36]:
# use np.vectorize
# (a convenience wrapper around a Python-level loop, not a speed-up)

# 1 vectorize the function and call it on the values of the Series;
#   otypes=[object] keeps plain Python strings and lets empty input work
# 2 wrap the resulting array in a Series, reusing the original index

pd.Series(np.vectorize(cleanup, otypes=[object])(messy_names), index=messy_names.index)

0    Andrew
1    Vartan
2    Vazgen
dtype: object

In [30]:
# or use apply function

messy_names.apply(cleanup)

0    Andrew
1    Vartan
2    Vazgen
dtype: object