# Text Methods

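Text data often needs to be cleaned or manipulated before it can be processed. pandas exposes vectorized versions of Python's string methods through the `.str` accessor of a Series.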

In [1]:
import pandas as pd
import numpy as np

Reference: the pandas user guide on working with text data, https://pandas.pydata.org/docs/user_guide/text.html

In [2]:
# Sample address used to demonstrate plain Python string methods
email = "xyz@email.com"

In [3]:
# Built-in str.split: cut on '@' to separate the user part from the domain
email.split('@')

['xyz', 'email.com']

In [4]:
# str.isdigit is True only when every character is a digit, so an address gives False
email.isdigit()

False

In [5]:
"5".isdigit()

True

In [6]:
# Sample data: four names plus the digit-only string "5"
names = pd.Series(["andrew", "bob", "claire", "david", "5"])
names

0    andrew
1       bob
2    claire
3     david
4         5
dtype: object

In [7]:
# The .str accessor applies the string method to every element of the Series
names.str.upper()

0    ANDREW
1       BOB
2    CLAIRE
3     DAVID
4         5
dtype: object

In [8]:
# Element-wise isdigit returns a boolean Series; only the "5" entry is True
names.str.isdigit()

0    False
1    False
2    False
3    False
4     True
dtype: bool

In [9]:
# Each element is a single string holding several comma-separated tickers.
# NOTE(review): "APPL" looks like a typo for Apple's ticker "AAPL" — confirm;
# the recorded outputs below would need a re-run if it is corrected.
tech_finance = ["GOOG,APPL,AMZN", "JPM,BAC,GS"]

In [10]:
# Two strings, not six tickers: the symbols are still packed inside each string
len(tech_finance)

2

In [11]:
# Wrap the list in a Series so the vectorized .str methods are available
tickers = pd.Series(tech_finance)
tickers

0    GOOG,APPL,AMZN
1        JPM,BAC,GS
dtype: object

In [12]:
# Split every string on commas; each element becomes a Python list of tickers
tickers.str.split(",")

0    [GOOG, APPL, AMZN]
1        [JPM, BAC, GS]
dtype: object

In [13]:
# .str[0] indexes into each split list, picking the first ticker of every row
tickers.str.split(",").str[0]

0    GOOG
1     JPM
dtype: object

In [14]:
# expand=True spreads the split pieces across DataFrame columns instead of lists
tickers.str.split(",", expand=True)

Unnamed: 0,0,1,2
0,GOOG,APPL,AMZN
1,JPM,BAC,GS


## Stacking String Calls

In [15]:
# Deliberately untidy input: padding whitespace and a stray ':' inside "bo:bo"
messy_names = pd.Series(["andrew  ", "bo:bo", "  claire  "])

In [16]:
# Show the raw values; the padding whitespace is hard to spot in the printed output
messy_names

0      andrew  
1         bo:bo
2      claire  
dtype: object

In [17]:
# Remove the stray ':' characters. regex=False treats the pattern as a literal
# string on every pandas version (the default for `regex` changed in pandas 2.0).
messy_names.str.replace(':', '', regex=False)

0      andrew  
1          bobo
2      claire  
dtype: object

In [18]:
# Chain a second .str call: drop ':' literally (regex=False keeps this
# independent of the pandas version's default), then trim outer whitespace.
messy_names.str.replace(':', '', regex=False).str.strip()

0    andrew
1      bobo
2    claire
dtype: object

In [19]:
# Full cleaning chain: literal ':' removal (regex=False, so the result does not
# depend on the pandas version's default), whitespace trim, then capitalize.
messy_names.str.replace(':', '', regex=False).str.strip().str.capitalize()

0    Andrew
1      Bobo
2    Claire
dtype: object

## Custom Calls

In [20]:
def cleanup(name):
    """Return ``name`` with colons removed, outer whitespace stripped, and capitalized.

    Performs the same three steps as the chained
    ``.str.replace(':', '').str.strip().str.capitalize()`` call, packaged as
    a plain function so it can be passed to ``Series.apply``.

    Parameters
    ----------
    name : str
        Raw value to tidy. Non-string values (for example ``None`` or ``NaN``
        standing in for missing data) are returned unchanged instead of
        raising ``AttributeError``.

    Returns
    -------
    str
        The cleaned name, e.g. ``"  bo:bo "`` -> ``"Bobo"``.
    """
    if not isinstance(name, str):
        # Missing entries in an object Series are not strings; pass them through.
        return name
    return name.replace(":", "").strip().capitalize()

In [21]:
# Run the Python-level cleanup function on each element; the result matches
# the chained .str version shown earlier in the notebook
messy_names.apply(cleanup)

0    Andrew
1      Bobo
2    Claire
dtype: object