In [1]:
import pandas as pd

In [2]:
# Small demo table, one row per person, built column by column.
# NOTE(review): Height looks like centimetres and Weight like kilograms
# (the BMI computation later in this notebook divides Height by 100) — confirm.
family = pd.DataFrame({
    'First':  ['Paul', 'Teddy', 'John'],
    'Middle': ['E', 'J', 'P'],
    'Last':   ['Boal', 'Lester-Boal', 'Doe'],
    'Height': [193, 101, 160],
    'Weight': [110, 23, 100],
    'Phone':  ['314-598-5494', '314-555-1234', '490-982-5555'],
})

In [3]:
# Display the freshly built frame.
family

Unnamed: 0,First,Middle,Last,Height,Weight,Phone
0,Paul,E,Boal,193,110,314-598-5494
1,Teddy,J,Lester-Boal,101,23,314-555-1234
2,John,P,Doe,160,100,490-982-5555


In [4]:
# Element-wise string concatenation: "First M. Last" for every row.
family['Full'] = (
    family['First'] + ' ' + family['Middle'] + '. ' + family['Last']
)

In [5]:
# Display the frame; it now carries the 'Full' column.
family

Unnamed: 0,First,Middle,Last,Height,Weight,Phone,Full
0,Paul,E,Boal,193,110,314-598-5494,Paul E. Boal
1,Teddy,J,Lester-Boal,101,23,314-555-1234,Teddy J. Lester-Boal
2,John,P,Doe,160,100,490-982-5555,John P. Doe


In [6]:
# "Last, First M." form, convenient as a sort key.
family['Name'] = (
    family['Last'] + ', ' + family['First'] + ' ' + family['Middle'] + '.'
)

In [7]:
# Display the frame; it now carries the 'Name' column.
family

Unnamed: 0,First,Middle,Last,Height,Weight,Phone,Full,Name
0,Paul,E,Boal,193,110,314-598-5494,Paul E. Boal,"Boal, Paul E."
1,Teddy,J,Lester-Boal,101,23,314-555-1234,Teddy J. Lester-Boal,"Lester-Boal, Teddy J."
2,John,P,Doe,160,100,490-982-5555,John P. Doe,"Doe, John P."


In [8]:
# Returns a sorted copy ordered by the 'Name' column; `family` itself is not reordered.
family.sort_values(by='Name')

Unnamed: 0,First,Middle,Last,Height,Weight,Phone,Full,Name
0,Paul,E,Boal,193,110,314-598-5494,Paul E. Boal,"Boal, Paul E."
2,John,P,Doe,160,100,490-982-5555,John P. Doe,"Doe, John P."
1,Teddy,J,Lester-Boal,101,23,314-555-1234,Teddy J. Lester-Boal,"Lester-Boal, Teddy J."


# Splitting

In [9]:
# NOTE(review): without expand=True, str.split stores a Python list of every
# dash-separated piece in each cell, so despite its name this column holds all
# three parts of the phone number rather than just the area code.
family['Area Code'] = family['Phone'].str.split('-')

In [10]:
# Display the frame; each 'Area Code' cell holds the whole list of split parts.
family

Unnamed: 0,First,Middle,Last,Height,Weight,Phone,Full,Name,Area Code
0,Paul,E,Boal,193,110,314-598-5494,Paul E. Boal,"Boal, Paul E.","[314, 598, 5494]"
1,Teddy,J,Lester-Boal,101,23,314-555-1234,Teddy J. Lester-Boal,"Lester-Boal, Teddy J.","[314, 555, 1234]"
2,John,P,Doe,160,100,490-982-5555,John P. Doe,"Doe, John P.","[490, 982, 5555]"


In [11]:
# expand=True spreads the pieces into separate columns instead of one list per cell.
family['Phone'].str.split('-', expand=True)

Unnamed: 0,0,1,2
0,314,598,5494
1,314,555,1234
2,490,982,5555


In [12]:
# Column 0 of the expanded split is the leading segment of each phone number.
family['Area'] = family['Phone'].str.split('-', expand=True)[0]

In [13]:
# Display the frame; it now carries the 'Area' column next to 'Area Code'.
family

Unnamed: 0,First,Middle,Last,Height,Weight,Phone,Full,Name,Area Code,Area
0,Paul,E,Boal,193,110,314-598-5494,Paul E. Boal,"Boal, Paul E.","[314, 598, 5494]",314
1,Teddy,J,Lester-Boal,101,23,314-555-1234,Teddy J. Lester-Boal,"Lester-Boal, Teddy J.","[314, 555, 1234]",314
2,John,P,Doe,160,100,490-982-5555,John P. Doe,"Doe, John P.","[490, 982, 5555]",490


# Rank / Percentile

In [14]:
# Ascending rank of each height (1.0 = smallest value); displayed only, not stored.
family['Height'].rank()

0    3.0
1    1.0
2    2.0
Name: Height, dtype: float64

In [15]:
# Keep the ascending height rank as its own column.
family['Height Rank'] = family['Height'].rank()

In [16]:
# Display the frame; it now carries the 'Height Rank' column.
family

Unnamed: 0,First,Middle,Last,Height,Weight,Phone,Full,Name,Area Code,Area,Height Rank
0,Paul,E,Boal,193,110,314-598-5494,Paul E. Boal,"Boal, Paul E.","[314, 598, 5494]",314,3.0
1,Teddy,J,Lester-Boal,101,23,314-555-1234,Teddy J. Lester-Boal,"Lester-Boal, Teddy J.","[314, 555, 1234]",314,1.0
2,John,P,Doe,160,100,490-982-5555,John P. Doe,"Doe, John P.","[490, 982, 5555]",490,2.0


In [17]:
# pct=True expresses the rank as a fraction of the row count (largest value -> 1.0).
family['Height Pct'] = family['Height'].rank(pct=True)

In [18]:
# Display the frame; it now carries the 'Height Pct' column.
family

Unnamed: 0,First,Middle,Last,Height,Weight,Phone,Full,Name,Area Code,Area,Height Rank,Height Pct
0,Paul,E,Boal,193,110,314-598-5494,Paul E. Boal,"Boal, Paul E.","[314, 598, 5494]",314,3.0,1.0
1,Teddy,J,Lester-Boal,101,23,314-555-1234,Teddy J. Lester-Boal,"Lester-Boal, Teddy J.","[314, 555, 1234]",314,1.0,0.333333
2,John,P,Doe,160,100,490-982-5555,John P. Doe,"Doe, John P.","[490, 982, 5555]",490,2.0,0.666667


In [19]:
# Sorted copy, lowest height rank first; `family` itself keeps its original order.
family.sort_values(by='Height Rank')

Unnamed: 0,First,Middle,Last,Height,Weight,Phone,Full,Name,Area Code,Area,Height Rank,Height Pct
1,Teddy,J,Lester-Boal,101,23,314-555-1234,Teddy J. Lester-Boal,"Lester-Boal, Teddy J.","[314, 555, 1234]",314,1.0,0.333333
2,John,P,Doe,160,100,490-982-5555,John P. Doe,"Doe, John P.","[490, 982, 5555]",490,2.0,0.666667
0,Paul,E,Boal,193,110,314-598-5494,Paul E. Boal,"Boal, Paul E.","[314, 598, 5494]",314,3.0,1.0


# Arbitrary Functions

In [20]:
def bmi(vals):
    """Return the body-mass index for a single row.

    Series -> float

    vals must expose ``Height`` and ``Weight`` as attributes, as a pandas
    Series carrying those two labels does. Height is divided by 100 before
    being squared, so it is presumably given in centimetres (with Weight
    presumably in kilograms) — confirm against the data source.
    """
    scaled_height = vals.Height / 100
    return vals.Weight / scaled_height ** 2

In [21]:
# Row-wise apply: each row of the two-column slice is handed to bmi as a Series.
family.loc[:, ['Height', 'Weight']].apply(bmi, axis='columns')

0    29.530994
1    22.546809
2    39.062500
dtype: float64

In [22]:
# Same row-wise computation as the previous cell, stored as a new column.
family['BMI'] = family.loc[:, ['Height', 'Weight']].apply(bmi, axis='columns')

In [23]:
# Display the frame; it now carries the 'BMI' column.
family

Unnamed: 0,First,Middle,Last,Height,Weight,Phone,Full,Name,Area Code,Area,Height Rank,Height Pct,BMI
0,Paul,E,Boal,193,110,314-598-5494,Paul E. Boal,"Boal, Paul E.","[314, 598, 5494]",314,3.0,1.0,29.530994
1,Teddy,J,Lester-Boal,101,23,314-555-1234,Teddy J. Lester-Boal,"Lester-Boal, Teddy J.","[314, 555, 1234]",314,1.0,0.333333,22.546809
2,John,P,Doe,160,100,490-982-5555,John P. Doe,"Doe, John P.","[490, 982, 5555]",490,2.0,0.666667,39.0625


In [24]:
# Ascending rank of the BMI values; the result is only displayed, not assigned.
family.BMI.rank()

0    2.0
1    1.0
2    3.0
Name: BMI, dtype: float64

In [25]:
# The BMI rank computed in the previous cell was never assigned, so the frame
# shown here has the same columns as before.
family

Unnamed: 0,First,Middle,Last,Height,Weight,Phone,Full,Name,Area Code,Area,Height Rank,Height Pct,BMI
0,Paul,E,Boal,193,110,314-598-5494,Paul E. Boal,"Boal, Paul E.","[314, 598, 5494]",314,3.0,1.0,29.530994
1,Teddy,J,Lester-Boal,101,23,314-555-1234,Teddy J. Lester-Boal,"Lester-Boal, Teddy J.","[314, 555, 1234]",314,1.0,0.333333,22.546809
2,John,P,Doe,160,100,490-982-5555,John P. Doe,"Doe, John P.","[490, 982, 5555]",490,2.0,0.666667,39.0625


In [27]:
def bmi(height, weight=None):
    """Return the body-mass index.

    float, float -> float
    Series -> float      (row-style call, when ``weight`` is omitted)

    This definition replaces the earlier row-based ``bmi`` in the kernel
    namespace. To keep cells that call ``apply(bmi, axis=1)`` working when
    they are re-run afterwards, a single row-like argument exposing
    ``Height`` and ``Weight`` attributes is still accepted.

    height -- the height value, or a row-like object when ``weight`` is
              omitted. It is divided by 100 before being squared, so it is
              presumably in centimetres — confirm against the data source.
    weight -- the weight value (presumably kilograms); leave as None for
              the row-style call.
    """
    if weight is None:
        # Row-style call: the sole argument carries both measurements.
        row = height
        height, weight = row.Height, row.Weight
    return weight / (height / 100) ** 2

In [28]:
# Row-wise apply over the whole frame; the lambda picks out the two fields
# that the scalar-argument bmi needs.
family.apply(lambda row: bmi(row['Height'], row['Weight']), axis='columns')

0    29.530994
1    22.546809
2    39.062500
dtype: float64