In [1]:
import pandas as pd

In [2]:
# toy data: one row per person, every column holds string categories
df = pd.DataFrame({
    "name": ["shaw", "mary", "alex", "bob"],
    "edu": ["bachelor", "master", "bachelor", "phd"],
    "sex": ["m", "f", "m", "m"]
})

In [3]:
df

Unnamed: 0,edu,name,sex
0,bachelor,shaw,m
1,master,mary,f
2,bachelor,alex,m
3,phd,bob,m


In [4]:
# takes the entire dataframe as input: every string column is encoded,
# including `name`, which yields one indicator column per person
pd.get_dummies(df)

Unnamed: 0,edu_bachelor,edu_master,edu_phd,name_alex,name_bob,name_mary,name_shaw,sex_f,sex_m
0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
1,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
2,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0


In [5]:
# takes a subset of columns as input; only those columns appear in the result
pd.get_dummies(df[['edu', 'sex']])

Unnamed: 0,edu_bachelor,edu_master,edu_phd,sex_f,sex_m
0,1.0,0.0,0.0,0.0,1.0
1,0.0,1.0,0.0,1.0,0.0
2,1.0,0.0,0.0,0.0,1.0
3,0.0,0.0,1.0,0.0,1.0


In [6]:
# takes a Series as input; the resulting columns are the bare category
# values (no "edu_" prefix), unlike the DataFrame case above
pd.get_dummies(df.edu)

Unnamed: 0,bachelor,master,phd
0,1.0,0.0,0.0
1,0.0,1.0,0.0
2,1.0,0.0,0.0
3,0.0,0.0,1.0


##### Utility Function

In [7]:
def encode_columns(df, cols):
    """
    One-hot encode the given columns of a data frame.

    Every column listed in `cols` is replaced by its dummy/indicator
    columns, named `<column>_<value>`. All remaining columns are kept
    unchanged and come first in the result, followed by the indicator
    columns in the order given by `cols`. The input frame is not modified;
    a new frame is returned.

    @param df (DataFrame): a pandas dataframe
    @param cols (list[str]): list of columns to be encoded

    @returns df_ (DataFrame): with categorical columns replaced
    """
    df_ = df.drop(cols, axis=1)
    # Pass `columns=` explicitly: without it, get_dummies only converts
    # object/category columns, so a numeric-coded categorical listed in
    # `cols` would be passed through un-encoded.
    encoded = pd.get_dummies(df[cols], columns=cols)
    return pd.concat([df_, encoded], axis=1)

In [8]:
# encode a single column; the remaining columns are carried over unchanged
encode_columns(df, cols=["edu"])

Unnamed: 0,name,sex,edu_bachelor,edu_master,edu_phd
0,shaw,m,1.0,0.0,0.0
1,mary,f,0.0,1.0,0.0
2,alex,m,1.0,0.0,0.0
3,bob,m,0.0,0.0,1.0


In [9]:
# note that encode_columns returns a new frame and does not modify
# the original data frame
df

Unnamed: 0,edu,name,sex
0,bachelor,shaw,m
1,master,mary,f
2,bachelor,alex,m
3,phd,bob,m


In [10]:
# encode several columns at once; the dummy columns follow the order given in cols
encode_columns(df, cols=["sex", "edu"])

Unnamed: 0,name,sex_f,sex_m,edu_bachelor,edu_master,edu_phd
0,shaw,0.0,1.0,1.0,0.0,0.0
1,mary,1.0,0.0,0.0,1.0,0.0
2,alex,0.0,1.0,1.0,0.0,0.0
3,bob,0.0,1.0,0.0,0.0,1.0


In [11]:
# quick hack: if all columns are categorical,
# pass in the entire data frame; every column except the index is encoded
# NOTE(review): this rebinds `df`, so the cell is not idempotent --
# running it a second time raises KeyError because "name" is no longer a column.
df = df.set_index("name")
pd.get_dummies(df)

Unnamed: 0_level_0,edu_bachelor,edu_master,edu_phd,sex_f,sex_m
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
shaw,1.0,0.0,0.0,0.0,1.0
mary,0.0,1.0,0.0,1.0,0.0
alex,1.0,0.0,0.0,0.0,1.0
bob,0.0,0.0,1.0,0.0,1.0
