In [1]:
import pandas as pd

### Categorical variables

In [2]:
# Let's use pandas to create Categorical Series. One way is by 
# specifying dtype="category" when constructing a Series:

s = pd.Series(["a","b","c","a"], dtype="category")
s

0    a
1    b
2    c
3    a
dtype: category
Categories (3, object): [a, b, c]

In [4]:
# Another way is to convert an existing Series or column to a 
# category dtype:
# A column 'a' 'a' not associated
# B column  'a', 'a' in same category
df = pd.DataFrame({"A":["a","b","c","a"]})
df["B"] = df["A"].astype('category')
df.dtypes

A      object
B    category
dtype: object

In [8]:
# You can also pass a pandas.Categorical object to a Series 

raw_cat = pd.Categorical(["a","b","c","a"], categories=["b","c","d"],
                          ordered=False)# ordered is useful for sorting if there is a heirarchy

In [9]:
 s = pd.Series(raw_cat)
 s

0    NaN
1      b
2      c
3    NaN
dtype: category
Categories (3, object): [b, c, d]

### Dummy variables

In [12]:
# Let's use pd.get_dummies to convert categorical variables into dummy 
# variables. First let's create a small DataFrame with categorical variables. 

df = pd.DataFrame({'key': list('bbacab'), 'data1': range(6)})#data printed first besause its alphabetical
df

Unnamed: 0,data1,key
0,0,b
1,1,b
2,2,a
3,3,c
4,4,a
5,5,b


In [15]:
# Now, let's convert the categorical variables into dummy variables. 

pd.get_dummies(df['key'])

df.dtypes

data1     int32
key      object
dtype: object