In [1]:
import pandas as pd

### Categorical variables

In [2]:
# A categorical Series can be built directly: hand the values to the
# Series constructor and request dtype="category". The categories are
# inferred from the distinct values.

s = pd.Series(list("abca"), dtype="category")
s

0    a
1    b
2    c
3    a
dtype: category
Categories (3, object): [a, b, c]

In [3]:
# An existing Series or column can also be converted after the fact with
# .astype('category'). Here column "B" is a categorical copy of the plain
# column "A".

df = pd.DataFrame({"A": list("abca")}).assign(
    B=lambda frame: frame["A"].astype("category")
)
df.dtypes

A      object
B    category
dtype: object

In [4]:
# A pandas.Categorical object can be built explicitly and later passed to
# a Series. The allowed categories are listed by hand here; "a" is not one
# of them, so both "a" entries show up as NaN.

raw_cat = pd.Categorical(
    list("abca"),
    categories=list("bcd"),
    ordered=False,
)
raw_cat

[NaN, b, c, NaN]
Categories (3, object): [b, c, d]

In [5]:
 s = pd.Series(raw_cat)
 s

0    NaN
1      b
2      c
3    NaN
dtype: category
Categories (3, object): [b, c, d]

### Dummy variables

In [6]:
# pd.get_dummies turns a categorical column into dummy (indicator)
# variables. Start with a small DataFrame: 'key' holds the categorical
# labels, 'data1' is just a running integer.

df = pd.DataFrame(
    dict(
        key=list('bbacab'),
        data1=range(6),
    )
)


In [7]:
# Expand the 'key' column into dummy variables: one indicator column per
# distinct value found in 'key'.

pd.get_dummies(data=df['key'])


Unnamed: 0,a,b,c
0,0.0,1.0,0.0
1,0.0,1.0,0.0
2,1.0,0.0,0.0
3,0.0,0.0,1.0
4,1.0,0.0,0.0
5,0.0,1.0,0.0


In [8]:
# Second example: one country per row, labelled with its continent.
# Both lists must have the same number of entries, otherwise the DataFrame
# constructor raises a ValueError.
countries = pd.DataFrame({
    'Countries': ['USA', 'China', 'Brazil', 'Japan'],
    'Continent': ['NAmerica', 'Asia', 'SAmerica', 'Asia'],
})
countries

In [None]:
# Build the dummy variables for the 'Continent' column.
dummies = pd.get_dummies(data=countries['Continent'])
dummies

In [None]:
# Attach the dummy columns to the original frame; the join aligns the two
# frames on their index.
new = countries.join(dummies, how='left')
new

In [None]:
# Show the joined frame without the 'Continent' column. The result is not
# assigned, so `new` itself is left as it was.
new.drop(labels='Continent', axis='columns')