In [6]:
import pandas as pd
import numpy as np

# Display the version of pandas loaded in this kernel
pd.__version__

'0.23.1'

## Add/Drop

In [None]:
# Example frame with columns 'A', 'B', 'C' so the snippets below run on a fresh kernel
df = pd.DataFrame({'A': [1, 2, 3],
                   'B': [4, 5, 6],
                   'C': [7, 8, 9]})

# Add column to dataframe
df['col'] = pd.Series(['A', 'B', 'C'])

# Drop columns by position (returns a new frame; `df` itself is not modified)
df.drop(columns=df.columns[[1, 2]])

# Drop columns by name (returns a new frame; `df` itself is not modified)
df.drop(columns=['B', 'C'])

## Missing Values

In [20]:
# Small frame with one missing value per column.
# `np.nan` is the canonical spelling; the `np.NaN` alias no longer exists in NumPy 2.0+.
df = pd.DataFrame({'col1': ['a', np.nan, 'c', 'd'],
                   'col2': [1, 2, np.nan, 4]})

In [11]:
# Boolean mask of the frame: True wherever a value is missing
df.isna()

Unnamed: 0,col1,col2
0,False,False
1,True,False
2,False,True
3,False,False


In [12]:
# Count of missing values per column (True counts as 1 when summed)
df.isna().sum(axis=0)

col1    1
col2    1
dtype: int64

In [13]:
# Total number of missing values: per-column counts, then summed across columns
df.isna().sum(axis=0).sum()

2

In [14]:
# Keep only the rows whose 'col1' entry is missing
df.loc[df['col1'].isna()]

Unnamed: 0,col1,col2
1,,2.0


In [15]:
# Keep only the rows whose 'col1' entry is present
df.loc[df['col1'].notna()]

Unnamed: 0,col1,col2
0,a,1.0
2,c,
3,d,4.0


### Fill In Missing

In [17]:
# Fill missing 'col1' entries with an empty string.
# `assign` returns a new frame instead of overwriting `df['col1']`, so `df` keeps its
# NaN for the fill examples that follow and re-running this cell gives the same result.
df.assign(col1=df['col1'].fillna(''))

Unnamed: 0,col1,col2
0,a,1.0
1,,2.0
2,c,
3,d,4.0


In [19]:
# Backward fill - each gap takes the next non-N/A value found below it.
# `.bfill()` replaces `fillna(method='bfill')`, which newer pandas releases deprecate.
# `assign` returns a new frame, so `df` is left unmodified and the cell is re-runnable.
df.assign(col1=df['col1'].bfill())

Unnamed: 0,col1,col2
0,a,1.0
1,c,2.0
2,c,
3,d,4.0


In [21]:
# Forward fill - each gap takes the previous non-N/A value found above it.
# `.ffill()` replaces `fillna(method='ffill')`, which newer pandas releases deprecate.
# `assign` returns a new frame, so `df` is left unmodified and the cell is re-runnable.
df.assign(col1=df['col1'].ffill())

Unnamed: 0,col1,col2
0,a,1.0
1,a,2.0
2,c,
3,d,4.0


## Numeric Data
Each column is a Series, so arithmetic and comparison operators apply to it element-wise.

In [61]:
# Two integer columns used by the arithmetic and comparison examples
df = pd.DataFrame(data=dict(
    baz=[1, 3, 5, 7],
    zoo=[2, 4, 6, 8],
))

In [62]:
# Element-wise addition: every value in 'baz' is increased by 10
df['baz'].add(10)

0    11
1    13
2    15
3    17
Name: baz, dtype: int64

In [63]:
# Element-wise comparison: True where the 'zoo' value exceeds 5
df['zoo'].gt(5)

0    False
1    False
2     True
3     True
Name: zoo, dtype: bool

## String Data
Apply string functions to a Series through its `.str` accessor.

In [64]:
# Two string columns used by the `.str` examples
df = pd.DataFrame(data=dict(
    foo=['one', 'one', 'two', 'two'],
    bar=['A', 'B', 'A', 'B'],
))

In [65]:
# True for each 'foo' value that begins with the given prefix
df.loc[:, 'foo'].str.startswith(pat='on')

0     True
1     True
2    False
3    False
Name: foo, dtype: bool

In [66]:
# Pull the regex capture group out of each 'foo' value; rows without a match yield NaN
df.loc[:, 'foo'].str.extract(pat='(w)')

Unnamed: 0,0
0,
1,
2,w
3,w


In [67]:
# Overwrite 'bar' with 'C' on every row whose 'foo' equals 'one', then show the frame
df.loc[df['foo'].eq('one'), 'bar'] = 'C'
df

Unnamed: 0,foo,bar
0,one,C
1,one,C
2,two,A
3,two,B


## Date/Time Data
Apply date/time functions to a datetime Series through its `.dt` accessor.

In [None]:
# Example frame with a datetime64 column; the `.dt` accessor needs datetime-like values
# and no earlier cell creates a 'datevar' column.
df = pd.DataFrame({'datevar': pd.to_datetime(['2018-01-15', '2018-06-30', '2018-12-01'])})

# Get month numbers
df['datevar'].dt.month