In [1]:
import numpy as np
import pandas as pd

In [3]:
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
s

a    0.176339
b   -0.204168
c   -1.139687
d    1.389806
e   -1.884472
dtype: float64

In [4]:
s.index

Index([u'a', u'b', u'c', u'd', u'e'], dtype='object')

In [5]:
d = {'a': 0., 'b':1., 'c':2.}
d

{'a': 0.0, 'b': 1.0, 'c': 2.0}

In [6]:
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [7]:
pd.Series(d, index=['A', 'B', 'C'])

A   NaN
B   NaN
C   NaN
dtype: float64

In [8]:
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [9]:
# series is nd-array like

In [10]:
s

a    0.176339
b   -0.204168
c   -1.139687
d    1.389806
e   -1.884472
dtype: float64

In [11]:
s[0]

0.17633905861794777

In [12]:
s.median()

-0.20416766713841247

In [13]:
s[s > s.median()]

a    0.176339
d    1.389806
dtype: float64

In [14]:
s[[4, 3, 1]]

e   -1.884472
d    1.389806
b   -0.204168
dtype: float64

In [15]:
np.exp(s)

a    1.192842
b    0.815326
c    0.319919
d    4.014069
e    0.151909
dtype: float64

In [16]:
# series is dict-like

In [17]:
s['a']

0.17633905861794777

In [18]:
s['e'] = 12
s

a     0.176339
b    -0.204168
c    -1.139687
d     1.389806
e    12.000000
dtype: float64

In [19]:
'e' in s

True

In [20]:
'f' in s

False

In [23]:
# If a label is not contained, an exception is raised:
# s['f']

# Using the get method, a missing label will return None or specified default:
s.get('f')

s.get('f', np.NaN)

nan

In [24]:
# Vectorized operations and label alignment with Series

In [25]:
s

a     0.176339
b    -0.204168
c    -1.139687
d     1.389806
e    12.000000
dtype: float64

In [26]:
s * 2

a     0.352678
b    -0.408335
c    -2.279375
d     2.779611
e    24.000000
dtype: float64

In [27]:
np.exp(s)

a         1.192842
b         0.815326
c         0.319919
d         4.014069
e    162754.791419
dtype: float64

In [29]:
s[1:] + s[:-1]

a         NaN
b   -0.408335
c   -2.279375
d    2.779611
e         NaN
dtype: float64

In [30]:
s

a     0.176339
b    -0.204168
c    -1.139687
d     1.389806
e    12.000000
dtype: float64

In [31]:
s = pd.Series(np.random.randn(5), name='something')
s

0   -1.683062
1   -0.433189
2    1.295104
3   -0.334588
4   -0.319575
Name: something, dtype: float64

In [32]:
s.name

'something'

In [33]:
s2 = s.rename('different')
s2.name

'different'

In [34]:
# dataframe
d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']), 'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}
d

{'one': a    1.0
 b    2.0
 c    3.0
 dtype: float64, 'two': a    1.0
 b    2.0
 c    3.0
 d    4.0
 dtype: float64}

In [35]:
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [36]:
df = pd.DataFrame(d, index=['d', 'b', 'a'])
df

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [37]:
df = pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])
df

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [38]:
df.index

Index([u'd', u'b', u'a'], dtype='object')

In [39]:
df.columns

Index([u'two', u'three'], dtype='object')

In [40]:
data2 = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]
data2

[{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]

In [42]:
pd.DataFrame(data2, index=['first', 'second'])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [43]:
pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])])

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [3]:
df = pd.DataFrame(np.random.randn(10, 4), columns=['A', 'B', 'C', 'D'])

In [4]:
df2 = pd.DataFrame(np.random.randn(7, 3), columns=['A', 'B', 'C'])

In [5]:
df + df2

Unnamed: 0,A,B,C,D
0,1.762158,-1.35644,-0.971267,
1,-0.643382,1.978687,1.234418,
2,0.004554,1.089468,-1.139394,
3,0.886493,-1.921404,-0.485943,
4,-1.428452,0.327194,-1.57722,
5,-0.185201,1.125732,-0.061346,
6,0.628389,1.133353,-0.828206,
7,,,,
8,,,,
9,,,,


In [6]:
df

Unnamed: 0,A,B,C,D
0,-0.3738,-1.087134,0.570258,-0.694361
1,0.133677,1.683059,0.405279,-0.878707
2,0.576367,0.072537,-1.143693,0.106675
3,-0.316887,-0.362592,-1.365615,-0.515746
4,1.146219,1.335188,0.117429,-0.203574
5,0.193325,-0.420413,0.426066,-0.31572
6,1.448909,-0.194188,-1.338263,-0.798579
7,2.057915,-0.54213,0.877649,2.589239
8,0.17654,-0.16168,-1.155054,-0.383988
9,-0.173009,0.804199,0.895327,-1.396924


In [7]:
df - df.iloc[0]

Unnamed: 0,A,B,C,D
0,0.0,0.0,0.0,0.0
1,0.507476,2.770193,-0.164979,-0.184346
2,0.950167,1.159672,-1.713951,0.801036
3,0.056913,0.724542,-1.935874,0.178615
4,1.520018,2.422322,-0.452829,0.490787
5,0.567125,0.666721,-0.144193,0.378641
6,1.822708,0.892946,-1.908521,-0.104218
7,2.431715,0.545004,0.307391,3.2836
8,0.55034,0.925454,-1.725313,0.310373
9,0.20079,1.891333,0.325069,-0.702563


In [8]:
index = pd.date_range('1/1/2000', periods=8)

In [9]:
df = pd.DataFrame(np.random.randn(8, 3), index=index, columns=list('ABC'))

In [10]:
df

Unnamed: 0,A,B,C
2000-01-01,0.805233,-0.315572,-2.530603
2000-01-02,-0.949536,-1.944263,-1.408812
2000-01-03,-0.66642,-0.708008,-2.015721
2000-01-04,0.228398,0.300454,-0.838683
2000-01-05,-0.647989,0.2987,-1.151111
2000-01-06,1.843812,-0.176656,0.319169
2000-01-07,1.272651,0.949797,0.414883
2000-01-08,0.303631,-0.261996,3.324506


In [11]:
type(df['A'])

pandas.core.series.Series

In [12]:
df - df['A']

Unnamed: 0,2000-01-01 00:00:00,2000-01-02 00:00:00,2000-01-03 00:00:00,2000-01-04 00:00:00,2000-01-05 00:00:00,2000-01-06 00:00:00,2000-01-07 00:00:00,2000-01-08 00:00:00,A,B,C
2000-01-01,,,,,,,,,,,
2000-01-02,,,,,,,,,,,
2000-01-03,,,,,,,,,,,
2000-01-04,,,,,,,,,,,
2000-01-05,,,,,,,,,,,
2000-01-06,,,,,,,,,,,
2000-01-07,,,,,,,,,,,
2000-01-08,,,,,,,,,,,


In [13]:
df

Unnamed: 0,A,B,C
2000-01-01,0.805233,-0.315572,-2.530603
2000-01-02,-0.949536,-1.944263,-1.408812
2000-01-03,-0.66642,-0.708008,-2.015721
2000-01-04,0.228398,0.300454,-0.838683
2000-01-05,-0.647989,0.2987,-1.151111
2000-01-06,1.843812,-0.176656,0.319169
2000-01-07,1.272651,0.949797,0.414883
2000-01-08,0.303631,-0.261996,3.324506


In [15]:
# subtract a col value from the dataframe
df.sub(df['A'], axis=0)

Unnamed: 0,A,B,C
2000-01-01,0.0,-1.120805,-3.335836
2000-01-02,0.0,-0.994726,-0.459276
2000-01-03,0.0,-0.041588,-1.349301
2000-01-04,0.0,0.072056,-1.067081
2000-01-05,0.0,0.946689,-0.503122
2000-01-06,0.0,-2.020468,-1.524643
2000-01-07,0.0,-0.322855,-0.857768
2000-01-08,0.0,-0.565628,3.020875


In [16]:
df + 1

Unnamed: 0,A,B,C
2000-01-01,1.805233,0.684428,-1.530603
2000-01-02,0.050464,-0.944263,-0.408812
2000-01-03,0.33358,0.291992,-1.015721
2000-01-04,1.228398,1.300454,0.161317
2000-01-05,0.352011,1.2987,-0.151111
2000-01-06,2.843812,0.823344,1.319169
2000-01-07,2.272651,1.949797,1.414883
2000-01-08,1.303631,0.738004,4.324506


In [17]:
df1 = pd.DataFrame({'a' : [1, 0, 1], 'b' : [0, 1, 1] }, dtype=bool)

In [18]:
df2 = pd.DataFrame({'a' : [0, 1, 1], 'b' : [1, 1, 0] }, dtype=bool)

In [19]:
df1 & df2

Unnamed: 0,a,b
0,False,False
1,False,True
2,True,False


In [20]:
df.T

Unnamed: 0,2000-01-01 00:00:00,2000-01-02 00:00:00,2000-01-03 00:00:00,2000-01-04 00:00:00,2000-01-05 00:00:00,2000-01-06 00:00:00,2000-01-07 00:00:00,2000-01-08 00:00:00
A,0.805233,-0.949536,-0.66642,0.228398,-0.647989,1.843812,1.272651,0.303631
B,-0.315572,-1.944263,-0.708008,0.300454,0.2987,-0.176656,0.949797,-0.261996
C,-2.530603,-1.408812,-2.015721,-0.838683,-1.151111,0.319169,0.414883,3.324506


In [21]:
np.asarray(df)

array([[ 0.80523325, -0.31557215, -2.53060306],
       [-0.94953627, -1.94426265, -1.40881179],
       [-0.66641998, -0.70800842, -2.01572068],
       [ 0.2283982 ,  0.30045393, -0.83868268],
       [-0.64798936,  0.29870012, -1.15111119],
       [ 1.843812  , -0.17665574,  0.31916864],
       [ 1.27265138,  0.94979666,  0.41488322],
       [ 0.30363121, -0.26199642,  3.3245062 ]])

In [22]:
df.T.dot(df)

Unnamed: 0,A,B,C
A,7.577667,2.742428,3.323575
B,2.742428,5.562476,3.835676
C,3.323575,3.835676,25.806616


In [23]:
s1 = pd.Series(np.arange(5, 10))

In [24]:
s1

0    5
1    6
2    7
3    8
4    9
dtype: int64

In [25]:
s1.dot(s1)

255

In [28]:
s1

0    5
1    6
2    7
3    8
4    9
dtype: int64

In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8 entries, 2000-01-01 to 2000-01-08
Freq: D
Data columns (total 3 columns):
A    8 non-null float64
B    8 non-null float64
C    8 non-null float64
dtypes: float64(3)
memory usage: 256.0 bytes


In [31]:
wp = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],major_axis=pd.date_range('1/1/2000', periods=5),minor_axis=['A', 'B', 'C', 'D'])
wp

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 5 (major_axis) x 4 (minor_axis)
Items axis: Item1 to Item2
Major_axis axis: 2000-01-01 00:00:00 to 2000-01-05 00:00:00
Minor_axis axis: A to D

In [32]:
wp.loc['Item1']

Unnamed: 0,A,B,C,D
2000-01-01,1.59948,-0.357055,1.626113,-0.006114
2000-01-02,1.318293,-1.105657,-0.58053,2.082384
2000-01-03,-1.898096,-0.304726,-0.433705,-0.025529
2000-01-04,1.465622,-0.701683,-0.071912,-0.331005
2000-01-05,0.529878,-0.53793,-0.523586,-0.404327


In [33]:
df = pd.DataFrame({'a': ['foo', 'bar', 'baz'],'b': np.random.randn(3)})
df

Unnamed: 0,a,b
0,foo,1.359754
1,bar,-1.520726
2,baz,0.014434


In [34]:
data = {'item1': df, 'item2': df}

In [35]:
data

{'item1':      a         b
 0  foo  1.359754
 1  bar -1.520726
 2  baz  0.014434, 'item2':      a         b
 0  foo  1.359754
 1  bar -1.520726
 2  baz  0.014434}

In [36]:
import pprint

In [37]:
pprint.pprint(data)

{'item1':      a         b
0  foo  1.359754
1  bar -1.520726
2  baz  0.014434,
 'item2':      a         b
0  foo  1.359754
1  bar -1.520726
2  baz  0.014434}


In [38]:
panel = pd.Panel.from_dict(data, orient='minor')
panel

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 3 (major_axis) x 2 (minor_axis)
Items axis: a to b
Major_axis axis: 0 to 2
Minor_axis axis: item1 to item2

In [41]:
panel['a']['item1']

0    foo
1    bar
2    baz
Name: item1, dtype: object

In [44]:
panel['a'].iloc[0]

item1    foo
item2    foo
Name: 0, dtype: object

In [45]:
df


Unnamed: 0,a,b
0,foo,1.359754
1,bar,-1.520726
2,baz,0.014434


In [46]:
df.ix[1]

a        bar
b   -1.52073
Name: 1, dtype: object

In [49]:
df.iloc[1]

a        bar
b   -1.52073
Name: 1, dtype: object

In [50]:
idx = pd.Index(np.arange(10))

In [51]:
idx

Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64')

In [52]:
div, rem = divmod(idx, 3)
div, rem

(Int64Index([0, 0, 0, 1, 1, 1, 2, 2, 2, 3], dtype='int64'),
 Int64Index([0, 1, 2, 0, 1, 2, 0, 1, 2, 0], dtype='int64'))

In [54]:
df.empty

False

In [55]:
np.array([2])

array([2])

In [56]:
df

Unnamed: 0,a,b
0,foo,1.359754
1,bar,-1.520726
2,baz,0.014434


In [59]:
df = pd.DataFrame({'a': [1,2,3], 'b':[4,5,6]})
df

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [60]:
df.idxmin(axis=0)

a    0
b    0
dtype: int64

In [61]:
df.idxmin(axis=1)

0    a
1    a
2    a
dtype: object

In [62]:
df.idxmax(axis=0)

a    2
b    2
dtype: int64

In [63]:
df.idxmax(axis=1)

0    b
1    b
2    b
dtype: object

In [65]:
df.values

array([[1, 4],
       [2, 5],
       [3, 6]])

In [67]:
df

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [70]:
df

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [71]:
df.apply(np.mean)

a    2.0
b    5.0
dtype: float64

In [72]:
df.apply(np.mean, axis=1)

0    2.5
1    3.5
2    4.5
dtype: float64

In [73]:
df.apply(lambda x: x.max() - x.min())

a    2
b    2
dtype: int64

In [74]:
df.apply(lambda x: x.max() - x.min(), axis=1)

0    3
1    3
2    3
dtype: int64

In [76]:
df.apply(lambda x: x.idxmax(), axis=1)

0    b
1    b
2    b
dtype: object

In [77]:
df

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [78]:
f = lambda x: len(str(x))

In [80]:
df['a'].map(f)

0    1
1    1
2    1
Name: a, dtype: int64

In [82]:
df.applymap(f)

Unnamed: 0,a,b
0,1,1
1,1,1
2,1,1


In [83]:
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
s

a   -0.439967
b   -0.528729
c   -1.816689
d    1.097467
e   -0.632419
dtype: float64

In [84]:
s1 = s[:4]
s1

a   -0.439967
b   -0.528729
c   -1.816689
d    1.097467
dtype: float64

In [85]:
s2 = s[1:]
s2

b   -0.528729
c   -1.816689
d    1.097467
e   -0.632419
dtype: float64

In [86]:
s1.align(s2)

(a   -0.439967
 b   -0.528729
 c   -1.816689
 d    1.097467
 e         NaN
 dtype: float64, a         NaN
 b   -0.528729
 c   -1.816689
 d    1.097467
 e   -0.632419
 dtype: float64)

In [87]:
s1.align(s2, join='inner')

(b   -0.528729
 c   -1.816689
 d    1.097467
 dtype: float64, b   -0.528729
 c   -1.816689
 d    1.097467
 dtype: float64)

In [88]:
# iteration


In [89]:
df = pd.DataFrame({'a': [1, 2, 3], 'b': ['a', 'b', 'c']})
df

Unnamed: 0,a,b
0,1,a
1,2,b
2,3,c


In [90]:
for index, row in df.iterrows():
    row['a'] = 10

In [91]:
row

a    10
b     c
Name: 2, dtype: object

In [92]:
df

Unnamed: 0,a,b
0,1,a
1,2,b
2,3,c


In [93]:
for row in df.iteritems():
    print row

('a', 0    1
1    2
2    3
Name: a, dtype: int64)
('b', 0    a
1    b
2    c
Name: b, dtype: object)


In [94]:
for row in df.itertuples():
    print row

Pandas(Index=0, a=1, b='a')
Pandas(Index=1, a=2, b='b')
Pandas(Index=2, a=3, b='c')


In [2]:
s = pd.Series(pd.date_range('20130101 09:10:12', periods=4))
s

0   2013-01-01 09:10:12
1   2013-01-02 09:10:12
2   2013-01-03 09:10:12
3   2013-01-04 09:10:12
dtype: datetime64[ns]

In [4]:
s.dt.date

0    2013-01-01
1    2013-01-02
2    2013-01-03
3    2013-01-04
dtype: object

In [5]:
s.dt.day

0    1
1    2
2    3
3    4
dtype: int64

In [6]:
s.dt.month

0    1
1    1
2    1
3    1
dtype: int64

In [7]:
s.dt.hour
s.dt.minute

0    10
1    10
2    10
3    10
dtype: int64

In [8]:
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
s

0       A
1       B
2       C
3    Aaba
4    Baca
5     NaN
6    CABA
7     dog
8     cat
dtype: object

In [9]:
s.str.capitalize()

0       A
1       B
2       C
3    Aaba
4    Baca
5     NaN
6    Caba
7     Dog
8     Cat
dtype: object

In [10]:
s.str.upper()

0       A
1       B
2       C
3    AABA
4    BACA
5     NaN
6    CABA
7     DOG
8     CAT
dtype: object

In [11]:
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

In [12]:
df

NameError: name 'df' is not defined

In [13]:
s.sort_values()

0       A
3    Aaba
1       B
4    Baca
2       C
6    CABA
8     cat
7     dog
5     NaN
dtype: object

In [14]:
s.str.upper()

0       A
1       B
2       C
3    AABA
4    BACA
5     NaN
6    CABA
7     DOG
8     CAT
dtype: object

In [15]:
s = pd.Series(np.arange(10))
s

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [16]:
s.nsmallest(2)

0    0
1    1
dtype: int64

In [17]:
s.nlargest(2)

9    9
8    8
dtype: int64

In [19]:
dft = pd.DataFrame(dict(A = np.random.rand(3), B = 1, C = 'foo', \
                        D = pd.Timestamp('20010102'), E = pd.Series([1.0]*3).astype('float32'),\
                        F = False, G = pd.Series([1]*3,dtype='int8')))
dft

Unnamed: 0,A,B,C,D,E,F,G
0,0.007684,1,foo,2001-01-02,1.0,False,1
1,0.131609,1,foo,2001-01-02,1.0,False,1
2,0.809984,1,foo,2001-01-02,1.0,False,1


In [20]:
pd.Series([1, 2, 3.0])

0    1.0
1    2.0
2    3.0
dtype: float64

In [22]:
dft.get_dtype_counts()

bool              1
datetime64[ns]    1
float32           1
float64           1
int64             1
int8              1
object            1
dtype: int64

In [23]:
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
s

0       A
1       B
2       C
3    Aaba
4    Baca
5     NaN
6    CABA
7     dog
8     cat
dtype: object

In [24]:
s.str.len()

0    1.0
1    1.0
2    1.0
3    4.0
4    4.0
5    NaN
6    4.0
7    3.0
8    3.0
dtype: float64

In [46]:
s2 = pd.Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'])
s2

0    a_b_c
1    c_d_e
2      NaN
3    f_g_h
dtype: object

In [48]:
s2.str.split('_').str.get(1)

0      b
1      d
2    NaN
3      g
dtype: object

In [51]:
s2.str.split('_').str[1]

0      b
1      d
2    NaN
3      g
dtype: object

In [53]:
# expand and return a dataframe
s2.str.split('_', expand=True)

Unnamed: 0,0,1,2
0,a,b,c
1,c,d,e
2,,,
3,f,g,h


In [56]:
# limit number of splits
s2.str.split('_', expand=True, n=1)

Unnamed: 0,0,1
0,a,b_c
1,c,d_e
2,,
3,f,g_h


In [57]:
# rsplit - same like split but in reverse direction
s2.str.rsplit('_', expand=True, n=1)

Unnamed: 0,0,1
0,a_b,c
1,c_d,e
2,,
3,f_g,h


In [64]:
s2

0    a_b_c
1    c_d_e
2      NaN
3    f_g_h
dtype: object

In [65]:
s2.replace('a_b_c', 'd')

0        d
1    c_d_e
2      NaN
3    f_g_h
dtype: object

In [70]:
e = s2.str.findall('d')

In [71]:
e

0     []
1    [d]
2    NaN
3     []
dtype: object

In [76]:
s = pd.Series([1,2,3,4,5,6])
s

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [77]:
s[s == 3]

2    3
dtype: int64

In [78]:
s.where(s == 3)

0    NaN
1    NaN
2    3.0
3    NaN
4    NaN
5    NaN
dtype: float64

In [79]:
# selection without where does not preserve input data shape in series, to achieve it use .where
# but, in dataframes selection without where is equivalent to .where, preserves input data shapes

In [80]:
df = pd.DataFrame({
    'first': range(5),
    'second': range(5)
})
df

Unnamed: 0,first,second
0,0,0
1,1,1
2,2,2
3,3,3
4,4,4


In [82]:
df.to_csv('test.csv', index=False)

In [83]:
# missing data
df = pd.DataFrame(np.random.randn(5, 3), index=['a', 'c', 'e', 'f', 'h'], columns=['one', 'two', 'three'])
df

Unnamed: 0,one,two,three
a,0.566054,0.165922,-0.999806
c,-1.325831,-0.881484,-0.538181
e,0.245524,0.661737,-2.18651
f,-0.463903,0.591954,-0.487143
h,-0.745475,1.779751,-0.571589


In [84]:
df['four'] = 'bar'

In [85]:
df['five'] = df['one'] > 0
df

Unnamed: 0,one,two,three,four,five
a,0.566054,0.165922,-0.999806,bar,True
c,-1.325831,-0.881484,-0.538181,bar,False
e,0.245524,0.661737,-2.18651,bar,True
f,-0.463903,0.591954,-0.487143,bar,False
h,-0.745475,1.779751,-0.571589,bar,False


In [86]:
df2 = df.reindex(['a','b','c','d','e','f','g','h'])

In [87]:
df2

Unnamed: 0,one,two,three,four,five
a,0.566054,0.165922,-0.999806,bar,True
b,,,,,
c,-1.325831,-0.881484,-0.538181,bar,False
d,,,,,
e,0.245524,0.661737,-2.18651,bar,True
f,-0.463903,0.591954,-0.487143,bar,False
g,,,,,
h,-0.745475,1.779751,-0.571589,bar,False


In [88]:
df2['one']

a    0.566054
b         NaN
c   -1.325831
d         NaN
e    0.245524
f   -0.463903
g         NaN
h   -0.745475
Name: one, dtype: float64

In [89]:
pd.isnull(df2['one'])

a    False
b     True
c    False
d     True
e    False
f    False
g     True
h    False
Name: one, dtype: bool

In [90]:
df2['four'].notnull()

a     True
b    False
c     True
d    False
e     True
f     True
g    False
h     True
Name: four, dtype: bool

In [92]:
df2.isnull().head()

Unnamed: 0,one,two,three,four,five
a,False,False,False,False,False
b,True,True,True,True,True
c,False,False,False,False,False
d,True,True,True,True,True
e,False,False,False,False,False
