In [3]:
import pandas as pd
import numpy as np

In [26]:
# Three random Series with partially overlapping labels. The constructor
# aligns them on the union of their indexes (a-d) and fills the gaps with NaN.
# The draws are unseeded, so the numbers differ from run to run.
df = pd.DataFrame(
    dict(
        one=pd.Series(np.random.randn(3), index=list("abc")),
        two=pd.Series(np.random.randn(4), index=list("abcd")),
        three=pd.Series(np.random.randn(3), index=list("bcd")),
    )
)
df

Unnamed: 0,one,two,three
a,0.246619,-0.570219,
b,0.67631,0.482506,-0.150805
c,-1.317104,0.135745,-1.575886
d,,-0.262067,-0.540236


In [27]:
# Compare the frame with an identical copy of itself, element by element.
# No value is strictly greater than itself, and comparisons involving NaN are
# False as well, so every cell of the result is False.
df2 = df.copy()
df.gt(df2)

Unnamed: 0,one,two,three
a,False,False,False
b,False,False,False
c,False,False,False
d,False,False,False


In [28]:
# NaN never compares equal to anything, itself included (np.nan == np.nan is
# False), so `ne` reports True in the cells where both frames hold NaN.
df2.ne(df)

Unnamed: 0,one,two,three
a,False,False,True
b,False,False,False
c,False,False,False
d,True,False,False


In [29]:
# Column-wise reduction: True only for columns in which every value is > 0.
(df > 0).all()

one      False
two      False
three    False
dtype: bool

In [30]:
# Column-wise reduction: True for columns holding at least one value > 0.
(df > 0).any()

one       True
two       True
three    False
dtype: bool

In [31]:
# Reducing twice collapses the frame to one scalar: is any value at all > 0?
(df > 0).any().any()

True

In [32]:
# Does every column contain at least one value > 0?
(df > 0).any().all()

False

In [33]:
# Comparing a Series with a scalar tests every element against that scalar.
pd.Series(['foo', 'bar', 'baz']) == 'foo'


0     True
1    False
2    False
dtype: bool

In [34]:
# Comparing with an array-like (here an Index of the same length) pairs the
# elements up by position.
pd.Series(['foo', 'bar', 'baz']) == pd.Index(['foo', 'bar', 'qux'])

0     True
1     True
2    False
dtype: bool

In [36]:
# Six consecutive calendar days starting on 2013-01-01 (daily frequency).
dates = pd.date_range('20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [40]:
# NOTE(review): this cell repeats the previous one verbatim; re-running it
# leaves `dates` unchanged, so one of the two cells can be dropped.
dates = pd.date_range('20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [41]:
# 6x4 frame of standard-normal draws, indexed by `dates`, with columns A-D.
# This rebinds `df`, replacing the frame built earlier in the notebook.
# NOTE(review): no random seed is set in the visible cells, so the values
# shown below change on every run.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.134772,0.670491,0.614065,-0.079269
2013-01-02,-0.51516,0.29328,-0.621966,0.592382
2013-01-03,0.852644,0.726139,-0.115264,-1.664506
2013-01-04,0.338062,1.170494,0.068594,-0.232622
2013-01-05,-0.874027,-0.31534,-0.515381,-0.734613
2013-01-06,0.756373,-0.255169,0.692513,-0.931905


In [46]:
# Build a frame from heterogeneous inputs, one column per keyword. The scalars
# (A, B, F) are repeated to match the four-element columns (C, D, E).
# This rebinds `df2`, replacing the copy made earlier in the notebook.
df2 = pd.DataFrame(
    dict(
        A=1.0,
        B=pd.Timestamp("20130102"),
        C=pd.Series(1, index=list(range(4)), dtype="float32"),
        D=np.array([3] * 4, dtype="int32"),
        E=pd.Categorical(["test", "train", "test", "train"]),
        F="foo",
    )
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [47]:
 df2.dtypes  # one dtype per column; the columns need not share a dtype

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [49]:
# The column labels, returned as an Index object.
df2.columns

Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')

In [50]:
df['A']  # equivalent to attribute access: df.A

2013-01-01   -0.134772
2013-01-02   -0.515160
2013-01-03    0.852644
2013-01-04    0.338062
2013-01-05   -0.874027
2013-01-06    0.756373
Freq: D, Name: A, dtype: float64

In [52]:
# Plain [] with an integer slice selects rows by position (end-exclusive).
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,-0.134772,0.670491,0.614065,-0.079269
2013-01-02,-0.51516,0.29328,-0.621966,0.592382
2013-01-03,0.852644,0.726139,-0.115264,-1.664506


In [53]:
# Plain [] with date strings slices rows by label on the DatetimeIndex;
# unlike a positional slice, both endpoints are included.
df['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,-0.51516,0.29328,-0.621966,0.592382
2013-01-03,0.852644,0.726139,-0.115264,-1.664506
2013-01-04,0.338062,1.170494,0.068594,-0.232622


# How to access data in pandas

Cheat Sheet:
![Pandas cheat-sheet](/pics/Get-values-DataFrame.png)

## Selection by Label

In [54]:
# A single row label returns that row as a Series indexed by column name.
df.loc["2013-01-01"]

A   -0.134772
B    0.670491
C    0.614065
D   -0.079269
Name: 2013-01-01 00:00:00, dtype: float64

In [55]:
# All rows (:), restricted to columns A and B.
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2013-01-01,-0.134772,0.670491
2013-01-02,-0.51516,0.29328
2013-01-03,0.852644,0.726139
2013-01-04,0.338062,1.170494
2013-01-05,-0.874027,-0.31534
2013-01-06,0.756373,-0.255169


In [56]:
# Label slice on the rows (both endpoints included) plus a list of columns.
df.loc['20130102':'20130104', ['A', 'B']]

Unnamed: 0,A,B
2013-01-02,-0.51516,0.29328
2013-01-03,0.852644,0.726139
2013-01-04,0.338062,1.170494


In [57]:
# A scalar row label with a column list reduces the result to a Series.
df.loc['20130102', ['A', 'B']]

A   -0.51516
B    0.29328
Name: 2013-01-02 00:00:00, dtype: float64

In [59]:
# Scalar row label and scalar column label: a single value comes back.
df.loc[dates[0], 'A']

-0.13477219424408182

In [60]:
# From here on, selection is by integer position (iloc) rather than by label:
# this returns the row at position 3, i.e. the fourth row.
df.iloc[3]

A    0.338062
B    1.170494
C    0.068594
D   -0.232622
Name: 2013-01-04 00:00:00, dtype: float64

In [63]:
# Single value at row position 3, column position 2 (column C).
df.iloc[3,2]

0.0685936682504372

In [61]:
# Positional slices are end-exclusive: rows 3-4 and columns 0-1.
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,0.338062,1.170494
2013-01-05,-0.874027,-0.31534


In [62]:
# Rows at positions 1-2, all columns.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,-0.51516,0.29328,-0.621966,0.592382
2013-01-03,0.852644,0.726139,-0.115264,-1.664506


## Selection by dtype