In [1]:
import pandas as pd
import numpy as np

In [2]:
# A DataFrame is a collection of Series that share the same index:
# a two-dimensional, tabular data structure with labeled rows and columns.

In [3]:
# Kept so that any later cell calling `randn` directly still works.
from numpy.random import randn

# Seed NumPy's legacy global generator so the matrix below is reproducible.
np.random.seed(101)

# Draw a 5x4 matrix of samples from the seeded generator and show the raw array.
rand_mat = np.random.randn(5, 4)
print(rand_mat)

[[ 2.70684984  0.62813271  0.90796945  0.50382575]
 [ 0.65111795 -0.31931804 -0.84807698  0.60596535]
 [-2.01816824  0.74012206  0.52881349 -0.58900053]
 [ 0.18869531 -0.75887206 -0.93323722  0.95505651]
 [ 0.19079432  1.97875732  2.60596728  0.68350889]]


In [4]:
# Wrap the random matrix in a DataFrame: rows labeled a-e, columns labeled w-z.
dframe = pd.DataFrame(
    rand_mat,
    index=list('abcde'),
    columns=list('wxyz'),
)

In [5]:
# show the whole frame as plain text: index labels down the side, column labels on top
print(dframe)

          w         x         y         z
a  2.706850  0.628133  0.907969  0.503826
b  0.651118 -0.319318 -0.848077  0.605965
c -2.018168  0.740122  0.528813 -0.589001
d  0.188695 -0.758872 -0.933237  0.955057
e  0.190794  1.978757  2.605967  0.683509


In [6]:
# confirm the type of the object we just built: a pandas DataFrame
type(dframe)

pandas.core.frame.DataFrame

In [7]:
# select a single column by its label; the result is a Series named after the column
dframe['w']

a    2.706850
b    0.651118
c   -2.018168
d    0.188695
e    0.190794
Name: w, dtype: float64

In [8]:
# select a single row by its index label with .loc; the row comes back as a Series
# whose index is the column labels
dframe.loc['a']

w    2.706850
x    0.628133
y    0.907969
z    0.503826
Name: a, dtype: float64

In [9]:
# select a single row by integer position with .iloc (position 0 is the first row, 'a')
dframe.iloc[0]

w    2.706850
x    0.628133
y    0.907969
z    0.503826
Name: a, dtype: float64

In [10]:
# custom selection with .loc: a list of row labels ['', ''] followed by
# a list of column labels ['', ''] returns the matching sub-frame
dframe.loc[['a', 'b'],['y', 'z']]

Unnamed: 0,y,z
a,0.907969,0.503826
b,-0.848077,0.605965


In [11]:
# Element-wise comparison: a boolean frame that is True wherever the value is positive.
df_bool = dframe.gt(0)

In [12]:
# filter by logical condition: indexing with a boolean frame keeps the values where
# the mask is True and leaves every other cell as a missing value
dframe[df_bool]

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,,,0.605965
c,,0.740122,0.528813,
d,0.188695,,,0.955057
e,0.190794,1.978757,2.605967,0.683509


In [13]:
# boolean mask over the rows: True for each row whose 'w' value is positive
dframe['w'] > 0

a     True
b     True
c    False
d     True
e     True
Name: w, dtype: bool

In [15]:
# Keep only the rows where 'w' is positive, then take column 'y' from them.
# A single .loc call does both at once and avoids chained indexing; the result
# is a Series named 'y' (row 'c' is dropped because its 'w' value is negative).
dframe.loc[dframe['w'] > 0, 'y']


a    0.907969
b   -0.848077
d   -0.933237
e    2.605967
Name: y, dtype: float64

In [16]:
# multiple-condition selection: combine boolean masks with & (and) and | (or)
# instead of Python's 'and' / 'or', wrapping each condition in parentheses
dframe[(dframe['w']>0) & (dframe['y']>1)]

Unnamed: 0,w,x,y,z
e,0.190794,1.978757,2.605967,0.683509
