In [3]:
import pandas as pd 
import numpy as np

In [4]:
dates = pd.date_range('20180701',periods=6) #dates in pandas
dates

DatetimeIndex(['2018-07-01', '2018-07-02', '2018-07-03', '2018-07-04',
               '2018-07-05', '2018-07-06'],
              dtype='datetime64[ns]', freq='D')

In [5]:
df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=list('ABCD'))
df #dataframe with index as dates and columns as A B C D 

Unnamed: 0,A,B,C,D
2018-07-01,0.677224,-0.007172,0.223911,0.861312
2018-07-02,0.481526,-3.062182,0.555586,0.097942
2018-07-03,0.853391,1.486362,-1.157538,0.769359
2018-07-04,-0.402229,-0.921763,-0.516047,1.586753
2018-07-05,1.604596,0.464708,0.57635,-1.028631
2018-07-06,0.356193,-0.283564,-0.849374,-0.629533


<h1 style='color:#775588'>Selection</h1>
<br/>
<p style='color:red'>Note: While standard Python / Numpy expressions for selecting and setting are intuitive and come in handy for
interactive work, for production code, we recommend the optimized pandas data access methods, .at, .iat, .loc
and .iloc.</p>
<p>See the indexing documentation Indexing and Selecting Data and MultiIndex / Advanced Indexing</p>
<br />
<h3 style='color:#123456'>Selecting a single column, which yields a Series, equivalent to df.A:</h3>

In [7]:
df['A']

2018-07-01    0.677224
2018-07-02    0.481526
2018-07-03    0.853391
2018-07-04   -0.402229
2018-07-05    1.604596
2018-07-06    0.356193
Freq: D, Name: A, dtype: float64

<h3 style='color:#123456'>Selecting via [], which slices the rows.</h3>

In [8]:
df[1:4]

Unnamed: 0,A,B,C,D
2018-07-02,0.481526,-3.062182,0.555586,0.097942
2018-07-03,0.853391,1.486362,-1.157538,0.769359
2018-07-04,-0.402229,-0.921763,-0.516047,1.586753


In [10]:
df['20180703':'20180705']

Unnamed: 0,A,B,C,D
2018-07-03,0.853391,1.486362,-1.157538,0.769359
2018-07-04,-0.402229,-0.921763,-0.516047,1.586753
2018-07-05,1.604596,0.464708,0.57635,-1.028631


<h3 style='color:#123456'>Selection by Label<h3>

In [12]:
#For getting a cross section using a label
df.loc[dates[3]]

A   -0.402229
B   -0.921763
C   -0.516047
D    1.586753
Name: 2018-07-04 00:00:00, dtype: float64

In [13]:
#SElecting on a multi - axis by label
df.loc[:,['A','B']]

Unnamed: 0,A,B
2018-07-01,0.677224,-0.007172
2018-07-02,0.481526,-3.062182
2018-07-03,0.853391,1.486362
2018-07-04,-0.402229,-0.921763
2018-07-05,1.604596,0.464708
2018-07-06,0.356193,-0.283564


In [14]:
#Showing label slicing, both endpoints are included:
df.loc['20180703':'20180705',['A','B']]

Unnamed: 0,A,B
2018-07-03,0.853391,1.486362
2018-07-04,-0.402229,-0.921763
2018-07-05,1.604596,0.464708


In [15]:
#Reduction in the dimensions of the returned object:
df.loc['20180705',['A','B']]

A    1.604596
B    0.464708
Name: 2018-07-05 00:00:00, dtype: float64

In [16]:
#For getting a scalar value:
df.loc[dates[0],'A']

0.67722387081480551

In [17]:
#For getting fast access to a scalar (equivalent to the prior method)
df.at[dates[0],'A']

0.67722387081480551

<h3 style='color:#123456'>Selection by Position</h3>

In [18]:
#Select via the position of the passed integers:
df.iloc[3]

A   -0.402229
B   -0.921763
C   -0.516047
D    1.586753
Name: 2018-07-04 00:00:00, dtype: float64

In [19]:
#By integer slices, acting similar to numpy/python:
df.iloc[3:6,2:4]

Unnamed: 0,C,D
2018-07-04,-0.516047,1.586753
2018-07-05,0.57635,-1.028631
2018-07-06,-0.849374,-0.629533


In [20]:
#By lists of integer position locations, similar to the numpy/python style:
df.iloc[[1,2,4],[0,2]]

Unnamed: 0,A,C
2018-07-02,0.481526,0.555586
2018-07-03,0.853391,-1.157538
2018-07-05,1.604596,0.57635


In [21]:
#For slicing rows explicitly:
df.iloc[1:3,:]

Unnamed: 0,A,B,C,D
2018-07-02,0.481526,-3.062182,0.555586,0.097942
2018-07-03,0.853391,1.486362,-1.157538,0.769359


In [22]:
#For slicing columns explicitly:
df.iloc[:,[0,2,3]]

Unnamed: 0,A,C,D
2018-07-01,0.677224,0.223911,0.861312
2018-07-02,0.481526,0.555586,0.097942
2018-07-03,0.853391,-1.157538,0.769359
2018-07-04,-0.402229,-0.516047,1.586753
2018-07-05,1.604596,0.57635,-1.028631
2018-07-06,0.356193,-0.849374,-0.629533


In [23]:
#For getting a value explicitly:
df.iloc[2,2]

-1.1575380820270891

In [24]:
#For getting fast access to a scalar (equivalent to the prior method):
df.iat[2,2]

-1.1575380820270891

<h3 style='color:#123456'>Boolean Indexing<h3>

In [25]:
#Using a single column’s values to select data.
df[df.A > 0]

Unnamed: 0,A,B,C,D
2018-07-01,0.677224,-0.007172,0.223911,0.861312
2018-07-02,0.481526,-3.062182,0.555586,0.097942
2018-07-03,0.853391,1.486362,-1.157538,0.769359
2018-07-05,1.604596,0.464708,0.57635,-1.028631
2018-07-06,0.356193,-0.283564,-0.849374,-0.629533


In [26]:
#Selecting values from a DataFrame where a boolean condition is met.
df[df>0]

Unnamed: 0,A,B,C,D
2018-07-01,0.677224,,0.223911,0.861312
2018-07-02,0.481526,,0.555586,0.097942
2018-07-03,0.853391,1.486362,,0.769359
2018-07-04,,,,1.586753
2018-07-05,1.604596,0.464708,0.57635,
2018-07-06,0.356193,,,


In [27]:
#Using the isin() method for filtering:
df2 = df.copy()
df2['E'] = ['one', 'one','two','three','four','three']
df2

Unnamed: 0,A,B,C,D,E
2018-07-01,0.677224,-0.007172,0.223911,0.861312,one
2018-07-02,0.481526,-3.062182,0.555586,0.097942,one
2018-07-03,0.853391,1.486362,-1.157538,0.769359,two
2018-07-04,-0.402229,-0.921763,-0.516047,1.586753,three
2018-07-05,1.604596,0.464708,0.57635,-1.028631,four
2018-07-06,0.356193,-0.283564,-0.849374,-0.629533,three


In [29]:
df2[df2['E'].isin(['two','four'])]

Unnamed: 0,A,B,C,D,E
2018-07-03,0.853391,1.486362,-1.157538,0.769359,two
2018-07-05,1.604596,0.464708,0.57635,-1.028631,four
