In [1]:
import pandas as pd
import numpy as np

## Series

In [5]:
# Float64 Series with string labels 'a'..'d'; the Series indexing examples
# in this section all operate on it.
obj = pd.Series(np.arange(4, dtype=float), index=list('abcd'))
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [6]:
# Label-based lookup: returns the scalar stored under index label 'b'.
obj['b']

1.0

In [7]:
# Position-based lookup: the element at integer position 1 (label 'b').
# .iloc is used instead of plain obj[1] because treating an integer key as a
# position on a non-integer index is a deprecated fallback of
# Series.__getitem__ (FutureWarning since pandas 2.1).
obj.iloc[1]

1.0

In [8]:
# Integer slice: selects by position with an exclusive end (positions 2 and 3).
obj[2:4]

c    2.0
d    3.0
dtype: float64

In [9]:
# A list of labels returns the matching entries in the order requested.
obj[['b', 'a', 'd']]

b    1.0
a    0.0
d    3.0
dtype: float64

In [10]:
# Select the elements at integer positions 1 and 3 (labels 'b' and 'd').
# .iloc makes the positional intent explicit; plain obj[[1, 3]] depends on the
# deprecated positional fallback of Series.__getitem__ for integer keys on a
# non-integer index.
obj.iloc[[1, 3]]

b    1.0
d    3.0
dtype: float64

In [11]:
# Boolean mask: keeps only the entries whose value is less than 2.
obj[obj < 2]

a    0.0
b    1.0
dtype: float64

### Slicing with labels
Slicing with labels behaves differently from normal Python slicing in that the endpoint is inclusive:

In [12]:
# Label slice: both endpoints ('b' and 'c') are included in the result.
obj['b':'c']

b    1.0
c    2.0
dtype: float64

In [13]:
# Assigning through a label slice overwrites every entry from 'b' through 'c'
# (inclusive). This modifies obj in place.
obj['b':'c'] = 5
obj

a    0.0
b    5.0
c    5.0
d    3.0
dtype: float64

## DataFrame

In [14]:
# 4x4 frame holding the integers 0..15 row by row, with state names as row
# labels and 'one'..'four' as column labels.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


### Basic indexing

In [15]:
# A single column label returns that column as a Series.
data['two']

Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int32

In [16]:
# A list of column labels returns a DataFrame with the columns in the listed order.
data[['three', 'one']]

Unnamed: 0,three,one
Ohio,2,0
Colorado,6,4
Utah,10,8
New York,14,12


In [17]:
# Special case: a slice inside [] selects rows (here the first two), not columns.
data[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [18]:
# Special case: a boolean Series inside [] filters rows (rows where 'three' > 5).
data[data['three'] > 5]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


__Use `DataFrame.query(condition)` to return the subset of rows of the DataFrame that match `condition`__

In [3]:
# Small 3x3 frame (values 0..8, columns A, B, C) used for the query() examples.
df = pd.DataFrame(np.arange(9).reshape((3, 3)), columns=['A', 'B', 'C'])
df

Unnamed: 0,A,B,C
0,0,1,2
1,3,4,5
2,6,7,8


In [4]:
# Keep the rows whose value in column C is below 6.
df.query('C < 6')

Unnamed: 0,A,B,C
0,0,1,2
1,3,4,5


In [5]:
# Column names can be combined arithmetically: rows where twice B is at most C.
df.query('2*B <= C')

Unnamed: 0,A,B,C
0,0,1,2


In [6]:
# Rows whose value in column A is even.
df.query('A % 2 == 0')

Unnamed: 0,A,B,C
0,0,1,2
2,6,7,8


### Indexing with a boolean DataFrame

In [19]:
# Element-wise comparison against a scalar yields a boolean DataFrame with the
# same row and column labels.
data < 5

Unnamed: 0,one,two,three,four
Ohio,True,True,True,True
Colorado,True,False,False,False
Utah,False,False,False,False
New York,False,False,False,False


In [21]:
# Boolean-DataFrame indexing on the left-hand side sets every element that is
# below 5 to 0. This mutates data in place, so every later cell sees the
# zeroed values.
data[data < 5] = 0
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


### Selection with loc and iloc

In [22]:
# Select a single row ('Colorado') and multiple columns ('two', 'three') by label;
# the result is a Series named after the row.
data.loc['Colorado', ['two', 'three']]

two      5
three    6
Name: Colorado, dtype: int32

In [23]:
# The same kind of selection by integer position using iloc:
# the row at position 2 and the columns at positions 3, 0 and 1 (in that order).
data.iloc[2, [3, 0, 1]]

four    11
one      8
two      9
Name: Utah, dtype: int32

In [24]:
# A single integer selects the row at that position (position 2 is 'Utah') as a Series.
data.iloc[2]

one       8
two       9
three    10
four     11
Name: Utah, dtype: int32

In [25]:
# Lists of integers select rows and columns by position, in the listed order.
data.iloc[[1, 2], [3, 0, 1]]

Unnamed: 0,four,one,two
Colorado,7,0,5
Utah,11,8,9


In [26]:
# loc accepts label slices, and the endpoint is included: rows up to and
# including 'Utah', column 'two'.
data.loc[:'Utah', 'two']

Ohio        0
Colorado    5
Utah        9
Name: two, dtype: int32

In [27]:
# Two steps: take all rows and the first three columns by position, then
# filter the rows with a boolean mask (rows where 'three' > 5).
data.iloc[:, :3][data.three > 5]

Unnamed: 0,one,two,three
Colorado,0,5,6
Utah,8,9,10
New York,12,13,14


Table 5-4. Indexing options with DataFrame

    df[val]                             Select single column or sequence of columns from the DataFrame; 
                                        special case conveniences: boolean array (filter rows), slice (slice rows), 
                                        or boolean DataFrame (set values based on some criterion)

    df.loc[val]                         Selects single row or subset of rows from the DataFrame by label
    df.loc[:, val]                      Selects single column or subset of columns by label
    df.loc[val1, val2]                  Select both rows and columns by label
    df.iloc[where]                      Selects single row or subset of rows from the DataFrame by integer position
    df.iloc[:, where]                   Selects single column or subset of columns by integer position
    df.iloc[where_i, where_j]           Select both rows and columns by integer position
    df.at[label_i, label_j]             Select a single scalar value by row and column label
    df.iat[i, j]                        Select a single scalar value by row and column position (integers)
    reindex method                      Select either rows or columns by labels
    get_value, set_value methods        Select single value by row and column label (deprecated in pandas 0.21 and removed in 1.0; use df.at / df.iat instead)