In [1]:
import pandas as pd
import numpy as np

In [2]:
obj = pd.Series([4.5,8.7,-9.0,3.6],index=['d','b','a','c'])

In [3]:
obj

d    4.5
b    8.7
a   -9.0
c    3.6
dtype: float64

In [4]:
obj2 = obj.reindex(['a','b','c','d','e'])

In [5]:
obj2

a   -9.0
b    8.7
c    3.6
d    4.5
e    NaN
dtype: float64

In [6]:
obj3 = pd.Series(['blue','purple','yellow'],index=[0,2,4])

In [7]:
obj3

0      blue
2    purple
4    yellow
dtype: object

In [8]:
obj3.reindex(range(6),method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [9]:
frame = pd.DataFrame(np.arange(9).reshape((3, 3)),
        index=['a', 'c', 'd'],
        columns=['Ohio', 'Texas', 'California'])

In [10]:
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [11]:
frame2 = frame.reindex(['a','b','c','d'])

In [12]:
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [13]:
states =['Texas','Utah','California']

In [14]:
frame.reindex(columns=states)

Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


In [15]:
frame.loc[['a','b','c','d'],states]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


Unnamed: 0,Texas,Utah,California
a,1.0,,2.0
b,,,
c,4.0,,5.0
d,7.0,,8.0


### Dropping Entries from axis

In [16]:
obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])

In [17]:
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [18]:
new_obj = obj.drop('c')

In [19]:
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [20]:
data = pd.DataFrame(np.arange(16).reshape(4,4),
                   index =['Ohio','Colorado','Utah','New York'],
                   columns =['one','two','three','four'])

In [21]:
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [22]:
data.drop(['Colorado','Ohio'])

Unnamed: 0,one,two,three,four
Utah,8,9,10,11
New York,12,13,14,15


In [23]:
data.drop('two',axis=1)

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utah,8,10,11
New York,12,14,15


In [24]:
data.drop(['two','four'],axis='columns')

Unnamed: 0,one,three
Ohio,0,2
Colorado,4,6
Utah,8,10
New York,12,14


In [25]:
obj.drop('c',inplace=True)

In [26]:
obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

### Indexing,Selection and Filtering

In [27]:
obj = pd.Series(np.arange(4.),index=['a','b','c','d'])

In [28]:
obj['b']

1.0

In [29]:
obj[1]

1.0

In [30]:
obj[2:4]

c    2.0
d    3.0
dtype: float64

In [31]:
obj[['b', 'a', 'd']]

b    1.0
a    0.0
d    3.0
dtype: float64

In [32]:
obj[[1, 3]]

b    1.0
d    3.0
dtype: float64

In [33]:
obj[obj<2]

a    0.0
b    1.0
dtype: float64

In [34]:
obj['b':'c']

b    1.0
c    2.0
dtype: float64

In [35]:
obj['b':'c']=5

In [36]:
obj

a    0.0
b    5.0
c    5.0
d    3.0
dtype: float64

In [37]:
data = pd.DataFrame(np.arange(16).reshape(4,4),
                   index =['Ohio','Colorado','Utah','New York'],
                   columns =['one','two','three','four'])

In [38]:
data['two']

Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int64

In [39]:
data[['three','one']]

Unnamed: 0,three,one
Ohio,2,0
Colorado,6,4
Utah,10,8
New York,14,12


In [40]:
data[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [41]:
data[data['three']>5]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [42]:
data < 5

Unnamed: 0,one,two,three,four
Ohio,True,True,True,True
Colorado,True,False,False,False
Utah,False,False,False,False
New York,False,False,False,False


In [43]:
data[data<5]=0

In [44]:
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


### Selection with iloc and loc


In [45]:
data.loc['Colorado',['two','three']]

two      5
three    6
Name: Colorado, dtype: int64

In [46]:
data.iloc[2,[3,0,1]]

four    11
one      8
two      9
Name: Utah, dtype: int64

### Differences Between iloc,loc and ix

.loc[] is a label based method. This means that it will take into account the names or labels of the index when taking slices. For our data set B, df_B.loc["b"] will result in all the second row being selected.

.iloc[] takes slices based on index’s position. For the ones familiar with Python, it behaves like regular slicing. You just indicate the positional index number, and you get the appropriate slice. For example df_A.iloc[0] and df_B.iloc[0], both will give you the first row of the data set. Note that, as in Python, .iloc is zero positional based, i.e. it starts at 0. 

.ix[] is the more flexible option. It accepts label based or index positional arguments.

In [47]:
data.iloc[[1, 2], [3, 0, 1]]

Unnamed: 0,four,one,two
Colorado,7,0,5
Utah,11,8,9


In [48]:
data.loc[:'Utah', 'two']

Ohio        0
Colorado    5
Utah        9
Name: two, dtype: int64

In [49]:
data.iloc[:, :3][data.three > 5]

Unnamed: 0,one,two,three
Colorado,0,5,6
Utah,8,9,10
New York,12,13,14


### Integer indexes

In [50]:
ser = pd.Series(np.arange(3.))

In [51]:
ser

0    0.0
1    1.0
2    2.0
dtype: float64

In [53]:
ser2= pd.Series(np.arange(3.),index=['a','b','c'])

In [54]:
ser2[-1]

2.0

In [55]:
ser[:1]

0    0.0
dtype: float64

In [56]:
ser.loc[:1]

0    0.0
1    1.0
dtype: float64