# Selecting Entries[19]
<h3 style="font-family:Courier;font-size:10px;">Converted to Python3</h3>

In [8]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame

In [9]:
#Build a three-element Series labelled A, B, C and double every value
ser1 = Series(np.arange(3), index=list('ABC')) * 2
ser1

A    0
B    2
C    4
dtype: int32

In [10]:
#Look up a single entry through its index label
ser1.loc['B']

2

In [11]:
#Grab by position. The index holds string labels, so a bare integer key
#(ser1[1]) only works through a deprecated positional fallback; .iloc is
#the explicit, supported way to ask for "the second element".
ser1.iloc[1]

2

In [12]:
#Positional slice: the first three entries
ser1.iloc[:3]

A    0
B    2
C    4
dtype: int32

In [13]:
#Select several entries at once by passing a list of labels
ser1.loc[['A', 'B']]

A    0
B    2
dtype: int32

In [14]:
#Boolean selection: keep only the entries whose value exceeds 3
ser1.loc[ser1 > 3]

C    4
dtype: int32

In [15]:
#Boolean assignment: overwrite every entry greater than 3 with 10
#(this modifies ser1 in place)
ser1.loc[ser1 > 3] = 10
ser1

A     0
B     2
C    10
dtype: int32

## How might these methods work on a DataFrame?

In [17]:
#5x5 frame filled with 0..24 row by row; rows are cities, columns are letters
dframe = DataFrame(
    np.arange(25).reshape(5, 5),
    index=['NYC', 'LA', 'SF', 'DC', 'CHI'],
    columns=list('ABCDE'),
)

dframe

Unnamed: 0,A,B,C,D,E
NYC,0,1,2,3,4
LA,5,6,7,8,9
SF,10,11,12,13,14
DC,15,16,17,18,19
CHI,20,21,22,23,24


In [18]:
#Selecting data from a DataFrame
#A single column label returns that column as a Series
dframe.loc[:, 'B']

NYC     1
LA      6
SF     11
DC     16
CHI    21
Name: B, dtype: int32

In [19]:
#A list of column labels returns a smaller DataFrame
dframe.loc[:, ['B', 'E']]

Unnamed: 0,B,E
NYC,1,4
LA,6,9
SF,11,14
DC,16,19
CHI,21,24


In [20]:
#Keep every row whose value in column C is greater than 8
dframe.loc[dframe['C'] > 8]

Unnamed: 0,A,B,C,D,E
SF,10,11,12,13,14
DC,15,16,17,18,19
CHI,20,21,22,23,24


In [21]:
#Element-wise comparison: a frame of booleans with the same shape as dframe
dframe.gt(10)

Unnamed: 0,A,B,C,D,E
NYC,False,False,False,False,False
LA,False,False,False,False,False
SF,False,True,True,True,True
DC,True,True,True,True,True
CHI,True,True,True,True,True


In [22]:
#Use .loc to reference values by row label and column label.
#The old .ix indexer no longer exists in current pandas, and a single
#.loc[row, column] lookup also avoids the chained [row][column] access.
dframe.loc['LA', 'B'], dframe.loc['LA', 'C']

(6, 7)

In [23]:
#Select the second row by integer position (.iloc replaces the removed .ix)
dframe.iloc[1]

A    5
B    6
C    7
D    8
E    9
Name: LA, dtype: int32

# Data Alignment[20]
## Arithmetic between Series and DataFrames

In [24]:
#Two Series sharing the labels A-C; ser2 carries one extra label, D
ser1 = Series([0, 1, 2], index=list('ABC'))
ser2 = Series([3, 4, 5, 6], index=list('ABCD'))

In [25]:
#Display the three-element Series (labels A, B, C)
ser1

A    0
B    1
C    2
dtype: int64

In [26]:
#Display the four-element Series (labels A, B, C, D)
ser2

A    3
B    4
C    5
D    6
dtype: int64

In [27]:
#Adding two Series of different lengths: values are matched by label,
#and a label present in only one operand yields NaN
ser1.add(ser2)

A    3.0
B    5.0
C    7.0
D    NaN
dtype: float64

In [28]:
#The same question for DataFrames: start with a 2x2 frame (columns A, B)
dframe1 = DataFrame(
    np.arange(4).reshape(2, 2),
    index=['NYC', 'LA'],
    columns=['A', 'B'],
)
dframe1

Unnamed: 0,A,B
NYC,0,1
LA,2,3


In [29]:
#A 3x3 frame that only partly overlaps dframe1 (shares column A and rows NYC, LA)
dframe2 = DataFrame(
    np.arange(9).reshape(3, 3),
    index=['NYC', 'SF', 'LA'],
    columns=['A', 'D', 'C'],
)
dframe2

Unnamed: 0,A,D,C
NYC,0,1,2
SF,3,4,5
LA,6,7,8


In [30]:
#Adding dframe1 to dframe2: rows and columns are aligned by label first
dframe1.add(dframe2)

Unnamed: 0,A,B,C,D
LA,8.0,,,
NYC,0.0,,,
SF,,,,


In [31]:
#Values are only added where both the row label and the column label exist in both DataFrames; every other cell becomes NaN

In [32]:
#When the frames differ in shape, fill_value=0 treats a cell missing from
#one frame as 0, so the other frame's value is kept instead of becoming NaN.
#A cell missing from both frames still comes out as NaN.
dframe1.add(other=dframe2, fill_value=0)

Unnamed: 0,A,B,C,D
LA,8.0,3.0,8.0,7.0
NYC,0.0,1.0,2.0,1.0
SF,3.0,,5.0,4.0


In [33]:
#What about operations between Series and DataFrames
#Take the first row of dframe2 by position (.iloc replaces the removed .ix)
ser3 = dframe2.iloc[0]
ser3

A    0
D    1
C    2
Name: NYC, dtype: int32

In [34]:
#Subtract the Series from every row, matching its labels against the columns
dframe2.sub(ser3, axis='columns')

Unnamed: 0,A,D,C
NYC,0,0,0
SF,3,3,3
LA,6,6,6


# Rank and Sort[21]

In [35]:
#Series whose index labels are deliberately out of alphabetical order
ser1 = Series(range(3), index=list('CAB'))
ser1

C    0
A    1
B    2
dtype: int32

In [36]:
#Return a copy ordered by index label (ascending is the default)
ser1.sort_index(ascending=True)

A    1
B    2
C    0
dtype: int32

In [37]:
#Order Series by values not by index.
#Series.order() was the old spelling; it has been removed from pandas and
#its result was discarded here anyway, so only sort_values() is kept.
ser1.sort_values()

  from ipykernel import kernelapp as app


C    0
A    1
B    2
dtype: int32

In [38]:
from numpy.random import randn

In [39]:
#Ten draws from randn; unseeded, so the values change on every run
ser2 = Series(data=randn(10))
ser2

0    0.925089
1    0.509849
2   -0.486421
3   -0.060133
4   -0.617488
5    1.779224
6   -1.329086
7    1.351262
8   -0.046729
9    1.296709
dtype: float64

In [40]:
#Sort by value, smallest first; the original index labels travel with the values
ser2.sort_values(ascending=True)

6   -1.329086
4   -0.617488
2   -0.486421
3   -0.060133
8   -0.046729
1    0.509849
0    0.925089
9    1.296709
7    1.351262
5    1.779224
dtype: float64

In [41]:
#Rank of each entry within the Series (1 = smallest value);
#method='average' is the default tie-breaking rule
ser2.rank(method='average')

0     7.0
1     6.0
2     3.0
3     4.0
4     2.0
5    10.0
6     1.0
7     9.0
8     5.0
9     8.0
dtype: float64

In [42]:
#A longer random Series: fifty unseeded draws from randn
ser3 = Series(data=randn(50))
ser3

0    -0.416945
1     0.122219
2     0.493501
3     1.308076
4    -0.602143
5     0.169991
6     0.045340
7    -1.015510
8     1.793135
9     1.266336
10    0.212036
11    0.440208
12   -0.615090
13   -1.394584
14   -0.138960
15   -1.033765
16    1.133194
17    1.572166
18   -0.407619
19    0.055045
20   -1.691258
21    0.243539
22    0.209167
23   -0.806099
24   -1.174373
25   -1.147882
26    1.282939
27   -1.553918
28    1.167035
29   -0.513286
30   -0.354190
31    0.959203
32    0.853895
33   -0.185026
34   -1.110611
35    0.308151
36    0.957979
37    1.170759
38    1.592287
39    0.574730
40   -0.690147
41   -0.169293
42   -0.678360
43   -0.400168
44   -0.054196
45    0.701610
46   -0.856774
47   -0.075511
48   -0.017693
49   -0.279571
dtype: float64

In [43]:
#Sort the fifty values from smallest to largest
ser3.sort_values(ascending=True)

20   -1.691258
27   -1.553918
13   -1.394584
24   -1.174373
25   -1.147882
34   -1.110611
15   -1.033765
7    -1.015510
46   -0.856774
23   -0.806099
40   -0.690147
42   -0.678360
12   -0.615090
4    -0.602143
29   -0.513286
0    -0.416945
18   -0.407619
43   -0.400168
30   -0.354190
49   -0.279571
33   -0.185026
41   -0.169293
14   -0.138960
47   -0.075511
44   -0.054196
48   -0.017693
6     0.045340
19    0.055045
1     0.122219
5     0.169991
22    0.209167
10    0.212036
21    0.243539
35    0.308151
11    0.440208
2     0.493501
39    0.574730
45    0.701610
32    0.853895
36    0.957979
31    0.959203
16    1.133194
28    1.167035
37    1.170759
9     1.266336
26    1.282939
3     1.308076
17    1.572166
38    1.592287
8     1.793135
dtype: float64