## Indexing and Selection

#### Series

In [1]:
import pandas as pd
# Four float values labelled 'a' through 'd'; the labels form the explicit index.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0], index=list('abcd'))
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [2]:
# Membership on a Series tests the index labels (like dict keys), not the values.
'a' in data

True

In [3]:
# 'populasi' is not yet one of the index labels, so the membership test is False.
'populasi' in data

False

In [5]:
# Materialize the (label, value) pairs, in index order.
list(data.items())

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

In [6]:
# Assigning to a label that is not in the index adds a new entry (dict-style).
# This mutates `data` in place, so every later cell sees the extra 'populasi' row.
data['populasi'] = 100000

In [7]:
# Display the Series after the assignment: 'populasi' is appended last and the
# integer 100000 is shown as a float because the dtype stays float64.
data

a                0.25
b                0.50
c                0.75
d                1.00
populasi    100000.00
dtype: float64

In [8]:
# Re-display of the same Series (its output is identical to the previous cell).
data

a                0.25
b                0.50
c                0.75
d                1.00
populasi    100000.00
dtype: float64

#### Slicing

In [9]:
# Slicing by explicit labels: the end label 'c' is included in the result.
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [10]:
# Boolean masking: keep only the entries strictly between 0.3 and 0.8.
# Each comparison needs its own parentheses because & binds tighter than > and <.
data[(data > 0.3) & (data < 0.8)]

b    0.50
c    0.75
dtype: float64

In [11]:
# Fancy indexing: pass a list of labels to select several entries at once.
data[['a','c']]

a    0.25
c    0.75
dtype: float64

#### Indexers: loc, iloc, and the deprecated ix

In [12]:
# Rebind `data` to a Series whose integer labels (1, 3, 5) do not coincide
# with the positions (0, 1, 2), so label and position lookups give different results.
data = pd.Series(data=list('abc'), index=[1, 3, 5])

In [13]:
data[1] # explicit index: a single-item lookup uses the label 1, not position 1

'a'

In [14]:
data[1:3] # implicit index: slicing uses positions 1 and 2, not the labels 1..3

3    b
5    c
dtype: object

In [15]:
# .loc always uses the explicit labels: this is the entry whose label is 1.
data.loc[1]

'a'

In [16]:
# Label-based slice: labels 1 through 3, with the end label included.
data.loc[1:3]

1    a
3    b
dtype: object

In [17]:
# .iloc always uses the implicit positions: this is the second element (position 1).
data.iloc[1]

'b'

In [18]:
# Position-based slice: positions 1 and 2; the end position 3 is excluded.
data.iloc[1:3]

3    b
5    c
dtype: object

#### DataFrame Slicing

In [39]:
# Population figures keyed by region name, wrapped directly in a Series.
# Despite its name, `population_dict` is a Series from here on; later cells rely on that.
population_dict = pd.Series(
    {
        'Jakarta': 750000,
        'Bogor': 400000,
        'Depok': 200000,
        'Tanggerang': 150000,
        'Bekasi': 234765,
    }
)
population_dict

Jakarta       750000
Bogor         400000
Depok         200000
Tanggerang    150000
Bekasi        234765
dtype: int64

In [40]:
# Area ('luas') figures keyed by the same region names as the population data.
luas_dict = {
    'Jakarta': 32453,
    'Bogor': 435355,
    'Depok': 655453,
    'Tanggerang': 763463,
    'Bekasi': 44564,
}
# Keep the plain dict and the Series under separate names.
luas = pd.Series(luas_dict)
luas

Jakarta        32453
Bogor         435355
Depok         655453
Tanggerang    763463
Bekasi         44564
dtype: int64

In [47]:
# Combine the two Series into one DataFrame: the dict keys become the column
# names and the shared region labels become the row index.
daerah = pd.DataFrame(
    {
        'pop': population_dict,
        'luas': luas,
    }
)
daerah

Unnamed: 0,pop,luas
Jakarta,750000,32453
Bogor,400000,435355
Depok,200000,655453
Tanggerang,150000,763463
Bekasi,234765,44564


In [42]:
# Dictionary-style access: select the 'luas' column by its name.
daerah['luas']

Jakarta        32453
Bogor         435355
Depok         655453
Tanggerang    763463
Bekasi         44564
Name: luas, dtype: int64

In [43]:
# Attribute-style access to the same 'luas' column.
daerah.luas

Jakarta        32453
Bogor         435355
Depok         655453
Tanggerang    763463
Bekasi         44564
Name: luas, dtype: int64

In [44]:
# Pitfall: the column name 'pop' collides with the DataFrame.pop method, so
# attribute access returns the bound method instead of the column.
daerah.pop

<bound method NDFrame.pop of                pop    luas
Jakarta     750000   32453
Bogor       400000  435355
Depok       200000  655453
Tanggerang  150000  763463
Bekasi      234765   44564>

In [45]:
# Bracket access is unambiguous and returns the 'pop' column itself.
daerah['pop']

Jakarta       750000
Bogor         400000
Depok         200000
Tanggerang    150000
Bekasi        234765
Name: pop, dtype: int64

In [30]:
# Attribute access and bracket access gave the very same object for 'luas' in the recorded run.
# NOTE(review): object identity here may depend on the pandas version; confirm before relying on it.
daerah.luas is daerah['luas']

True

In [31]:
# `daerah.pop` is the DataFrame.pop method rather than the 'pop' column, so the identity check fails.
daerah.pop is daerah['pop']

False

In [36]:
# Transpose so the row labels become columns, then pick 'Jakarta':
# this yields the original Jakarta row as a Series indexed by column name.
daerah.T['Jakarta']

pop     750000
luas     32453
Name: Jakarta, dtype: object

In [48]:
# Add a derived column 'kepadatan' (density): population divided by area, row by row.
# This mutates `daerah` in place; all later cells see the extra column.
daerah['kepadatan'] = daerah['pop'] / daerah['luas']
daerah

Unnamed: 0,pop,luas,kepadatan
Jakarta,750000,32453,23.110344
Bogor,400000,435355,0.91879
Depok,200000,655453,0.305132
Tanggerang,150000,763463,0.196473
Bekasi,234765,44564,5.268041


In [50]:
# Label-based slicing on both axes: rows up to and including 'Depok',
# columns up to and including 'luas'.
daerah.loc[:'Depok', :'luas']

Unnamed: 0,pop,luas
Jakarta,750000,32453
Bogor,400000,435355
Depok,200000,655453


In [55]:
# The original cell used `daerah.ix[:4, :'luas']`. `.ix` mixed positional and
# label indexing; it was deprecated in pandas 0.20 and removed in pandas 1.0.
# Equivalent explicit form: first four rows by position (.iloc), then columns
# up to and including 'luas' by label (.loc).
daerah.iloc[:4].loc[:, :'luas']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Unnamed: 0,pop,luas
Jakarta,750000,32453
Bogor,400000,435355
Depok,200000,655453
Tanggerang,150000,763463


In [57]:
# Masking: a boolean row mask (kepadatan > 5) combined with a list of column
# labels in a single .loc call.
daerah.loc[daerah.kepadatan > 5, ['pop','kepadatan']]

Unnamed: 0,pop,kepadatan
Jakarta,750000,23.110344
Bekasi,234765,5.268041
