In [1]:
# Chapter 5 - Getting started with pandas | Essential Functionality

## Reindexing

In [2]:
import numpy as np
import pandas as pd

In [3]:
# New obj series
obj = pd.Series([2.7,1.2,1.9,9.5], index=["d","b","a","c"])
obj

d    2.7
b    1.2
a    1.9
c    9.5
dtype: float64

In [4]:
# Reindex objek
algn_obj = obj.reindex(index=["a","b","c","d","e"])
algn_obj

a    1.9
b    1.2
c    9.5
d    2.7
e    NaN
dtype: float64

**ffill** > *Forward fill*

In [5]:
# Create new series
obj2 = pd.Series(["Merah","Kuning","Hijau"],index=[0,3,5])
obj2

0     Merah
3    Kuning
5     Hijau
dtype: object

In [6]:
# Without method
reindx_obj2 = obj2.reindex(np.arange(7))
reindx_obj2

0     Merah
1       NaN
2       NaN
3    Kuning
4       NaN
5     Hijau
6       NaN
dtype: object

In [7]:
# menggunakan method ffill
reindx_ffill_obj2 = obj2.reindex(np.arange(7), method="ffill")
reindx_ffill_obj2 

0     Merah
1     Merah
2     Merah
3    Kuning
4    Kuning
5     Hijau
6     Hijau
dtype: object

In [8]:
# Create df with given columns and index
frame = pd.DataFrame(np.arange(8).reshape(4,2),columns=["Dua","Satu"], index=['3rd','4th','1st','2nd'])
frame

Unnamed: 0,Dua,Satu
3rd,0,1
4th,2,3
1st,4,5
2nd,6,7


In [9]:
# Reindex index dan kolom
frame.reindex(index=['1st','2nd','3rd','4th'], columns=['Satu','Dua'])

Unnamed: 0,Satu,Dua
1st,5,4
2nd,7,6
3rd,1,0
4th,3,2


In [10]:
# Without a keyword argument the labels are matched against the row index, so every value is NaN
frame.reindex(['Satu','Dua'])

Unnamed: 0,Dua,Satu
Satu,,
Dua,,


In [11]:
# With axis argumen
frame.reindex(['Satu','Dua','Tiga'], axis="columns")

Unnamed: 0,Satu,Dua,Tiga
3rd,1,0,
4th,3,2,
1st,5,4,
2nd,7,6,


In [12]:
# Documentation of reindex
print(pd.DataFrame.reindex.__doc__)


Conform Series/DataFrame to new index with optional filling logic.

Places NA/NaN in locations having no value in the previous index. A new object
is produced unless the new index is equivalent to the current one and
``copy=False``.

Parameters
----------

keywords for axes : array-like, optional
    New labels / index to conform to, should be specified using
    keywords. Preferably an Index object to avoid duplicating data.

method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
    Method to use for filling holes in reindexed DataFrame.
    Please note: this is only applicable to DataFrames/Series with a
    monotonically increasing/decreasing index.

    * None (default): don't fill gaps
    * pad / ffill: Propagate last valid observation forward to next
      valid.
    * backfill / bfill: Use next valid observation to fill gap.
    * nearest: Use nearest valid observations to fill gap.

copy : bool, default True
    Return a new object, even if the passed indexes are the 

Ada cara lain untuk me-reindex menggunakan fungsi *loc* sebagaimana yang sudah kita pelajari.

In [13]:
# Reindex menggunakan loc
frame.loc[['1st','2nd','3rd','4th'],['Satu','Dua']]

Unnamed: 0,Satu,Dua
1st,5,4
2nd,7,6
3rd,1,0
4th,3,2


In [14]:
# .loc raises KeyError when a requested label ('Tiga') does not exist;
# unlike reindex, it does not add the missing label filled with NaN.
try:
    frame.loc[['1st','2nd','3rd','4th'],['Satu','Dua','Tiga']]
except KeyError as ex:
    print(ex)

"['Tiga'] not in index"


## Dropping Entries from an Axis

In [15]:
series = pd.Series(np.arange(6),index=['a','b','c','d','e','f'])
series

a    0
b    1
c    2
d    3
e    4
f    5
dtype: int32

In [16]:
# Drop a row with the drop method
series.drop('a')

b    1
c    2
d    3
e    4
f    5
dtype: int32

In [17]:
# Same but more complex
series.loc[series.index != 'a']

b    1
c    2
d    3
e    4
f    5
dtype: int32

In [18]:
# Drop multiple row
series.drop(['a','f'])

b    1
c    2
d    3
e    4
dtype: int32

In [19]:
# fungsi drop tidak merubah kondisi objek awal
series

a    0
b    1
c    2
d    3
e    4
f    5
dtype: int32

#### Dropping entries on DataFrame

In [20]:
# Labels for the 4x4 demo frame (values 0..15, filled row by row)
columns = ["Satu", "Dua", "Tiga", "Empat"]
index_name = ["1st", "2nd", "3rd", "4th"]

frame2 = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=index_name,
    columns=columns,
)
frame2

Unnamed: 0,Satu,Dua,Tiga,Empat
1st,0,1,2,3
2nd,4,5,6,7
3rd,8,9,10,11
4th,12,13,14,15


In [21]:
# Droping by index
frame2.drop(index=['1st','4th'])

Unnamed: 0,Satu,Dua,Tiga,Empat
2nd,4,5,6,7
3rd,8,9,10,11


In [22]:
# Dropping by columns name
frame2.drop(columns=['Dua','Empat'])

Unnamed: 0,Satu,Tiga
1st,0,2
2nd,4,6
3rd,8,10
4th,12,14


In [23]:
frame2.loc["2nd","Satu"]

4

## Indexing, Selecting and Filtering

In [24]:
np_array = np.arange(3)
np_array

array([0, 1, 2])

In [25]:
series = pd.Series(np.arange(4), index=list('abcd'))
series["b"]

1

In [26]:
series[1]

1

In [27]:
# Slicing posisi index
series[2:4]

c    2
d    3
dtype: int32

In [28]:
# Slicing menggunakan label
series["c":"e"]

c    2
d    3
dtype: int32

In [29]:
# index array
series[[0,3,1]]

a    0
d    3
b    1
dtype: int32

In [30]:
# index array
series[['a','d','b']]

a    0
d    3
b    1
dtype: int32

In [31]:
# boolean filtering
series[series%2 != 0]

b    1
d    3
dtype: int32

In [32]:
# Prefered way menggunakan operator loc
series.loc[['a','d','b']]

a    0
d    3
b    1
dtype: int32

In [33]:
# Why .loc/.iloc are preferred over []: with [] an integer list is read as labels on ser1 but as positions on ser2
ser1 = pd.Series([1,2,3], index=[2,0,1])
ser2 = pd.Series([1,2,3], index=['a','b','c'])

In [34]:
ser1

2    1
0    2
1    3
dtype: int64

In [35]:
ser2

a    1
b    2
c    3
dtype: int64

In [36]:
ser1[[0,1,2]]

0    2
1    3
2    1
dtype: int64

In [37]:
ser2[[0,1,2]]

a    1
b    2
c    3
dtype: int64

In [38]:
# Using iloc: select by integer position (rows 0, 1, 2 in stored order)
ser1.iloc[[0,1,2]]

2    1
0    2
1    3
dtype: int64

In [39]:
# Using iloc (positional); this repeats the selection from the previous cell
ser1.iloc[[0,1,2]]

2    1
0    2
1    3
dtype: int64

In [40]:
ser2.iloc[[0,1,2]]

a    1
b    2
c    3
dtype: int64

In [41]:
ser2.loc[['a','b','c']]

a    1
b    2
c    3
dtype: int64

In [42]:
ser1.loc[[0,1,2]]

0    2
1    3
2    1
dtype: int64

*Indexing into DataFrame*

In [43]:
frame = pd.DataFrame(np.arange(16).reshape(4,4).T, columns=['Satu','Dua','Tiga','Empat'], 
                     index=['Jakarta','Bogor','Bandung','Jogja'])
frame

Unnamed: 0,Satu,Dua,Tiga,Empat
Jakarta,0,4,8,12
Bogor,1,5,9,13
Bandung,2,6,10,14
Jogja,3,7,11,15


In [44]:
frame['Satu']

Jakarta    0
Bogor      1
Bandung    2
Jogja      3
Name: Satu, dtype: int32

In [45]:
frame[['Satu','Empat']]

Unnamed: 0,Satu,Empat
Jakarta,0,12
Bogor,1,13
Bandung,2,14
Jogja,3,15


In [46]:
# A slice inside square brackets selects rows, not columns
frame[:2]

Unnamed: 0,Satu,Dua,Tiga,Empat
Jakarta,0,4,8,12
Bogor,1,5,9,13


In [47]:
# Boolean mask: True where the value is even and non-zero
(frame %2 == 0) & (frame != 0)

Unnamed: 0,Satu,Dua,Tiga,Empat
Jakarta,False,True,True,True
Bogor,False,False,False,False
Bandung,True,True,True,True
Jogja,False,False,False,False


In [48]:
frame[(frame %2 == 0) & (frame != 0)]

Unnamed: 0,Satu,Dua,Tiga,Empat
Jakarta,,4.0,8.0,12.0
Bogor,,,,
Bandung,2.0,6.0,10.0,14.0
Jogja,,,,


In [49]:
frame[(frame %2 == 0) & (frame != 0)]

Unnamed: 0,Satu,Dua,Tiga,Empat
Jakarta,,4.0,8.0,12.0
Bogor,,,,
Bandung,2.0,6.0,10.0,14.0
Jogja,,,,


In [50]:
frame[(frame %2 == 0) & (frame != 0)] = 88
frame

Unnamed: 0,Satu,Dua,Tiga,Empat
Jakarta,0,88,88,88
Bogor,1,5,9,13
Bandung,88,88,88,88
Jogja,3,7,11,15


*Selection using loc and iloc*

In [51]:
frame.loc['Bogor']

Satu      1
Dua       5
Tiga      9
Empat    13
Name: Bogor, dtype: int32

In [52]:
frame.loc[['Bogor','Jakarta']]

Unnamed: 0,Satu,Dua,Tiga,Empat
Bogor,1,5,9,13
Jakarta,0,88,88,88


In [53]:
frame.loc[['Bogor','Jakarta'],['Satu','Tiga']]

Unnamed: 0,Satu,Tiga
Bogor,1,9
Jakarta,0,88


In [54]:
# Iloc
frame.iloc[0]

Satu      0
Dua      88
Tiga     88
Empat    88
Name: Jakarta, dtype: int32

In [55]:
frame.iloc[[0,2]]

Unnamed: 0,Satu,Dua,Tiga,Empat
Jakarta,0,88,88,88
Bandung,88,88,88,88


In [56]:
# Indexing iloc with arrays on rows and columns
frame.iloc[[0,2],[0,3,2]]

Unnamed: 0,Satu,Empat,Tiga
Jakarta,0,88,88
Bandung,88,88,88


In [57]:
# Indexing loc with slicing on row and column
frame.loc[:"Bandung",["Satu","Empat","Dua"]]

Unnamed: 0,Satu,Empat,Dua
Jakarta,0,88,88
Bogor,1,13,5
Bandung,88,88,88


In [58]:
# Indexing iloc with slicing
frame.iloc[:,0:3]

Unnamed: 0,Satu,Dua,Tiga
Jakarta,0,88,88
Bogor,1,5,9
Bandung,88,88,88
Jogja,3,7,11


*Boolean Array using loc and iloc*

In [59]:
frame.loc[frame['Satu'] % 2 != 0]

Unnamed: 0,Satu,Dua,Tiga,Empat
Bogor,1,5,9,13
Jogja,3,7,11,15


In [60]:
# iloc does not accept a boolean Series as a mask; the error it raises is
# caught and printed so the notebook keeps running.
try:
    frame.iloc[frame['Satu'] % 2 != 0]
except Exception as ex:
    print (ex)

iLocation based boolean indexing cannot use an indexable as a mask


### Pitfalls

In [61]:
frame = pd.DataFrame(np.arange(16).reshape(4,4).T, columns=['Satu','Dua','Tiga','Empat'], 
                     index=['Jakarta','Bogor','Bandung','Jogja'])
frame

Unnamed: 0,Satu,Dua,Tiga,Empat
Jakarta,0,4,8,12
Bogor,1,5,9,13
Bandung,2,6,10,14
Jogja,3,7,11,15


*Chaining index*

In [62]:
frame.loc[:,"Satu"] = 1

In [63]:
frame

Unnamed: 0,Satu,Dua,Tiga,Empat
Jakarta,1,4,8,12
Bogor,1,5,9,13
Bandung,1,6,10,14
Jogja,1,7,11,15


In [64]:
frame.iloc[2] = 5

In [65]:
frame

Unnamed: 0,Satu,Dua,Tiga,Empat
Jakarta,1,4,8,12
Bogor,1,5,9,13
Bandung,5,5,5,5
Jogja,1,7,11,15


In [66]:
 frame.loc[frame['Empat']>5] = 3

In [67]:
frame

Unnamed: 0,Satu,Dua,Tiga,Empat
Jakarta,3,3,3,3
Bogor,3,3,3,3
Bandung,5,5,5,5
Jogja,3,3,3,3


In [68]:
# Chained
frame.loc[frame['Tiga']== 5]['Tiga']

Bandung    5
Name: Tiga, dtype: int32

In [69]:
frame.loc[frame['Tiga'] == 5,'Tiga']

Bandung    5
Name: Tiga, dtype: int32

In [70]:
# Pitfall (intentional demo): chained indexing assigns into a temporary object,
# which triggers the warning shown in the output; frame itself is not updated.
frame.loc[frame['Tiga']== 5]['Tiga'] = 40

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  frame.loc[frame['Tiga']== 5]['Tiga'] = 40


In [71]:
frame.loc[frame['Tiga'] == 5,'Tiga'] = 99
frame

Unnamed: 0,Satu,Dua,Tiga,Empat
Jakarta,3,3,3,3
Bogor,3,3,3,3
Bandung,5,5,99,5
Jogja,3,3,3,3


## Arithmetic and Data Alignment

In [72]:
# Create two Series with partially overlapping indexes
s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=["a", "c", "d", "e"])
s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1],index=["a", "c", "e", "f", "g"])

In [73]:
s1

a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64

In [74]:
s2

a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64

In [75]:
s1+s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [76]:
# Data Frame
df1 = pd.DataFrame({'Satu':[24,25,26],'Dua':[27,26,25]},index=['a','b','c'])
df2 = pd.DataFrame({'Tiga':[11,12,13],'Dua':[4,5,6],'Satu':[0,1,2]},index=['c','b','a'])

In [77]:
df1

Unnamed: 0,Satu,Dua
a,24,27
b,25,26
c,26,25


In [78]:
df2

Unnamed: 0,Tiga,Dua,Satu
c,11,4,0
b,12,5,1
a,13,6,2


In [79]:
df1+df2

Unnamed: 0,Dua,Satu,Tiga
a,33,26,
b,31,26,
c,29,26,


**Arithmetic method with fill values**

In [80]:
# Using the add method with the fill_value argument
df1.add(df2, fill_value=0)

Unnamed: 0,Dua,Satu,Tiga
a,33,26,13.0
b,31,26,12.0
c,29,26,11.0


In [81]:
df1.reindex()

Unnamed: 0,Satu,Dua
a,24,27
b,25,26
c,26,25


In [82]:
df1.reindex(columns=['Satu','Dua','Tiga','Empat'], fill_value=0)

Unnamed: 0,Satu,Dua,Tiga,Empat
a,24,27,0,0
b,25,26,0,0
c,26,25,0,0


*Operation between dataframe and series*

In [83]:
arr = np.arange(12).reshape(3,4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [84]:
arr[0]

array([0, 1, 2, 3])

In [85]:
# Broadcasting
arr - arr[0]

array([[0, 0, 0, 0],
       [4, 4, 4, 4],
       [8, 8, 8, 8]])

Same as NumPy: operations between a DataFrame and a Series are broadcast in a similar way.

In [86]:
frame = pd.DataFrame(np.arange(12).reshape(3,4), columns=list('abcd'),index=('Jakarta','Bogor','Bandung'))
frame

Unnamed: 0,a,b,c,d
Jakarta,0,1,2,3
Bogor,4,5,6,7
Bandung,8,9,10,11


In [87]:
series = frame.iloc[0]
series

a    0
b    1
c    2
d    3
Name: Jakarta, dtype: int32

In [88]:
# Broadcasting
frame-series

Unnamed: 0,a,b,c,d
Jakarta,0,0,0,0
Bogor,4,4,4,4
Bandung,8,8,8,8


*If an index value is not found in either the DataFrame’s columns or the Series’s index, the objects will be reindexed to form the union:*

In [89]:
frame

Unnamed: 0,a,b,c,d
Jakarta,0,1,2,3
Bogor,4,5,6,7
Bandung,8,9,10,11


In [90]:
series2 = frame['a']
series2

Jakarta    0
Bogor      4
Bandung    8
Name: a, dtype: int32

In [91]:
frame + series2

Unnamed: 0,Bandung,Bogor,Jakarta,a,b,c,d
Jakarta,,,,,,,
Bogor,,,,,,,
Bandung,,,,,,,


In [92]:
frame.add(series2, axis='index')

Unnamed: 0,a,b,c,d
Jakarta,0,1,2,3
Bogor,8,9,10,11
Bandung,16,17,18,19


In [93]:
series2

Jakarta    0
Bogor      4
Bandung    8
Name: a, dtype: int32

## Function Application and Mapping

In [94]:
frame = pd.DataFrame(np.arange(9).reshape(3,3), columns=['Satu','Dua','Tiga'])
frame

Unnamed: 0,Satu,Dua,Tiga
0,0,1,2
1,3,4,5
2,6,7,8


NumPy universal function (ufunc) dapat digunakan pada DataFrame

In [95]:
# Fungsi pada numpy diterapkan pada Dataframe
np.power(frame,2)

Unnamed: 0,Satu,Dua,Tiga
0,0,1,4
1,9,16,25
2,36,49,64


Anda dapat menerapkan custom function sepanjang axis pada sebuah DataFrame

Fungsi `apply` dapat digunakan untuk menerapkan fungsi pada sebuah axis dari dataframe. (index atau column)
```
axis : {0 or 'index', 1 or 'columns'}, default 0
            Axis along which the function is applied:

            * 0 or 'index': apply function to each column.
            * 1 or 'columns': apply function to each row.
```
> Oke agak kebalik ya, hmmm diingat saja catatan ini

In [96]:
frame

Unnamed: 0,Satu,Dua,Tiga
0,0,1,2
1,3,4,5
2,6,7,8


In [97]:
print(frame)

   Satu  Dua  Tiga
0     0    1     2
1     3    4     5
2     6    7     8


In [98]:
def gap_max_min(x):
    """Return the spread (max - min) of ``x``, printing it first.

    The print makes each call issued by ``DataFrame.apply`` visible in the
    cell output.
    """
    spread = x.max() - x.min()
    print (spread)
    return spread

In [99]:
frame.apply(gap_max_min)
# Default axis is 0 ('index'): the function is applied to each column

6
6
6


Satu    6
Dua     6
Tiga    6
dtype: int32

In [100]:
# Apply the function to each row (axis='columns')
frame.apply(gap_max_min, axis='columns')

2
2
2


0    2
1    2
2    2
dtype: int32

Return value pada fungsi `apply` tidak harus selalu mengembalikan scalar, namun dapat juga mengembalikan *Series*

In [101]:
def dsply_min_max(x):
    """Return a Series with the max and min of ``x`` under labels 'max' and 'min'.

    The Series is printed before it is returned so that every call issued by
    ``DataFrame.apply`` shows up in the cell output.
    """
    extremes = pd.Series((x.max(), x.min()), index=['max','min'])
    print(extremes)
    return extremes

In [102]:
frame.apply(dsply_min_max)

max    6
min    0
dtype: int32
max    7
min    1
dtype: int32
max    8
min    2
dtype: int32


Unnamed: 0,Satu,Dua,Tiga
max,6,7,8
min,0,1,2


*Element-wise Python functions can be used, too. Suppose you wanted to compute
a formatted string from each floating-point value in frame. You can do this with
applymap:*

In [103]:
def my_format(x):
    """Format ``x`` as a string with two digits after the decimal point.

    The value is only formatted; no arithmetic is applied to it.
    """
    return format(x, ".2f")

In [104]:
rng = np.random.default_rng(seed=42)
df = pd.DataFrame(rng.standard_normal((3,3)),columns=['Satu','Dua','Tiga'])

df.applymap(my_format)

Unnamed: 0,Satu,Dua,Tiga
0,0.3,-1.04,0.75
1,0.94,-1.95,-1.3
2,0.13,-0.32,-0.02


In [105]:
# Why not plain apply? apply hands my_format a whole column (a Series), and a
# Series does not support the '.2f' format spec, so a TypeError is raised.
try:
    df.apply(my_format)
except TypeError as ex:
    print(ex)

unsupported format string passed to Series.__format__


*The reason for the name applymap is that Series has a map method for applying an
element-wise function:*

In [106]:
frame['Tiga'].map(my_format)

0    2.00
1    5.00
2    8.00
Name: Tiga, dtype: object

In [107]:
# Reminder: the built-in map applies a function to every item of an iterable
def devided_2(x):
    """Return half of ``x`` using true division."""
    return x / 2

my_list = list(range(0, 10, 2))
list(map(devided_2, my_list))

[0.0, 1.0, 2.0, 3.0, 4.0]

Apa perbedaan antara `apply` dan `applymap`

* `apply` berfungsi untuk mengaplikasikan fungsi pada **setiap baris** atau **setiap kolom** dari DataFrame
* `applymap` akan mengaplikasikan fungsi buatan pada **setiap element** pada DataFrame

In [108]:
df = pd.DataFrame(np.ones((3,3), dtype=np.int16),index=['Satu','Dua','Tiga'])
df

Unnamed: 0,0,1,2
Satu,1,1,1
Dua,1,1,1
Tiga,1,1,1


In [109]:
counts = 0


def count_loop(x):
    """Return ``x`` unchanged after logging the call.

    Increments the module-level ``counts`` and prints the running call number
    followed by ``x``, making it visible how many times (and with which
    argument) the function is invoked.
    """
    global counts
    counts = counts + 1
    print(f"(ln/clmn/elmnt) ke - {counts} \r\n{x}")
    print("----")
    return x

In [110]:
df.apply(count_loop)

(ln/clmn/elmnt) ke - 1 
Satu    1
Dua     1
Tiga    1
Name: 0, dtype: int16
----
(ln/clmn/elmnt) ke - 2 
Satu    1
Dua     1
Tiga    1
Name: 1, dtype: int16
----
(ln/clmn/elmnt) ke - 3 
Satu    1
Dua     1
Tiga    1
Name: 2, dtype: int16
----


Unnamed: 0,0,1,2
Satu,1,1,1
Dua,1,1,1
Tiga,1,1,1


In [111]:
counts = 0
df.applymap(count_loop)

(ln/clmn/elmnt) ke - 1 
1
----
(ln/clmn/elmnt) ke - 2 
1
----
(ln/clmn/elmnt) ke - 3 
1
----
(ln/clmn/elmnt) ke - 4 
1
----
(ln/clmn/elmnt) ke - 5 
1
----
(ln/clmn/elmnt) ke - 6 
1
----
(ln/clmn/elmnt) ke - 7 
1
----
(ln/clmn/elmnt) ke - 8 
1
----
(ln/clmn/elmnt) ke - 9 
1
----


Unnamed: 0,0,1,2
Satu,1,1,1
Dua,1,1,1
Tiga,1,1,1


## Sorting and Ranking

*Sorting* berdasarkan index

In [112]:
# Sorting index series
series = pd.Series(np.arange(4),index=['d','a','c','b'])

series.sort_index()

a    1
b    3
c    2
d    0
dtype: int32

In [113]:
# Sorting index on DataFrame
frame = pd.DataFrame(np.arange(8).reshape(2,4),index=['Jakarta','Bandung'],columns=['b','d','a','c'])
frame.sort_index(axis='columns')

Unnamed: 0,a,b,c,d
Jakarta,2,0,3,1
Bandung,6,4,7,5


In [114]:
# Descending order
frame.sort_index(axis=1, ascending=False)

Unnamed: 0,d,c,b,a
Jakarta,1,3,0,2
Bandung,5,7,4,6


*Sorting* berdasarkan nilai

In [115]:
series = pd.Series([8,0,3,1])
series.sort_values()

1    0
3    1
2    3
0    8
dtype: int64

In [118]:
# Sorting a Series that contains missing values (NaN is placed last by default)
series = pd.Series([8,np.nan,0,3,np.nan,1])
series.sort_values()

2    0.0
5    1.0
3    3.0
0    8.0
1    NaN
4    NaN
dtype: float64

In [119]:
series.sort_values(na_position='first')

1    NaN
4    NaN
2    0.0
5    1.0
3    3.0
0    8.0
dtype: float64

In [140]:
# Sorting dataframe
frame = pd.DataFrame({'b':[4, 7, -3, 2],'a':[0, 1, 0, 1]})
frame

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


In [143]:
# sort a column
frame.sort_values('b')

Unnamed: 0,b,a
2,-3,0
3,2,1
0,4,0
1,7,1


In [147]:
# sort columns
frame.sort_values(['a','b'])

Unnamed: 0,b,a
2,-3,0
0,4,0
3,2,1
1,7,1


`rank`
Ranking assigns ranks from one through the number of valid data points in an array,
starting from the lowest value. The rank methods for Series and DataFrame are the
place to look; by default, rank breaks ties by assigning each group the mean rank:

In [180]:
obj = pd.Series([7, -5, 7, 4, 2, 0, 4])
obj

0    7
1   -5
2    7
3    4
4    2
5    0
6    4
dtype: int64

In [181]:
obj.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

How is this computed?

Method `rank` tanpa diberikan argumen *method* akan menghitung menggunakan rata-rata (*mean*)

Misalkan : 
berikut adalah daftar angka 7, -5, 7, 4, 2, 0, 4

Jika diurutkan maka akan menghasilkan angka seperti ini -5, 0, 2, 4, 4, 7, 7


| Ranking | Values | Mean | calculate |
| :------: | :----: | :--: | :--- |
| 1 | -5 | 1 | |
| 2 | 0 | 2 | |
| 3 | 2 | 3 | |
| 4 | 4 | 4.5 | (4 + 5) / 2 = **4.5** |
| 5 | 4 | 4.5 | (4 + 5) / 2 = **4.5** |
| 6 | 7 | 6.5 | (6 + 7) / 2 = **6.5** |
| 7 | 7 | 6.5 | (6 + 7) / 2 = **6.5**  |

Setelah itu urutkan kembali nilai pada kolom calculate pada posisi sesuai dengan nilai Series

| Values | calculate |
| :---: | :--- | 
| 7 | **6.5** |
| -5 | **1** |
| 7 | **6.5** |
| 4 | **4.5** |
| 2 | **3** |
| 0 | **2** |
| 4 | **4.5** |


In [184]:
obj.rank(method='first')

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

In [186]:
# Rank on DataFrame
frame = pd.DataFrame({"b": [4.3, 7, -3, 2], "a": [0, 1, 0, 1],"c": [-2, 5, 8, -2.5]})
frame

Unnamed: 0,b,a,c
0,4.3,0,-2.0
1,7.0,1,5.0
2,-3.0,0,8.0
3,2.0,1,-2.5


In [188]:
# Rank on DataFrame, on axis=index (default value)
frame.rank()

Unnamed: 0,b,a,c
0,3.0,1.5,2.0
1,4.0,3.5,3.0
2,1.0,1.5,4.0
3,2.0,3.5,1.0


In [189]:
# Rank on DataFrame, on axis=columns
frame.rank(axis='columns')

Unnamed: 0,b,a,c
0,3.0,2.0,1.0
1,3.0,1.0,2.0
2,1.0,2.0,3.0
3,3.0,2.0,1.0
