# Pandas Cheatsheet

## Import Library

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Basic

### Series

In [3]:
s = pd.Series([10, 20, 30, 40, 50])
s

0    10
1    20
2    30
3    40
4    50
dtype: int64

### Series From List

In [4]:
data_list = [1,2,3,4,5]
s = pd.Series(data_list)
s

0    1
1    2
2    3
3    4
4    5
dtype: int64

### Series with Custom Index/Label

In [5]:
index_custom = ['a','b','c']
data_list = [1,2,3]
s = pd.Series(data=data_list, index=index_custom)
s

a    1
b    2
c    3
dtype: int64

### Series From Dictionary

In [6]:
data_dict = {
    'a': 10,
    'b': 20,
    'c': 30
}
s = pd.Series(data=data_dict)
s

a    10
b    20
c    30
dtype: int64

### Series with Different Data Type

In [7]:
mixed_data = [1,'dua',3.0,4.5,'lima']
s = pd.Series(data=mixed_data)
s

0       1
1     dua
2     3.0
3     4.5
4    lima
dtype: object

### DataFrame

In [8]:
df = pd.DataFrame({
    'nama': ['Orang 1', 'Orang 2', 'Orang 3'],
    'usia': [23, 50, 35],
    'kota': ['Jakarta','Bandung','Depok']
})
df

Unnamed: 0,nama,usia,kota
0,Orang 1,23,Jakarta
1,Orang 2,50,Bandung
2,Orang 3,35,Depok


### DataFrame from List

In [9]:
list_of_list = [
    [1,'Orang 1', 20],
    [2, 'Orang 2', 30],
    [3, 'Orang 3', 40]
]
columns = ['id','nama','usia']

df = pd.DataFrame(data=list_of_list, columns=columns)
df

Unnamed: 0,id,nama,usia
0,1,Orang 1,20
1,2,Orang 2,30
2,3,Orang 3,40


### DataFrame from Dictionary

In [10]:
data_dict = {
    'id': [1,2,3],
    'nama': ['Orang 1', 'Orang 2', 'Orang 3'],
    'usia': [15,20,30]
}

df = pd.DataFrame(data = data_dict)
df

Unnamed: 0,id,nama,usia
0,1,Orang 1,15
1,2,Orang 2,20
2,3,Orang 3,30


### Merge Series into DataFrame

In [11]:
s1 = pd.Series([1,2,3], name='id')
s2 = pd.Series(['Orang 1', 'Orang 2', 'Orang 3'], name='nama')
s3 = pd.Series([15,20,30], name='usia')

## .T (Transponse) dibutuhkan untuk mengubah baris menjadi kolom
df = pd.DataFrame([s1,s2,s3]).T
df

Unnamed: 0,id,nama,usia
0,1,Orang 1,15
1,2,Orang 2,20
2,3,Orang 3,30


### Getting Basic Information from DataFrame

In [12]:
data = {
    'A': np.random.randint(1, 100, 20),
    'B': np.random.randint(1, 100, 20),
    'C': np.random.randint(1, 100, 20),
    'D': np.random.randint(1, 100, 20)
}
df = pd.DataFrame(data=data)
df.head()

Unnamed: 0,A,B,C,D
0,1,99,87,96
1,86,38,5,4
2,32,13,86,74
3,98,70,96,24
4,29,98,81,4


In [13]:
df.tail()

Unnamed: 0,A,B,C,D
15,9,17,40,69
16,27,85,91,51
17,98,24,79,40
18,17,74,35,88
19,99,62,80,99


In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       20 non-null     int64
 1   B       20 non-null     int64
 2   C       20 non-null     int64
 3   D       20 non-null     int64
dtypes: int64(4)
memory usage: 768.0 bytes


In [15]:
df.describe()

Unnamed: 0,A,B,C,D
count,20.0,20.0,20.0,20.0
mean,60.8,48.9,58.85,45.65
std,31.508729,33.23584,31.866952,34.319359
min,1.0,6.0,5.0,1.0
25%,31.25,15.25,34.75,14.5
50%,68.5,54.0,73.5,43.5
75%,86.75,75.25,83.75,74.5
max,99.0,99.0,96.0,99.0


### Identifyng Missing Data

In [16]:
data_missing = {
    'A': [1,2, np.nan, 4, 5],
    'B': [np.nan, 2, 3, 4, 5],
    'C': [1,2,3,4, np.nan]
}
df = pd.DataFrame(data=data_missing)
df

Unnamed: 0,A,B,C
0,1.0,,1.0
1,2.0,2.0,2.0
2,,3.0,3.0
3,4.0,4.0,4.0
4,5.0,5.0,


In [17]:
missing_data_count = df.isna().sum()
missing_data_count

A    1
B    1
C    1
dtype: int64

### Checking Data Distribution

In [18]:
ser_categorical = pd.Series(['apple','banana','apple','orange','banana','apple',
                            'apple','orange'])
value_counts = ser_categorical.value_counts()
type(value_counts) ## Series
value_counts

apple     4
banana    2
orange    2
Name: count, dtype: int64

### Checking Unique Values

In [19]:
data_sample = {
    'A': np.random.randint(1, 100, 3),
    'B': np.random.random(3),
    'C': ['apple','banana','cherry'],
    'D': pd.date_range("20230101", periods=3)
}
df = pd.DataFrame(data=data_sample)
data_types = df.dtypes
data_types

A             int64
B           float64
C            object
D    datetime64[ns]
dtype: object

In [20]:
unique_count = ser_categorical.nunique()
unique_count

3

In [21]:
unique_values = ser_categorical.unique().tolist()
unique_values

['apple', 'banana', 'orange']

### Checking Data Types

### Checking DataFrame Size

In [22]:
df_shape = df.shape
df_shape ## (3,4) 3 baris 4 kolom

(3, 4)

### Checking Columns and Index

In [23]:
df_columns = df.columns
df_index = df.index

df_columns, df_index

(Index(['A', 'B', 'C', 'D'], dtype='object'),
 RangeIndex(start=0, stop=3, step=1))

### Checking Memory Usage

In [24]:
memory_usage = df.memory_usage()
memory_usage

Index    128
A         24
B         24
C         24
D         24
dtype: int64

## Accessing Data

### Accessing Data in Series

In [25]:
s = pd.Series([10,20,30,40,50], index = ['a','b','c','d','e'])

# Mengakses element dengan index possisi
ele_pos = s[4]

# Mengakses element dengan index label
ele_label = s['c']

ele_pos, ele_label

  ele_pos = s[4]


(50, 30)

### Accessing Data in DataFrame

In [26]:
data_example = {
    'A': [1,2,3,4,5],
    'B': ['apple','banana','cherry','date','fig'],
    'C': [1.1, 2.2, 3.3, 4.4, 5.5]
}
df = pd.DataFrame(data=data_example)

column_A = df['A']
column_B = df['B']

column_A.head(), column_B.head()

(0    1
 1    2
 2    3
 3    4
 4    5
 Name: A, dtype: int64,
 0     apple
 1    banana
 2    cherry
 3      date
 4       fig
 Name: B, dtype: object)

### Access Data in DataFrame using iloc and loc

In [27]:
row_by_idx_position = df.iloc[2]
row_by_label = df.loc[2]

row_by_idx_position, row_by_label

(A         3
 B    cherry
 C       3.3
 Name: 2, dtype: object,
 A         3
 B    cherry
 C       3.3
 Name: 2, dtype: object)

### Accessing Data in DataFrame by Subset

In [28]:
df

Unnamed: 0,A,B,C
0,1,apple,1.1
1,2,banana,2.2
2,3,cherry,3.3
3,4,date,4.4
4,5,fig,5.5


In [29]:
subset_position = df.iloc[1:4, 0:2]
subset_position

Unnamed: 0,A,B
1,2,banana
2,3,cherry
3,4,date


In [30]:
subset_label = df.loc[1:3, ['A','B']]
subset_label

Unnamed: 0,A,B
1,2,banana
2,3,cherry
3,4,date


### Accessing Data in DataFrame using at and iat

In [31]:
data_at = df.at[2, 'B']
data_at

'cherry'

In [32]:
data_iat = df.iat[2, 1]
data_iat

'cherry'

### Creating Multi index DataFrame

In [33]:
arrays = [
    ['A','A','B','B'],
    [1,2,1,2]
]
multi_index = pd.MultiIndex.from_arrays(arrays, names=('letters', 'numbers'))
data_multi = {
    'data1': [10,20,30,40],
    'data2': [100,200,300,400]
}
df = pd.DataFrame(data_multi, index=multi_index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
letters,numbers,Unnamed: 2_level_1,Unnamed: 3_level_1
A,1,10,100
A,2,20,200
B,1,30,300
B,2,40,400


In [34]:
rows_with_A = df.xs(key='A', level='letters')
rows_with_A

Unnamed: 0_level_0,data1,data2
numbers,Unnamed: 1_level_1,Unnamed: 2_level_1
1,10,100
2,20,200


In [35]:
df_query = df.query('data1 > 15 & data2 < 350')
df_query

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,data2
letters,numbers,Unnamed: 2_level_1,Unnamed: 3_level_1
A,2,20,200
B,1,30,300


In [36]:
data_example = {
    'A': [1,2,3,4,5],
    'B': ['apple','banana','cherry','date','fig'],
    'C': [1.1, 2.2, 3.3, 4.4, 5.5]
}
df = pd.DataFrame(data=data_example)

taken_rows = df.take([1,3])
taken_rows

Unnamed: 0,A,B,C
1,2,banana,2.2
3,4,date,4.4


In [37]:
data = {
    'A': [i for i in range(1, 11)],
    'B': [chr(i) for i in range(65, 75)],
    'C': [i * 0.5 for i in range(1, 11)]
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,A,B,C
0,1,A,0.5
1,2,B,1.0
2,3,C,1.5
3,4,D,2.0
4,5,E,2.5
5,6,F,3.0
6,7,G,3.5
7,8,H,4.0
8,9,I,4.5
9,10,J,5.0


In [38]:
subset_iloc = df.iloc[1:5, 0:2]
subset_iloc

Unnamed: 0,A,B
1,2,B
2,3,C
3,4,D
4,5,E


In [39]:
subset_loc = df.loc[1:4, ['A','B']]
subset_loc

Unnamed: 0,A,B
1,2,B
2,3,C
3,4,D
4,5,E


In [40]:
rows_greater_than_5 = df[df['A'] > 5]
rows_greater_than_5

Unnamed: 0,A,B,C
5,6,F,3.0
6,7,G,3.5
7,8,H,4.0
8,9,I,4.5
9,10,J,5.0


## Filtering Data

In [41]:
filtered_rows = df[df['A'] > 3]
filtered_rows

Unnamed: 0,A,B,C
3,4,D,2.0
4,5,E,2.5
5,6,F,3.0
6,7,G,3.5
7,8,H,4.0
8,9,I,4.5
9,10,J,5.0


In [42]:
multiple_conditions = df[(df['A'] > 3) & (df['C'] < 5)]
multiple_conditions

Unnamed: 0,A,B,C
3,4,D,2.0
4,5,E,2.5
5,6,F,3.0
6,7,G,3.5
7,8,H,4.0
8,9,I,4.5


In [43]:
rows_selected_value = df[df['B'].isin(['A','C','E'])]
rows_selected_value

Unnamed: 0,A,B,C
0,1,A,0.5
2,3,C,1.5
4,5,E,2.5


In [44]:
date_range = pd.date_range(start='2020-01-01', end='2020-01-10', freq='D')
df_date = pd.DataFrame(date_range, columns=['date'])
df_date['data'] = np.random.randint(0, 100, size=len(date_range))
df_date = df_date.set_index('date')

subset_date = df_date['2020-01-03':'2020-01-07']
subset_date

Unnamed: 0_level_0,data
date,Unnamed: 1_level_1
2020-01-03,92
2020-01-04,66
2020-01-05,9
2020-01-06,1
2020-01-07,42


In [45]:
subset_where = df_date['data'].where(df_date['data'] > 15)
subset_where

date
2020-01-01    98.0
2020-01-02    20.0
2020-01-03    92.0
2020-01-04    66.0
2020-01-05     NaN
2020-01-06     NaN
2020-01-07    42.0
2020-01-08    71.0
2020-01-09    53.0
2020-01-10    96.0
Name: data, dtype: float64

## Basic Operation

### Add New Column

In [46]:
data_example = {
    'A': [1,2,3,4,5],
    'B': ['apple','banana','cherry','date','fig'],
    'C': [1.1, 2.2, 3.3, 4.4, 5.5]
}
df = pd.DataFrame(data=data_example)

df['D'] = df['A'] * 10
df['E'] = 'new_column'
df.head()

Unnamed: 0,A,B,C,D,E
0,1,apple,1.1,10,new_column
1,2,banana,2.2,20,new_column
2,3,cherry,3.3,30,new_column
3,4,date,4.4,40,new_column
4,5,fig,5.5,50,new_column


### Deleting Column and Row

In [47]:
df_example_dropped_col = df.drop(columns=['E'])
df_example_dropped_col

Unnamed: 0,A,B,C,D
0,1,apple,1.1,10
1,2,banana,2.2,20
2,3,cherry,3.3,30
3,4,date,4.4,40
4,5,fig,5.5,50


In [48]:
df_example_dropped_row = df.drop(index = 4)
df_example_dropped_row

Unnamed: 0,A,B,C,D,E
0,1,apple,1.1,10,new_column
1,2,banana,2.2,20,new_column
2,3,cherry,3.3,30,new_column
3,4,date,4.4,40,new_column


## Applying Change to Data

In [49]:
df['B_Upper'] = df['B'].apply(lambda x: x.upper())
df[['B_Upper','B']]

Unnamed: 0,B_Upper,B
0,APPLE,apple
1,BANANA,banana
2,CHERRY,cherry
3,DATE,date
4,FIG,fig


### Replacing a Value

In [50]:
df_replaced = df.replace({
    'B': {
        'apple': 'grape',
        'banana': 'watermelon'
    }
})
df_replaced

Unnamed: 0,A,B,C,D,E,B_Upper
0,1,grape,1.1,10,new_column,APPLE
1,2,watermelon,2.2,20,new_column,BANANA
2,3,cherry,3.3,30,new_column,CHERRY
3,4,date,4.4,40,new_column,DATE
4,5,fig,5.5,50,new_column,FIG


### Rename a Column

In [51]:
df_renamed = df.rename(columns={
    'A': 'X',
    'B': 'Y'
})
df_renamed

Unnamed: 0,X,Y,C,D,E,B_Upper
0,1,apple,1.1,10,new_column,APPLE
1,2,banana,2.2,20,new_column,BANANA
2,3,cherry,3.3,30,new_column,CHERRY
3,4,date,4.4,40,new_column,DATE
4,5,fig,5.5,50,new_column,FIG


## Resetting and Setting Index

In [52]:
df_set_index = df.set_index('B')
df_set_index

Unnamed: 0_level_0,A,C,D,E,B_Upper
B,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
apple,1,1.1,10,new_column,APPLE
banana,2,2.2,20,new_column,BANANA
cherry,3,3.3,30,new_column,CHERRY
date,4,4.4,40,new_column,DATE
fig,5,5.5,50,new_column,FIG


In [53]:
df_reset_index = df_set_index.reset_index()
df_reset_index

Unnamed: 0,B,A,C,D,E,B_Upper
0,apple,1,1.1,10,new_column,APPLE
1,banana,2,2.2,20,new_column,BANANA
2,cherry,3,3.3,30,new_column,CHERRY
3,date,4,4.4,40,new_column,DATE
4,fig,5,5.5,50,new_column,FIG


In [54]:
data = {
    'A': [i for i in range(1, 11)],
    'B': [chr(i) for i in range(65, 75)],
    'C': [i * 0.5 for i in range(1, 11)]
}
df = pd.DataFrame(data=data)
df

new_index = list(range(0,12))
reindex_df = df.reindex(new_index)
reindex_df

Unnamed: 0,A,B,C
0,1.0,A,0.5
1,2.0,B,1.0
2,3.0,C,1.5
3,4.0,D,2.0
4,5.0,E,2.5
5,6.0,F,3.0
6,7.0,G,3.5
7,8.0,H,4.0
8,9.0,I,4.5
9,10.0,J,5.0


## Pandas Method

### Statistic Method

In [55]:
np.random.seed(42)
df_stats = pd.DataFrame({
    'A': np.random.randn(100),
    'B': np.random.randint(1, 10, 100),
    'C': np.random.choice(['pencil','pen','eraser'], 100)
})
df_stats

Unnamed: 0,A,B,C
0,0.496714,9,pen
1,-0.138264,5,pen
2,0.647689,1,pen
3,1.523030,3,pen
4,-0.234153,8,eraser
...,...,...,...
95,-1.463515,3,eraser
96,0.296120,9,pencil
97,0.261055,3,pencil
98,0.005113,9,pencil


### Mean, Mode, Modus

In [56]:
mean_A = df_stats['A'].mean()
median_B = df_stats['B'].median()
mode_C = df_stats['C'].mode()

mean_A, median_B, mode_C

(-0.10384651739409384,
 4.0,
 0    pencil
 Name: C, dtype: object)

### Aggregate Method

In [57]:
aggregated = df_stats.agg({
    'A': ['mean','std','min','max'],
    'B': ['sum','median']
})
aggregated

Unnamed: 0,A,B
mean,-0.103847,
std,0.908168,
min,-2.619745,
max,1.852278,
sum,,473.0
median,,4.0


In [58]:
df_stats['length_C'] = df_stats['C'].apply(len)
df_stats[['C','length_C']]

Unnamed: 0,C,length_C
0,pen,3
1,pen,3
2,pen,3
3,pen,3
4,eraser,6
...,...,...
95,eraser,6
96,pencil,6
97,pencil,6
98,pencil,6


In [59]:
replacement = {
    'pencil': 'PCL',
    'pen': 'PN',
    'eraser': 'ERS'
}
df_stats['short_C'] = df_stats['C'].map(replacement)
df_stats

Unnamed: 0,A,B,C,length_C,short_C
0,0.496714,9,pen,3,PN
1,-0.138264,5,pen,3,PN
2,0.647689,1,pen,3,PN
3,1.523030,3,pen,3,PN
4,-0.234153,8,eraser,6,ERS
...,...,...,...,...,...
95,-1.463515,3,eraser,6,ERS
96,0.296120,9,pencil,6,PCL
97,0.261055,3,pencil,6,PCL
98,0.005113,9,pencil,6,PCL


### Method Chaining

In [60]:
average_A_notna = df_stats[df_stats['B'].notna()]['A'].mean()
average_A_notna

-0.10384651739409384

## Joining DataFrame

### Join with concat

In [63]:
df1 = pd.DataFrame({
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3']
})

df2 = pd.DataFrame({
    'A': ['A4', 'A5', 'A6', 'A7'],
    'B': ['B4', 'B5', 'B6', 'B7'],
    'C': ['C4', 'C5', 'C6', 'C7'],
    'D': ['D4', 'D5', 'D6', 'D7']
})

concatenated = pd.concat([df1, df2])
concatenated

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7


In [68]:
df_row_reindex = pd.concat([df1,df2], ignore_index=True)
df_row_reindex

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [69]:
df_concat_along_column = pd.concat([df1,df2], axis=1)
df_concat_along_column

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
3,A3,B3,C3,D3,A7,B7,C7,D7


### Join with Merge

In [64]:
df_left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3']
})

df_right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3']
})

merged = df_left.merge(df_right, on='key')
merged

Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3


In [71]:
merged_suffix = df_left.merge(df_right, on='key', suffixes=('_left', '_right'))
merged_suffix

Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3


In [73]:
df_left_join = pd.DataFrame({
    'A': ['A0', 'A1', 'A2'],
    'B': ['B0', 'B1', 'B2']
}, index=['K0', 'K1', 'K2'])

df_right_join = pd.DataFrame({
    'C': ['C0', 'C1', 'C2'],
    'D': ['D0', 'D1', 'D2']
}, index=['K0', 'K2', 'K3'])

merged_index = df_left_join.merge(df_right_join, left_index=True, right_index=True, suffixes=('_left','_right'))
merged_index

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K2,A2,B2,C1,D1


### Join with Join

In [67]:
df_left_join = pd.DataFrame({
    'A': ['A0', 'A1', 'A2'],
    'B': ['B0', 'B1', 'B2']
}, index=['K0', 'K1', 'K2'])

df_right_join = pd.DataFrame({
    'C': ['C0', 'C1', 'C2'],
    'D': ['D0', 'D1', 'D2']
}, index=['K0', 'K2', 'K3'])

joined = df_left_join.join(df_right_join, how='outer')
joined

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C1,D1
K3,,,C2,D2


## Sorting

### With sort_index()

In [75]:
df_unsorted_index = pd.DataFrame({
    'A': [3, 1, 2],
    'B': [33, 11, 22],
    'C': ['C3', 'C1', 'C2']
}, index=['K2', 'K0', 'K1'])
df_unsorted_index

df_sorted_index = df_unsorted_index.sort_index()
df_sorted_index

Unnamed: 0,A,B,C
K0,1,11,C1
K1,2,22,C2
K2,3,33,C3


### Sort by Single Column

In [76]:
df_sorted_values = df_unsorted_index.sort_values(by='A')
df_sorted_values

Unnamed: 0,A,B,C
K0,1,11,C1
K1,2,22,C2
K2,3,33,C3


### Sort by Multiple Column

In [77]:
df_sorted_multi_values = df_unsorted_index.sort_values(by=['A','B'])
df_sorted_multi_values

Unnamed: 0,A,B,C
K0,1,11,C1
K1,2,22,C2
K2,3,33,C3


### Reverse Sort

In [78]:
df_sorted_descending = df_unsorted_index.sort_values(by='A', ascending=False)
df_sorted_descending

Unnamed: 0,A,B,C
K2,3,33,C3
K1,2,22,C2
K0,1,11,C1


### Ascending-Descending Depends By Column

In [79]:
df_multi_sort = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 25, 35, 30],
    'Score': [85, 90, 88, 78]
})
df_sorted_multi_order = df_multi_sort.sort_values(by=['Age','Score'], ascending=[True,False])
df_sorted_multi_order

Unnamed: 0,Name,Age,Score
1,Bob,25,90
0,Alice,25,85
3,David,30,78
2,Charlie,35,88


### Sort by MultiIndex

In [81]:
arrays = [
    ['K0','K0','K1','K2','K2','K3'],
    ['L0','L1','L0','L0','L1','L0']
]
index = pd.MultiIndex.from_arrays(arrays, names=('key1','key2'))
df_multiindex = pd.DataFrame({
    'A': [1,2,3,4,5,6],
    'B': [6,5,4,3,2,1]
}, index=index)
df_multiindex

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
K0,L0,1,6
K0,L1,2,5
K1,L0,3,4
K2,L0,4,3
K2,L1,5,2
K3,L0,6,1


In [82]:
df_sorted_multiindex = df_multiindex.sort_index(level='key2')
df_sorted_multiindex

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
K0,L0,1,6
K1,L0,3,4
K2,L0,4,3
K3,L0,6,1
K0,L1,2,5
K2,L1,5,2


### Sort by Categorical

In [85]:
df_categorical = pd.DataFrame({
    'grade': ['gold', 'silver', 'bronze', 'platinum', 'gold', 'silver']
})
order = ['bronze','silver','gold','platinum']
df_categorical['grade'] = pd.Categorical(df_categorical['grade'], categories=order, ordered=True)
df_categorical.info()

df_sorted_categorical = df_categorical.sort_values(by='grade')
df_sorted_categorical

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   grade   6 non-null      category
dtypes: category(1)
memory usage: 338.0 bytes


Unnamed: 0,grade
2,bronze
1,silver
5,silver
0,gold
4,gold
3,platinum
