# **Pandas**

<hr>

## Selecting Subset of Rows and Columns

In [18]:
# Imports live with the first cell so the notebook also runs on a fresh kernel.
import numpy as np
import pandas as pd

# 3x3 example table: one row per city, one column per month.
df1 = pd.DataFrame(
    np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
    index=['Jakarta', 'Bandung', 'Bekasi'],
    columns=['Maret', 'April', 'Mei'],
)
df1

Unnamed: 0,Maret,April,Mei
Jakarta,1,2,3
Bandung,4,5,6
Bekasi,7,8,9


In [22]:
# Scalar lookup: the value stored at row label 'Bekasi', column label 'Mei'.
row_label, column_label = 'Bekasi', 'Mei'
df1.loc[row_label, column_label]

9

In [27]:
# Select the rows 'Jakarta' and 'Bekasi', restricted to the columns 'Maret' and 'Mei'.
city_rows = ['Jakarta', 'Bekasi']
month_cols = ['Maret', 'Mei']
df1.loc[city_rows, month_cols]

Unnamed: 0,Maret,Mei
Jakarta,1,3
Bekasi,7,9


In [26]:
# Select the rows 'Jakarta' and 'Bekasi' with only the 'Maret' column.
# Passing the column as a one-element list keeps the result a DataFrame.
city_rows = ['Jakarta', 'Bekasi']
month_cols = ['Maret']
df1.loc[city_rows, month_cols]

Unnamed: 0,Maret
Jakarta,1
Bekasi,7


In [28]:
# A one-element row list keeps the result a DataFrame: one row, all three months.
all_months = ['Maret', 'April', 'Mei']
df1.loc[['Jakarta'], all_months]

Unnamed: 0,Maret,April,Mei
Jakarta,1,2,3


## Conditional Selection

In [41]:
# Element-wise comparison: True wherever the value is greater than 2.
df1.gt(2)

Unnamed: 0,Maret,April,Mei
Jakarta,False,False,True
Bandung,True,True,True
Bekasi,True,True,True


In [33]:
# Keep the values greater than 3; every other cell becomes NaN,
# which is why the surviving numbers are displayed as floats.
df1.where(df1 > 3)

Unnamed: 0,Maret,April,Mei
Jakarta,,,
Bandung,4.0,5.0,6.0
Bekasi,7.0,8.0,9.0


In [38]:
# Show the rows of df1 whose 'April' value is greater than 1.
april_above_1 = df1['April'] > 1
df1[april_above_1]

Unnamed: 0,Maret,April,Mei
Jakarta,1,2,3
Bandung,4,5,6
Bekasi,7,8,9


In [42]:
# Show the rows of df1 whose 'April' value is strictly greater than 2.
# 'Jakarta' is left out because its 'April' value is exactly 2, which is not greater than 2.
df1[df1['April'] > 2]

Unnamed: 0,Maret,April,Mei
Bandung,4,5,6
Bekasi,7,8,9


In [44]:
# Show ALL columns of df1 for the rows where 'Maret' is less than 5.
df1[df1['Maret'].lt(5)]

Unnamed: 0,Maret,April,Mei
Jakarta,1,2,3
Bandung,4,5,6


In [48]:
# Rows where 'Maret' is less than 5, showing only the 'Mei' column (returned as a Series).
# A single .loc call selects rows and column together instead of chaining df[...][...].
df1.loc[df1['Maret'] < 5, 'Mei']

Jakarta    3
Bandung    6
Name: Mei, dtype: int32

In [52]:
# Rows where 'Maret' is less than 5, showing only the 'April' and 'Mei' columns.
# A single .loc call selects rows and columns together instead of chaining df[...][...].
df1.loc[df1['Maret'] < 5, ['April', 'Mei']]

Unnamed: 0,April,Mei
Jakarta,2,3
Bandung,5,6


In [53]:
# Rows that satisfy BOTH conditions: 'Maret' > 1 and 'Mei' < 9.
# The element-wise operator & combines the two boolean Series.
maret_above_1 = df1['Maret'] > 1
mei_below_9 = df1['Mei'] < 9
df1[maret_above_1 & mei_below_9]

Unnamed: 0,Maret,April,Mei
Bandung,4,5,6


In [61]:
# Rows that satisfy AT LEAST ONE condition: 'Maret' > 1 or 'Mei' < 9.
df1[df1['Maret'].gt(1) | df1['Mei'].lt(9)]

Unnamed: 0,Maret,April,Mei
Jakarta,1,2,3
Bandung,4,5,6
Bekasi,7,8,9


In [62]:
# Show df1 again: none of the filters above assigned back to it, so it is unchanged.
df1

Unnamed: 0,Maret,April,Mei
Jakarta,1,2,3
Bandung,4,5,6
Bekasi,7,8,9


## Index Details

In [100]:
# Replace the city index with a default integer index; drop=False (the default)
# keeps the old labels as a regular column.
df2 = df1.reset_index(drop=False)

In [101]:
# The former city index is now an ordinary column named 'index'.
df2

Unnamed: 0,index,Maret,April,Mei
0,Jakarta,1,2,3
1,Bandung,4,5,6
2,Bekasi,7,8,9


In [102]:
# Inspect the column labels of the DataFrame; 'index' is listed as a regular column.
df2.columns

Index(['index', 'Maret', 'April', 'Mei'], dtype='object')

In [105]:
# drop=True discards the old city labels instead of keeping them as a column.
df3 = df1.reset_index(drop=True)

In [106]:
# df3 has a fresh 0..2 index and no 'index' column.
df3

Unnamed: 0,Maret,April,Mei
0,1,2,3
1,4,5,6
2,7,8,9


In [72]:
# Build the list of item names by splitting one whitespace-separated string.
item_names = 'Smartphone Laptop Tablet'
goods = item_names.split()
goods

['Smartphone', 'Laptop', 'Tablet']

In [73]:
# Add the list as a new 'Items' column, one name per row; this modifies df3 in place.
df3['Items'] = goods
df3

Unnamed: 0,Maret,April,Mei,Items
0,1,2,3,Smartphone
1,4,5,6,Laptop
2,7,8,9,Tablet


In [75]:
# Use the 'Items' column as the row index. Without inplace=True this returns a new
# DataFrame and leaves df3 itself untouched.
df3.set_index('Items')

Unnamed: 0_level_0,Maret,April,Mei
Items,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Smartphone,1,2,3
Laptop,4,5,6
Tablet,7,8,9


In [76]:
# df3 still has its 0..2 index and the 'Items' column.
df3

Unnamed: 0,Maret,April,Mei,Items
0,1,2,3,Smartphone
1,4,5,6,Laptop
2,7,8,9,Tablet


In [77]:
# Same call with inplace=True: df3 itself is modified and nothing is returned.
# Re-running this cell is expected to raise a KeyError, because 'Items' is no longer
# a column once it has become the index.
df3.set_index('Items', inplace=True)

In [80]:
# 'Items' is now the index of df3.
df3

Unnamed: 0_level_0,Maret,April,Mei
Items,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Smartphone,1,2,3
Laptop,4,5,6
Tablet,7,8,9


## Multi-Index and Index Hierarchy

In [81]:
# Index levels: the outer level is the region, the inner level is a number 1..3.
outside = ['Jabodetabek'] * 3 + ['Outside'] * 3
inside = [1, 2, 3] * 2
# Pair each outer label with its inner label.
hier_index = [(region, number) for region, number in zip(outside, inside)]
hier_index

[('Jabodetabek', 1),
 ('Jabodetabek', 2),
 ('Jabodetabek', 3),
 ('Outside', 1),
 ('Outside', 2),
 ('Outside', 3)]

In [82]:
# Convert the list of (outer, inner) tuples into a two-level MultiIndex.
# Note: this rebinds hier_index from a list to a MultiIndex.
hier_index = pd.MultiIndex.from_tuples(hier_index)
hier_index

MultiIndex([('Jabodetabek', 1),
            ('Jabodetabek', 2),
            ('Jabodetabek', 3),
            (    'Outside', 1),
            (    'Outside', 2),
            (    'Outside', 3)],
           )

In [84]:
# Six rows (one per MultiIndex entry) and two product columns of standard-normal values.
# A seeded generator makes the values identical on every run; output saved from an
# earlier unseeded run will show different numbers.
rng = np.random.default_rng(42)
df_multi = pd.DataFrame(
    rng.standard_normal((6, 2)),
    index=hier_index,
    columns=['Smartphone', 'PC'],
)
df_multi

Unnamed: 0,Unnamed: 1,Smartphone,PC
Jabodetabek,1,-1.644628,1.274329
Jabodetabek,2,-1.041751,-0.184408
Jabodetabek,3,-0.729091,-1.129195
Outside,1,0.53123,-0.227876
Outside,2,0.475701,0.859628
Outside,3,1.048208,-1.537253


In [87]:
# The columns are a flat Index; only the rows are hierarchical.
df_multi.columns

Index(['Smartphone', 'PC'], dtype='object')

In [85]:
# Selecting an outer-level label returns the rows under it, indexed by the inner level.
df_multi.loc['Jabodetabek']

Unnamed: 0,Smartphone,PC
1,-1.644628,1.274329
2,-1.041751,-0.184408
3,-0.729091,-1.129195


In [86]:
# Two-step lookup: first the outer label, then inner label 1 within that group.
jabodetabek_rows = df_multi.loc['Jabodetabek']
jabodetabek_rows.loc[1]

Smartphone   -1.644628
PC            1.274329
Name: 1, dtype: float64

## Selecting with iloc

In [88]:
# Same 3x3 values (1..9, row by row) as before, but with the default 0..2 integer index.
month_names = ['Maret', 'April', 'Mei']
dfX = pd.DataFrame(np.arange(1, 10).reshape(3, 3), columns=month_names)
dfX

Unnamed: 0,Maret,April,Mei
0,1,2,3
1,4,5,6
2,7,8,9


In [108]:
# iloc[rows, columns] is purely positional: rows 0 up to (not including) 1, every column.
first_row = slice(0, 1)
every_column = slice(None)
dfX.iloc[first_row, every_column]

Unnamed: 0,Maret,April,Mei
0,1,2,3
