In [3]:
import pandas as pd
import numpy as np

In [4]:
df = pd.DataFrame(np.array([[1,2,3],[4,5,6],[7,8,9]]),
                  index = ['Jakarta','Bandung','Bekasi'],
                  columns= ['Maret','April','Mei'])

df
                  

Unnamed: 0,Maret,April,Mei
Jakarta,1,2,3
Bandung,4,5,6
Bekasi,7,8,9


# Reset & Set Index

In [5]:
df2 = df.reset_index() # mengubah index menjadi kolom 
df2

Unnamed: 0,index,Maret,April,Mei
0,Jakarta,1,2,3
1,Bandung,4,5,6
2,Bekasi,7,8,9


In [6]:
df2.rename(columns={'index':'kota'}) # mengubah nama kolom (sementara)

Unnamed: 0,kota,Maret,April,Mei
0,Jakarta,1,2,3
1,Bandung,4,5,6
2,Bekasi,7,8,9


In [7]:
df3 = df.reset_index(drop=True) # index lama akan hilang
df3

Unnamed: 0,Maret,April,Mei
0,1,2,3
1,4,5,6
2,7,8,9


In [8]:
goods ='Smartphone Laptop Tablet'.split()
goods

['Smartphone', 'Laptop', 'Tablet']

In [9]:
df3['Items'] = goods
df3

Unnamed: 0,Maret,April,Mei,Items
0,1,2,3,Smartphone
1,4,5,6,Laptop
2,7,8,9,Tablet


In [10]:
df3.set_index('Items') # mengubah suatu kolom menjadi index (sementara)

Unnamed: 0_level_0,Maret,April,Mei
Items,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Smartphone,1,2,3
Laptop,4,5,6
Tablet,7,8,9


In [11]:
df3 # kalo ga pake inplace, akan balik ke df awal

Unnamed: 0,Maret,April,Mei,Items
0,1,2,3,Smartphone
1,4,5,6,Laptop
2,7,8,9,Tablet


In [12]:
df3.set_index('Items', inplace=True) 
# inplace untuk mengubah secara permanen
df3

Unnamed: 0_level_0,Maret,April,Mei
Items,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Smartphone,1,2,3
Laptop,4,5,6
Tablet,7,8,9


# Multi Index and Index Hierarchy

In [13]:
# index level
outside = ['Jabodetabek','Jabodetabek','Jabodetabek','Outside','Outside','Outside']
inside = [1,2,3,1,2,3]
hier_index = list(zip(outside,inside)) # memasangkan outside dan inside

hier_index # output berupa tuple

[('Jabodetabek', 1),
 ('Jabodetabek', 2),
 ('Jabodetabek', 3),
 ('Outside', 1),
 ('Outside', 2),
 ('Outside', 3)]

In [14]:
hier_index = pd.MultiIndex.from_tuples(hier_index)
hier_index
# mengubah tuple menjadi MultiIndex

MultiIndex([('Jabodetabek', 1),
            ('Jabodetabek', 2),
            ('Jabodetabek', 3),
            (    'Outside', 1),
            (    'Outside', 2),
            (    'Outside', 3)],
           )

In [15]:
df_multi = pd.DataFrame(np.random.randint(1,100,12).reshape(6,2), index=hier_index, columns=['Smartphone','PC'])
df_multi

Unnamed: 0,Unnamed: 1,Smartphone,PC
Jabodetabek,1,1,7
Jabodetabek,2,27,12
Jabodetabek,3,83,56
Outside,1,28,3
Outside,2,65,25
Outside,3,99,61


In [16]:
# indexing
df_multi.loc['Jabodetabek']

Unnamed: 0,Smartphone,PC
1,1,7
2,27,12
3,83,56


In [17]:
# indexing dari multiIndex dgn 2 kali .loc[]
df_multi.loc['Jabodetabek'].loc[1]

Smartphone    1
PC            7
Name: 1, dtype: int32

In [18]:
df_multi.index.names

FrozenList([None, None])

In [19]:
df_multi.index.names=['Kota','Lokasi']
df_multi

Unnamed: 0_level_0,Unnamed: 1_level_0,Smartphone,PC
Kota,Lokasi,Unnamed: 2_level_1,Unnamed: 3_level_1
Jabodetabek,1,1,7
Jabodetabek,2,27,12
Jabodetabek,3,83,56
Outside,1,28,3
Outside,2,65,25
Outside,3,99,61


# Excercise

In [20]:
# membuat nama index
region = ['Jawa','Jawa','Jawa','Sumatera','Sumatera','Sumatera','Sulawesi','Sulawesi','Sulawesi']
kota = ['Jakarta','Bandung','Surabaya','Medan','Padang','Palembang','Manado','Palu','Makassar']

# memasangkan index
indexMul = list(zip(region,kota))
indexMul

[('Jawa', 'Jakarta'),
 ('Jawa', 'Bandung'),
 ('Jawa', 'Surabaya'),
 ('Sumatera', 'Medan'),
 ('Sumatera', 'Padang'),
 ('Sumatera', 'Palembang'),
 ('Sulawesi', 'Manado'),
 ('Sulawesi', 'Palu'),
 ('Sulawesi', 'Makassar')]

In [21]:
# indexy yg sudah terpasang berbentuk tuple, diubah menjadi MultiIndex
indexMul = pd.MultiIndex.from_tuples(indexMul)
indexMul

MultiIndex([(    'Jawa',   'Jakarta'),
            (    'Jawa',   'Bandung'),
            (    'Jawa',  'Surabaya'),
            ('Sumatera',     'Medan'),
            ('Sumatera',    'Padang'),
            ('Sumatera', 'Palembang'),
            ('Sulawesi',    'Manado'),
            ('Sulawesi',      'Palu'),
            ('Sulawesi',  'Makassar')],
           )

In [22]:
# membuat DataFrame bernama permen
permen = pd.DataFrame(
    {'Manager':['Andi Susetyo','Budi Budiman','Cecep Maricep','Diana Wijaya','Eddy Susanto','Franky Sujaya','Galih Diantara','Hans Leman',        'Ingrid Mahesa'],
    'Total Produksi':[15,12,12,13,16,18,20,11,14],
    'Jumlah Karyawan':[20,15,12,14,18,14,25,12,13]
    }, index=indexMul
)

permen

Unnamed: 0,Unnamed: 1,Manager,Total Produksi,Jumlah Karyawan
Jawa,Jakarta,Andi Susetyo,15,20
Jawa,Bandung,Budi Budiman,12,15
Jawa,Surabaya,Cecep Maricep,12,12
Sumatera,Medan,Diana Wijaya,13,14
Sumatera,Padang,Eddy Susanto,16,18
Sumatera,Palembang,Franky Sujaya,18,14
Sulawesi,Manado,Galih Diantara,20,25
Sulawesi,Palu,Hans Leman,11,12
Sulawesi,Makassar,Ingrid Mahesa,14,13


In [23]:
# membuat index bisa terpisah dari dataframe dengan cara
# permen = permen.set_index(indexMul)

In [24]:
# membuat kolom 'Total Penjualan' dgn value 'Total Produksi' dikali 150 
permen['Total Penjualan'] = permen['Total Produksi']*150

permen

Unnamed: 0,Unnamed: 1,Manager,Total Produksi,Jumlah Karyawan,Total Penjualan
Jawa,Jakarta,Andi Susetyo,15,20,2250
Jawa,Bandung,Budi Budiman,12,15,1800
Jawa,Surabaya,Cecep Maricep,12,12,1800
Sumatera,Medan,Diana Wijaya,13,14,1950
Sumatera,Padang,Eddy Susanto,16,18,2400
Sumatera,Palembang,Franky Sujaya,18,14,2700
Sulawesi,Manado,Galih Diantara,20,25,3000
Sulawesi,Palu,Hans Leman,11,12,1650
Sulawesi,Makassar,Ingrid Mahesa,14,13,2100


In [25]:
# membuat nama untuk index
permen.index.names=['Region','Kota']

permen

Unnamed: 0_level_0,Unnamed: 1_level_0,Manager,Total Produksi,Jumlah Karyawan,Total Penjualan
Region,Kota,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Jawa,Jakarta,Andi Susetyo,15,20,2250
Jawa,Bandung,Budi Budiman,12,15,1800
Jawa,Surabaya,Cecep Maricep,12,12,1800
Sumatera,Medan,Diana Wijaya,13,14,1950
Sumatera,Padang,Eddy Susanto,16,18,2400
Sumatera,Palembang,Franky Sujaya,18,14,2700
Sulawesi,Manado,Galih Diantara,20,25,3000
Sulawesi,Palu,Hans Leman,11,12,1650
Sulawesi,Makassar,Ingrid Mahesa,14,13,2100


In [26]:
# index diubah menjadi kolom
permen2 = permen.reset_index()
permen2

Unnamed: 0,Region,Kota,Manager,Total Produksi,Jumlah Karyawan,Total Penjualan
0,Jawa,Jakarta,Andi Susetyo,15,20,2250
1,Jawa,Bandung,Budi Budiman,12,15,1800
2,Jawa,Surabaya,Cecep Maricep,12,12,1800
3,Sumatera,Medan,Diana Wijaya,13,14,1950
4,Sumatera,Padang,Eddy Susanto,16,18,2400
5,Sumatera,Palembang,Franky Sujaya,18,14,2700
6,Sulawesi,Manado,Galih Diantara,20,25,3000
7,Sulawesi,Palu,Hans Leman,11,12,1650
8,Sulawesi,Makassar,Ingrid Mahesa,14,13,2100
