In [None]:
#17. Resampling pada data deret waktu (time series data)

In [1]:
import pandas as pd
import numpy as np


In [2]:
# Build an hourly time-series DataFrame: 365 days x 24 hours = 8,760 rows.

n_rows = 365 * 24
n_cols = 2
cols = ['col1', 'col2']

# Random integers in [1, 20). No seed is set, so the values differ per run.
df = pd.DataFrame(np.random.randint(1, 20, size=(n_rows, n_cols)),
                  columns=cols)

# pd.date_range is the public replacement for pd.util.testing.makeDateIndex,
# which is deprecated (it triggers a warning on import) and no longer exists
# in pandas 2.x. The index starts at 2000-01-01 00:00 with one row per hour;
# the lowercase 'h' alias is used because uppercase 'H' is deprecated in
# recent pandas releases.
df.index = pd.date_range(start='2000-01-01', periods=n_rows, freq='h')
df

  import pandas.util.testing


Unnamed: 0,col1,col2
2000-01-01 00:00:00,6,5
2000-01-01 01:00:00,16,3
2000-01-01 02:00:00,3,11
2000-01-01 03:00:00,17,17
2000-01-01 04:00:00,8,13
...,...,...
2000-12-30 19:00:00,15,3
2000-12-30 20:00:00,11,13
2000-12-30 21:00:00,8,4
2000-12-30 22:00:00,19,6


In [3]:
# Monthly resampling: total of 'col1' per calendar month.
# Each row of the result is labelled with the last day of its month.

df['col1'].resample('M').sum().to_frame()

Unnamed: 0,col1
2000-01-31,7706
2000-02-29,6769
2000-03-31,7450
2000-04-30,7086
2000-05-31,7511
2000-06-30,7127
2000-07-31,7345
2000-08-31,7232
2000-09-30,7209
2000-10-31,7475


In [4]:
# Daily resampling: total of 'col1' per calendar day
# (the 24 hourly rows of each day are summed into one row).

df['col1'].resample('D').sum().to_frame()

Unnamed: 0,col1
2000-01-01,301
2000-01-02,241
2000-01-03,215
2000-01-04,242
2000-01-05,180
...,...
2000-12-26,267
2000-12-27,278
2000-12-28,297
2000-12-29,208


In [None]:
#18. Membentuk dummy Data Frame

In [5]:
# Build a DataFrame from a dictionary: each key becomes a column name and
# each list becomes that column's values.
pd.DataFrame(dict(col1=[1, 2, 3, 4],
                  col2=[5, 6, 7, 8]))

Unnamed: 0,col1,col2
0,1,5
1,2,6
2,3,7
3,4,8


In [6]:
# Build a DataFrame from a NumPy array, step 1: create the array.

n_rows, n_cols = 5, 3

# 5 x 3 array of random integers drawn from [1, 20); unseeded, so the
# values change on every run.
arr = np.random.randint(low=1, high=20, size=(n_rows, n_cols))
arr

array([[15, 14,  7],
       [ 5, 15, 15],
       [18, 11,  7],
       [ 3, 19,  3],
       [ 7,  3, 19]])

In [7]:
# Step 2: wrap the array in a DataFrame, naming its three columns A, B, C.
pd.DataFrame(data=arr, columns=('A', 'B', 'C'))

Unnamed: 0,A,B,C
0,15,14,7
1,5,15,15
2,18,11,7
3,3,19,3
4,7,3,19


In [8]:
# Build ready-made sample DataFrames.
#
# This cell originally called the pandas.util.testing helpers (makeDataFrame,
# makeMixedDataFrame, makeTimeDataFrame, makeMissingDataframe). That module is
# deprecated and no longer exists in pandas 2.x, so frames modelled on those
# helpers are built here with the public API only. Each frame is bound to a
# name because a cell renders only its last expression; inspect the others
# with e.g. `df_mixed.head()` in a cell of their own.

sample_rows = 30
sample_cols = list('ABCD')

# Random floats labelled with random 10-character alphanumeric strings.
alphabet = list('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789')
labels = [''.join(np.random.choice(alphabet, size=10))
          for _ in range(sample_rows)]
df_random = pd.DataFrame(np.random.randn(sample_rows, len(sample_cols)),
                         index=labels, columns=sample_cols)

# Mixed dtypes: floats, strings and business-day timestamps.
df_mixed = pd.DataFrame({'A': [0.0, 1.0, 2.0, 3.0, 4.0],
                         'B': [0.0, 1.0, 0.0, 1.0, 0.0],
                         'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
                         'D': pd.bdate_range('2009-01-01', periods=5)})

# Random floats on a business-day DatetimeIndex.
df_time = pd.DataFrame(np.random.randn(sample_rows, len(sample_cols)),
                       index=pd.bdate_range('2000-01-01', periods=sample_rows),
                       columns=sample_cols)

# Same values as df_random with roughly 10% of the cells replaced by NaN.
df_missing = df_random.mask(np.random.rand(*df_random.shape) > 0.9)

df_missing.head()

Unnamed: 0,A,B,C,D
jJibs4iQ7d,-1.004601,0.815119,-0.528371,-1.131521
lulh5eZcdW,-0.979053,-0.813058,-0.340395,0.192874
i6jt9LlT0S,0.044482,0.374939,,0.961909
I0QzYvrNRI,-2.126819,0.553515,,-0.739158
FsqzSXiJKI,0.956682,0.218226,-1.742875,-0.098693


In [9]:
# 19. Formatting how a DataFrame is displayed
# Start from a small frame of random integers with two columns,
# 'omset' and 'operasional'.

n_rows, n_cols = 5, 2
cols = ['omset', 'operasional']

# Random integers in [1, 20); unseeded, so the values change on every run.
angka = np.random.randint(low=1, high=20, size=(n_rows, n_cols))
df = pd.DataFrame(data=angka, columns=cols)
df

Unnamed: 0,omset,operasional
0,11,12
1,15,2
2,3,18
3,4,4
4,3,1


In [10]:
# Scale the small random integers up to larger amounts, one factor per column.
# NOTE: the columns are overwritten in place, so re-running this cell
# multiplies the values again.
for kolom, faktor in (('omset', 100_000), ('operasional', 10_000)):
    df[kolom] = df[kolom] * faktor
df

Unnamed: 0,omset,operasional
0,1100000,120000
1,1500000,20000
2,300000,180000
3,400000,40000
4,300000,10000


In [11]:
# Label the rows with consecutive daily dates starting at 2000-01-01, then
# move those dates out of the index into a regular column named 'tanggal'.
#
# pd.date_range is the public replacement for pd.util.testing.makeDateIndex,
# which is deprecated and no longer exists in pandas 2.x. Sizing the range
# with len(df) keeps the cell independent of the n_rows variable set in an
# earlier cell. Naming the index 'tanggal' up front makes reset_index()
# create the column under that name directly, so no rename step is needed.
df.index = pd.date_range(start='2000-01-01', periods=len(df), freq='D',
                         name='tanggal')
df = df.reset_index()
df

Unnamed: 0,tanggal,omset,operasional
0,2000-01-01,1100000,120000
1,2000-01-02,1500000,20000
2,2000-01-03,300000,180000
3,2000-01-04,400000,40000
4,2000-01-05,300000,10000


In [12]:
# Format how the DataFrame is displayed: one format string per column,
# applied through the DataFrame's Styler.

format_rupiah = 'Rp {:.2f}'             # "Rp" prefix, two decimal places
formatku = {'tanggal': '{:%d/%m/%y}',   # day/month/two-digit year
            'operasional': format_rupiah,
            'omset': format_rupiah}

# The result is a Styler object wrapping df, not a new DataFrame.
laporan = df.style.format(formatku)
laporan

Unnamed: 0,tanggal,omset,operasional
0,01/01/00,Rp 1100000.00,Rp 120000.00
1,02/01/00,Rp 1500000.00,Rp 20000.00
2,03/01/00,Rp 300000.00,Rp 180000.00
3,04/01/00,Rp 400000.00,Rp 40000.00
4,05/01/00,Rp 300000.00,Rp 10000.00


In [13]:
# Check what kind of object `laporan` is (the recorded output shows a
# pandas Styler rather than a DataFrame).
type(laporan)

pandas.io.formats.style.Styler

In [14]:
# Hide the index column in the rendered table.
# Styler.hide_index() is deprecated since pandas 1.4 and absent from
# pandas 2.x, where Styler.hide(axis='index') replaces it. Use the new method
# when it exists and fall back to the old one on older pandas versions.
laporan.hide(axis='index') if hasattr(laporan, 'hide') else laporan.hide_index()

tanggal,omset,operasional
01/01/00,Rp 1100000.00,Rp 120000.00
02/01/00,Rp 1500000.00,Rp 20000.00
03/01/00,Rp 300000.00,Rp 180000.00
04/01/00,Rp 400000.00,Rp 40000.00
05/01/00,Rp 300000.00,Rp 10000.00


In [15]:
# Attach a caption (table title) to the styled report.
laporan.set_caption(caption='Data Omset dan Operasional')

tanggal,omset,operasional
01/01/00,Rp 1100000.00,Rp 120000.00
02/01/00,Rp 1500000.00,Rp 20000.00
03/01/00,Rp 300000.00,Rp 180000.00
04/01/00,Rp 400000.00,Rp 40000.00
05/01/00,Rp 300000.00,Rp 10000.00


In [16]:
# Highlight the smallest and largest value of each column, using a distinct
# colour for every case. The calls are chained into a single expression; the
# value of the final call is what the cell renders.
(
    laporan
    .highlight_min(subset='omset', color='pink')
    .highlight_max(subset='omset', color='lightgreen')
    .highlight_min(subset='operasional', color='lightblue')
    .highlight_max(subset='operasional', color='grey')
)

tanggal,omset,operasional
01/01/00,Rp 1100000.00,Rp 120000.00
02/01/00,Rp 1500000.00,Rp 20000.00
03/01/00,Rp 300000.00,Rp 180000.00
04/01/00,Rp 400000.00,Rp 40000.00
05/01/00,Rp 300000.00,Rp 10000.00


In [None]:
#20. Menggabungkan (merge) dua Data Frame secara berdampingan

In [17]:
# Prepare the first (left-hand) DataFrame: two columns, three rows.

d1 = dict(col1=[1, 2, 3],
          col2=[10, 20, 30])
df1 = pd.DataFrame(data=d1)
df1

Unnamed: 0,col1,col2
0,1,10
1,2,20
2,3,30


In [18]:
# Prepare the second (right-hand) DataFrame: two columns, three rows.
d2 = dict(col3=[4, 5, 6],
          col4=[40, 50, 60])
df2 = pd.DataFrame(data=d2)
df2

Unnamed: 0,col3,col4
0,4,40
1,5,50
2,6,60


In [19]:
# Merge the two DataFrames side by side: rows are matched on their index
# labels, so the columns of df2 are placed to the right of those of df1.

df = df1.merge(df2, left_index=True, right_index=True)
df

Unnamed: 0,col1,col2,col3,col4
0,1,10,4,40
1,2,20,5,50
2,3,30,6,60
