### Time Series

In [2]:
import pandas as pd
import numpy as np

In [4]:
# A single point in time: 20 February 2020 at midnight, without a timezone.
x = pd.Timestamp(year=2020, month=2, day=20)
x

Timestamp('2020-02-20 00:00:00')

In [6]:
# A Timestamp can be stored as an ordinary cell value in a DataFrame.
# Built column-wise here: one row, columns 'no' and 'waktu'.
x = pd.DataFrame({
    'no': [1],
    'waktu': [pd.Timestamp('20200220')],
})
x

Unnamed: 0,no,waktu
0,1,2020-02-20


In [8]:
# Ten consecutive calendar days starting on 6 March 2020.
pd.date_range(start='2020-03-06', periods=10, freq='D')

DatetimeIndex(['2020-03-06', '2020-03-07', '2020-03-08', '2020-03-09',
               '2020-03-10', '2020-03-11', '2020-03-12', '2020-03-13',
               '2020-03-14', '2020-03-15'],
              dtype='datetime64[ns]', freq='D')

In [16]:
# Small example frame: a running number, a 'suhu' (temperature) reading,
# and one calendar day per row starting on 6 March 2020.
a = pd.DataFrame({
    'no': list(range(1, 6)),
    'suhu': [30, 32, 35, 34, 23],
    'waktu': pd.date_range(start='2020-03-06', periods=5, freq='D'),
})
a

Unnamed: 0,no,suhu,waktu
0,1,30,2020-03-06
1,2,32,2020-03-07
2,3,35,2020-03-08
3,4,34,2020-03-09
4,5,23,2020-03-10


In [17]:
# The easiest way to work with a time-series DataFrame is to make the
# datetime column the index.
a = a.set_index(keys='waktu')
a

Unnamed: 0_level_0,no,suhu
waktu,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-03-06,1,30
2020-03-07,2,32
2020-03-08,3,35
2020-03-09,4,34
2020-03-10,5,23


In [25]:
# Convenience of a datetime index: rows can be selected with partial date strings.
# Row selection goes through .loc; on pandas 2.0 and later a date string passed to
# plain [] is looked up as a column label and raises KeyError.
a.loc['2020']                     # every row in the year 2020
a.loc['2020-03']                  # every row in March 2020
a.loc['2020-03-06':'2020-03-07']  # inclusive date range; only this last expression is rendered

Unnamed: 0_level_0,no,suhu
waktu,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-03-06,1,30
2020-03-07,2,32


#### Case Study

In [18]:
# Load the raw price file with default parsing (no date handling yet).
df = pd.read_csv(filepath_or_buffer='GOOG.csv')
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2020-01-06,1350.0,1396.5,1350.0,1394.209961,1394.209961,1732300
1,2020-01-07,1397.939941,1402.98999,1390.380005,1393.339966,1393.339966,1502700
2,2020-01-08,1392.079956,1411.579956,1390.839966,1404.319946,1404.319946,1528000
3,2020-01-09,1420.569946,1427.329956,1410.27002,1419.829956,1419.829956,1500900
4,2020-01-10,1427.560059,1434.928955,1418.349976,1429.72998,1429.72998,1820700
5,2020-01-13,1436.130005,1440.52002,1426.02002,1439.22998,1439.22998,1652300
6,2020-01-14,1439.01001,1441.800049,1428.369995,1430.880005,1430.880005,1558900
7,2020-01-15,1430.209961,1441.39502,1430.209961,1439.199951,1439.199951,1282700
8,2020-01-16,1447.439941,1451.98999,1440.920044,1451.699951,1451.699951,1173700
9,2020-01-17,1462.910034,1481.295044,1458.219971,1480.390015,1480.390015,2396200


In [19]:
# Check the data type of every column.
df.dtypes

Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

In [22]:
# Why should the date column have a datetime type?
# Because once a datetime column is used as the index, rows can be selected easily by date.

In [27]:
# Convert the 'Date' column from strings to datetime.
# pd.to_datetime is used instead of .astype('datetime64'): the unit-less
# 'datetime64' dtype string raises TypeError on pandas 2.0 and later.
df['Date'] = pd.to_datetime(df['Date'])
df.dtypes

Date         datetime64[ns]
Open                float64
High                float64
Low                 float64
Close               float64
Adj Close           float64
Volume                int64
dtype: object

In [28]:
# Alternative:
# Pass parse_dates=['Date'] to pd.read_csv when reading the file. (More info: DataCamp tutorial notes)

#### All in One Time

In [32]:
# Read the file, parse 'Date' as datetime and use it as the index, all in one call.
df = pd.read_csv(
    'GOOG.csv',
    index_col='Date',
    parse_dates=['Date'],
)
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-06,1350.0,1396.5,1350.0,1394.209961,1394.209961,1732300
2020-01-07,1397.939941,1402.98999,1390.380005,1393.339966,1393.339966,1502700
2020-01-08,1392.079956,1411.579956,1390.839966,1404.319946,1404.319946,1528000
2020-01-09,1420.569946,1427.329956,1410.27002,1419.829956,1419.829956,1500900
2020-01-10,1427.560059,1434.928955,1418.349976,1429.72998,1429.72998,1820700
2020-01-13,1436.130005,1440.52002,1426.02002,1439.22998,1439.22998,1652300
2020-01-14,1439.01001,1441.800049,1428.369995,1430.880005,1430.880005,1558900
2020-01-15,1430.209961,1441.39502,1430.209961,1439.199951,1439.199951,1282700
2020-01-16,1447.439941,1451.98999,1440.920044,1451.699951,1451.699951,1173700
2020-01-17,1462.910034,1481.295044,1458.219971,1480.390015,1480.390015,2396200


In [34]:
# All rows in the year 2020. Uses .loc because df['2020'] is looked up as a
# column label on pandas 2.0 and later and raises KeyError.
df.loc['2020']

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-06,1350.0,1396.5,1350.0,1394.209961,1394.209961,1732300
2020-01-07,1397.939941,1402.98999,1390.380005,1393.339966,1393.339966,1502700
2020-01-08,1392.079956,1411.579956,1390.839966,1404.319946,1404.319946,1528000
2020-01-09,1420.569946,1427.329956,1410.27002,1419.829956,1419.829956,1500900
2020-01-10,1427.560059,1434.928955,1418.349976,1429.72998,1429.72998,1820700
2020-01-13,1436.130005,1440.52002,1426.02002,1439.22998,1439.22998,1652300
2020-01-14,1439.01001,1441.800049,1428.369995,1430.880005,1430.880005,1558900
2020-01-15,1430.209961,1441.39502,1430.209961,1439.199951,1439.199951,1282700
2020-01-16,1447.439941,1451.98999,1440.920044,1451.699951,1451.699951,1173700
2020-01-17,1462.910034,1481.295044,1458.219971,1480.390015,1480.390015,2396200


In [35]:
# All rows in February 2020. Uses .loc because df['2020-02'] is looked up as a
# column label on pandas 2.0 and later and raises KeyError.
df.loc['2020-02']

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-02-03,1462.0,1490.0,1458.98999,1485.939941,1485.939941,3055200
2020-02-04,1457.069946,1469.5,1426.300049,1447.069946,1447.069946,3933000
2020-02-05,1462.420044,1463.839966,1430.560059,1448.22998,1448.22998,1986200
2020-02-06,1450.329956,1482.0,1449.569946,1476.22998,1476.22998,1679400
2020-02-07,1467.300049,1485.839966,1466.349976,1479.22998,1479.22998,1172300
2020-02-10,1474.319946,1509.5,1474.319946,1508.680054,1508.680054,1419900
2020-02-11,1511.810059,1529.630005,1505.637939,1508.790039,1508.790039,1344600
2020-02-12,1514.47998,1520.694946,1508.109985,1518.27002,1518.27002,1167600
2020-02-13,1512.689941,1527.180054,1504.599976,1514.660034,1514.660034,929500
2020-02-14,1515.599976,1520.73999,1507.339966,1520.73999,1520.73999,1197800


In [36]:
# Rows from 2 to 4 February 2020; both endpoints of a label slice are inclusive.
df.loc['2020-02-02':'2020-02-04']

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-02-03,1462.0,1490.0,1458.98999,1485.939941,1485.939941,3055200
2020-02-04,1457.069946,1469.5,1426.300049,1447.069946,1447.069946,3933000


#### RESAMPLE

In [42]:
# Average closing price per period. Resample rule aliases:
# 'M' monthly, 'W' weekly, 'Q' quarterly, 'Y' yearly, 'D' daily.
# NOTE(review): pandas 2.2+ prefers 'ME', 'QE', 'YE' for the period-end aliases;
# switch if a FutureWarning appears.
df['Close'].resample('Q').mean()

Date
2020-03-31    1445.139515
Freq: Q-DEC, Name: Close, dtype: float64

In [43]:
# NOTE!
# If some dates have missing values, dropping those rows is not recommended (when working with time series).
# What to do instead? Preferably forward-fill the missing values.