# Import Data

In [1]:
import pandas as pd

# Load the daily total female births CSV from GitHub into a DataFrame.
data = pd.read_csv(
    'https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-total-female-births.csv'
)

In [2]:
# Plain-text preview of the loaded frame.
print(data)

           Date  Births
0    1959-01-01      35
1    1959-01-02      32
2    1959-01-03      30
3    1959-01-04      31
4    1959-01-05      44
..          ...     ...
360  1959-12-27      37
361  1959-12-28      52
362  1959-12-29      48
363  1959-12-30      55
364  1959-12-31      50

[365 rows x 2 columns]


# Data Frame

In [3]:
# Wrap the loaded data in a DataFrame named `df` for the transformations that follow.
# NOTE(review): `data` is already a DataFrame and the constructor is not asked to copy
# it; depending on the pandas version, later column assignments on `df` may also be
# visible through `data`. Use `data.copy()` if an independent frame is intended —
# TODO confirm.
df = pd.DataFrame(data)

In [4]:
# Convert the 'Date' column to pandas datetime values.

# NOTE(review): the `dt` module alias is not referenced by name in the visible cells;
# the `.dt` used on the column later is the pandas Series datetime accessor.
import datetime as dt
df['Date'] = pd.to_datetime(df['Date'])

In [5]:
# Reformat the 'Date' column as 'YYYY-MM' strings (year and month only), so every day
# of a month carries the same label.
df['Date'] = df['Date'].dt.strftime('%Y-%m')
print(df)

        Date  Births
0    1959-01      35
1    1959-01      32
2    1959-01      30
3    1959-01      31
4    1959-01      44
..       ...     ...
360  1959-12      37
361  1959-12      52
362  1959-12      48
363  1959-12      55
364  1959-12      50

[365 rows x 2 columns]


In [6]:
# Aggregate to monthly totals: group the rows by their 'Date' label and sum 'Births'.
# The result is kept as a one-column DataFrame indexed by 'Date'.

df = df.groupby('Date')['Births'].sum().to_frame()
df

Unnamed: 0_level_0,Births
Date,Unnamed: 1_level_1
1959-01,1213
1959-02,1148
1959-03,1218
1959-04,1195
1959-05,1208
1959-06,1212
1959-07,1300
1959-08,1351
1959-09,1446
1959-10,1368


# Sliding Window

In [7]:
# Monthly birth totals (the aggregated 'Births' column) and the sliding-window width n.

lst = df['Births']
n = 3

# New frame (newdf) with one row per month label, in a column named 'month'; the
# sliding-window results are attached to it later.
# The labels come from the aggregated frame's index instead of `data['Date']`:
# `data` only holds 'YYYY-MM' labels if the earlier reassignment of df['Date'] also
# altered `data`, which depends on how pandas shares data between the two frames.

newdf = pd.DataFrame({'month': df.index.to_numpy()})
newdf

Unnamed: 0,month
0,1959-01
1,1959-02
2,1959-03
3,1959-04
4,1959-05
5,1959-06
6,1959-07
7,1959-08
8,1959-09
9,1959-10


In [8]:
# Build the sliding windows (df2): one row per window and n columns named m0 .. m{n-1}.

window_cols = [f'm{x}' for x in range(n)]

# Each window is a positional slice of n consecutive monthly totals.
# .iloc is used because the Series is labelled by month strings, so plain lst[i]
# depends on deprecated positional fallback; slicing by n (instead of listing
# i, i+1, i+2) keeps the window width in step with n.
# When there are fewer than n values the list is empty and df2 has no rows.

windows = [lst.iloc[i:i + n].tolist() for i in range(len(lst) - n + 1)]
df2 = pd.DataFrame(windows, columns=window_cols)

df2

Unnamed: 0,m0,m1,m2
0,1213,1148,1218
1,1148,1218,1195
2,1218,1195,1208
3,1195,1208,1212
4,1208,1212,1300
5,1212,1300,1351
6,1300,1351,1446
7,1351,1446,1368
8,1446,1368,1350
9,1368,1350,1314


In [9]:
# Append an 'average' column holding the row-wise mean of each window.

df2 = df2.assign(average=df2.mean(axis=1))
df2

Unnamed: 0,m0,m1,m2,average
0,1213,1148,1218,1193.0
1,1148,1218,1195,1187.0
2,1218,1195,1208,1207.0
3,1195,1208,1212,1205.0
4,1208,1212,1300,1240.0
5,1212,1300,1351,1287.666667
6,1300,1351,1446,1365.666667
7,1351,1446,1368,1388.333333
8,1446,1368,1350,1388.0
9,1368,1350,1314,1344.0


In [10]:
# Combine the month labels (newdf) with the window values and averages (df2), side by side.
#
# newdf has one row per month while df2 has one row per window (n - 1 fewer rows), so
# the two frames are joined on the row labels they share (join='inner'). This keeps only
# the months that start a complete window, without first creating NaN-padded rows that
# would upcast integer columns to float and then have to be removed with dropna.
# ignore_index=True relabels the resulting columns 0, 1, 2, ...

sliding_window = pd.concat([newdf, df2], axis=1, join='inner', ignore_index=True)
sliding_window

Unnamed: 0,0,1,2,3,4
0,1959-01,1213,1148,1218,1193.0
1,1959-02,1148,1218,1195,1187.0
2,1959-03,1218,1195,1208,1207.0
3,1959-04,1195,1208,1212,1205.0
4,1959-05,1208,1212,1300,1240.0
5,1959-06,1212,1300,1351,1287.666667
6,1959-07,1300,1351,1446,1365.666667
7,1959-08,1351,1446,1368,1388.333333
8,1959-09,1446,1368,1350,1388.0
9,1959-10,1368,1350,1314,1344.0


In [11]:
# Give the combined frame its final column names: the month label, one column per
# window position (m0 .. m{n-1}) and the window average.
# The window names are derived from n so the list keeps matching the number of window
# columns if the window width is changed.

final_columns = ["Month", *(f"m{x}" for x in range(n)), "Average"]
sliding_window = sliding_window.set_axis(final_columns, axis=1)
sliding_window

Unnamed: 0,Month,m0,m1,m2,Average
0,1959-01,1213,1148,1218,1193.0
1,1959-02,1148,1218,1195,1187.0
2,1959-03,1218,1195,1208,1207.0
3,1959-04,1195,1208,1212,1205.0
4,1959-05,1208,1212,1300,1240.0
5,1959-06,1212,1300,1351,1287.666667
6,1959-07,1300,1351,1446,1365.666667
7,1959-08,1351,1446,1368,1388.333333
8,1959-09,1446,1368,1350,1388.0
9,1959-10,1368,1350,1314,1344.0
