# Tugas 4 : Data Time Series
Time series merupakan data yang berisi mengenai pengamatan suatu perubahan dalam rentang waktu yang telah ditentukan

In [1]:
import numpy as np
import pandas as pd

## Import data Airline-Passengers

In [2]:
data = pd.read_csv('https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv')

## Mengambil target data

In [3]:
target = data['Passengers']
print(target)

0      112
1      118
2      132
3      129
4      121
      ... 
139    606
140    508
141    461
142    390
143    432
Name: Passengers, Length: 144, dtype: int64


## Split Sequence data
merubah data yang awalnya hanya 1 fitur menjadi beberapa fitur

In [4]:
def split_sequence(sequence, n_steps):
  X, y = list(), list()
  for i in range(len(sequence)):
    # Mencari n akhir
    end_ix = i + n_steps
    # Memeriksa urutan terluar dari data
    if end_ix > len(sequence)-1:
      break
    # Menambahkan bagian input dan output
    seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
    X.append(seq_x)
    y.append(seq_y)
  return np.array(X), np.array(y)

# Menentukan Waktu dari data time series
jmlh_X = 2
print('Shapes Target = ',target.shape)
#Split data fitur dan target
X, y = split_sequence(target, jmlh_X)
print('Shapes X = ',X.shape)
print('Shapes Y = ',y.shape)

Shapes Target =  (144,)
Shapes X =  (142, 2)
Shapes Y =  (142,)


In [5]:
newFitur = pd.DataFrame(X, columns=['Xt-'+str(i) for i in range(jmlh_X-1, -1,-1)])
newTarget = pd.DataFrame(y, columns=['Data Prediksi'])

In [6]:
newData = pd.concat([newFitur,newTarget], axis=1)
newData

Unnamed: 0,Xt-1,Xt-0,Data Prediksi
0,112,118,132
1,118,132,129
2,132,129,121
3,129,121,135
4,121,135,148
...,...,...,...
137,535,622,606
138,622,606,508
139,606,508,461
140,508,461,390


In [7]:
#Menentukan data testing dan predict

In [8]:
#Normalisasi minmax scalar (test dan prediksi)
#mean squared error

## Normalisasi data Fitur
Menggunakan MinMaxScaler

In [9]:
from sklearn.preprocessing import MinMaxScaler
scaler= MinMaxScaler()
X_norm= scaler.fit_transform(newFitur)
print(X_norm)

[[0.01544402 0.02702703]
 [0.02702703 0.05405405]
 [0.05405405 0.04826255]
 [0.04826255 0.03281853]
 [0.03281853 0.05984556]
 [0.05984556 0.08494208]
 [0.08494208 0.08494208]
 [0.08494208 0.06177606]
 [0.06177606 0.02895753]
 [0.02895753 0.        ]
 [0.         0.02702703]
 [0.02702703 0.02123552]
 [0.02123552 0.04247104]
 [0.04247104 0.07142857]
 [0.07142857 0.05984556]
 [0.05984556 0.04054054]
 [0.04054054 0.08687259]
 [0.08687259 0.12741313]
 [0.12741313 0.12741313]
 [0.12741313 0.1042471 ]
 [0.1042471  0.05598456]
 [0.05598456 0.01930502]
 [0.01930502 0.06949807]
 [0.06949807 0.07915058]
 [0.07915058 0.08880309]
 [0.08880309 0.14285714]
 [0.14285714 0.11389961]
 [0.11389961 0.13127413]
 [0.13127413 0.14285714]
 [0.14285714 0.18339768]
 [0.18339768 0.18339768]
 [0.18339768 0.15444015]
 [0.15444015 0.11196911]
 [0.11196911 0.08108108]
 [0.08108108 0.11969112]
 [0.11969112 0.12934363]
 [0.12934363 0.14671815]
 [0.14671815 0.17181467]
 [0.17181467 0.14864865]
 [0.14864865 0.15250965]


In [10]:
#KNN (-kuadratkan)

## Split Data training dan Testing
Mengambil data tes sebanyak 20% dari seluruh data

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.2, random_state=0)

## Menentukan model
Menggunakan Model KNN dengan jumlah n = 3

In [12]:
# import knn
from sklearn.neighbors import KNeighborsRegressor
model_knn = KNeighborsRegressor(n_neighbors=3)

In [13]:
model_knn.fit(X_train, y_train)
y_pred=model_knn.predict(X_test)

## Menghitung MSE (Mean Squared Error)
Rumus MSE :
$$ \text{MSE} = \frac{1}{n} \sum_{i=0}^n (y_i - \hat{y}_i)^2$$ 

In [14]:
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test, y_pred)

2207.5363984674336

In [15]:
print(y_test.shape,y_pred.shape)

(29,) (29,)


## Hasil

In [16]:
df_y_test = pd.DataFrame(y_test,columns=['y_test'])
df_y_pred = pd.DataFrame(y_pred,columns=['y_pred'])

df_hasil = pd.concat([df_y_test, df_y_pred], axis=1)
df_hasil.head(29)

Unnamed: 0,y_test,y_pred
0,194,163.666667
1,203,252.0
2,170,143.666667
3,180,221.0
4,145,137.333333
5,119,125.0
6,318,375.333333
7,390,448.666667
8,318,343.666667
9,465,434.666667


### Hasil prediksi Terendah

In [17]:
y.min()

104

### Hasil prediksi Tertinggi

In [18]:
y.max()

622