In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from numpy import array
from sklearn.neighbors import KNeighborsRegressor
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
from sklearn.svm import SVR
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV
from sklearn.svm import LinearSVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.dummy import DummyRegressor

In [42]:
data =  pd.read_csv('LUCK.JK.csv')
data = data['Volume']
data

0       750200
1       922100
2      1143100
3      1960200
4       922600
        ...   
243    2750600
244    2007700
245    2173100
246     934100
247     681800
Name: Volume, Length: 248, dtype: int64

In [43]:
data_training, data_testing = train_test_split(data, test_size=0.2, random_state=42, shuffle=False)


In [44]:
data_training_new = pd.DataFrame(data_training)
data_test_new = pd.DataFrame(data_testing)


In [45]:
Mm = MinMaxScaler()
train_scaled = Mm.fit_transform(data_training_new)
test_scaled = Mm.transform(data_test_new)


In [46]:
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        end_ix = i + n_steps
        if end_ix > len(sequence)-1:
            break
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

In [47]:
n_steps = range(1,4)
acc_fix = {}
for n in n_steps:
    X_train, y_train = split_sequence(train_scaled, n)
    X_test, y_test = split_sequence(test_scaled, n)
    #membuat kolom otomatis
    colom = []
    for c in range(n):
        if(c==0):
            colom.insert(0,'Xt')
        else:
            colom.insert(0,f'Xt-{c}')
    newX_train = pd.DataFrame(np.reshape(X_train,(len(X_train),n)),columns=colom)
    newY_train = pd.DataFrame(y_train, columns=['Output'])
    new_data_train = pd.concat([newX_train,newY_train],axis=1)

    newX_test = pd.DataFrame(np.reshape(X_test,(len(X_test),n)),columns=colom)
    newY_test = pd.DataFrame(y_test, columns=['Output'])
    new_data_test = pd.concat([newX_test,newY_test],axis=1)

    #membuat model
    k_nei = range(1,30)
    acc = []
    for k in k_nei:
        neigh = KNeighborsRegressor(n_neighbors=k)
        neigh.fit(newX_train, newY_train)
        y_pred = neigh.predict(newX_test)
        y_pred_shape = y_pred.reshape(-1,1)
        y_pred_inverse = Mm.inverse_transform(y_pred_shape)
        newY_test_shape = y_test.reshape(-1,1)
        newY_test_inverse = Mm.inverse_transform(newY_test_shape)
        mape = mean_absolute_percentage_error(y_pred_inverse, newY_test_inverse)
        acc.append(mape)
    acc_fix[f"n-{n} knn"] = [min(acc), acc.index(min(acc))]
    # predict gaussian regression
    karnel = DotProduct() + WhiteKernel()
    gpr = GaussianProcessRegressor(kernel=karnel,random_state=0).fit(newX_train, newY_train)
    y_pred = gpr.predict(newX_test, return_std=True)[0]
    y_pred_shape = y_pred.reshape(-1,1)
    y_pred_inverse = Mm.inverse_transform(y_pred_shape)
    newY_test_shape = y_test.reshape(-1,1)
    newY_test_inverse = Mm.inverse_transform(newY_test_shape)
    # print(y_pred_inverse)
    mape = mean_absolute_percentage_error(y_pred_inverse, newY_test_inverse)

    acc_fix[f"n-{n} gpr"] = [mape]
# predict SVR dengan Bagging Regression
    n_estimators = range(1, 30)
    acc_svr = []
    for n in n_estimators:
        regr = BaggingRegressor(SVR(), n_estimators=n,random_state=0).fit(newX_train, newY_train.values.ravel())
        # regr.fit(newX_train, newY_train)
        y_pred = regr.predict(newX_test)
        y_pred_shape = y_pred.reshape(-1,1)
        y_pred_inverse = Mm.inverse_transform(y_pred_shape)
        newY_test_shape = y_test.reshape(-1,1)
        newY_test_inverse = Mm.inverse_transform(newY_test_shape)
        mape = mean_absolute_percentage_error(newY_test_inverse, y_pred_inverse)
        acc_svr.append(mape)

    acc_fix[f"n-{n} SVR"] = [min(acc_svr), acc_svr.index(min(acc_svr))]
    # Predict dengan from RandomForestRegressor
    maxd = range(1,10)
    acc_rf = []
    for d in maxd:
        regr = RandomForestRegressor(max_depth=d, random_state=0).fit(newX_train, newY_train.values.ravel())
        y_pred = regr.predict(newX_test)
        y_pred_shape = y_pred.reshape(-1,1)
        y_pred_inverse = Mm.inverse_transform(y_pred_shape)
        newY_test_shape = y_test.reshape(-1,1)
        newY_test_inverse = Mm.inverse_transform(newY_test_shape)
        mape = mean_absolute_percentage_error(y_pred_inverse, newY_test_inverse)
        acc_rf.append(mape)
    
    # masukkan ke acc-fix
    acc_fix[f"n-{n} rf"] = [min(acc_rf) , acc_rf.index(min(acc_rf))]

    # Prdict dengan LinearSVR , Ridge pada stacking regressors
    estimators = [('lr', RidgeCV()),
                ('svr', LinearSVR(random_state=42))]
    acc_stac = []
    for e in n_estimators:
        reg = StackingRegressor(
            estimators,
            final_estimator=RandomForestRegressor(n_estimators=e,
                                                random_state=42))
        reg.fit(newX_train, newY_train.values.ravel())
        y_pred = reg.predict(newX_test)
        y_pred_shape = y_pred.reshape(-1,1)
        y_pred_inverse = Mm.inverse_transform(y_pred_shape)
        newY_test_shape = y_test.reshape(-1,1)
        newY_test_inverse = Mm.inverse_transform(newY_test_shape)
        mape = mean_absolute_percentage_error(y_pred_inverse, newY_test_inverse)
        acc_stac.append(mape)
    acc_fix[f"n-{n} svr"] = [min(acc_stac), acc_stac.index(min(acc_stac))]
    
    #Predict dengan Decission Tree Regresor
    dt_max_depth = range(1, 10)
    acc_dt = []
    for depth in dt_max_depth:
        dt = DecisionTreeRegressor(max_depth=depth)
        dt.fit(newX_train, newY_train)
        y_pred_dt = dt.predict(newX_test)
        y_pred_dt_shape = y_pred_dt.reshape(-1, 1)
        y_pred_dt_inverse = Mm.inverse_transform(y_pred_dt_shape)
        mape = mean_absolute_percentage_error(y_pred_dt_inverse, newY_test_inverse)
        acc_dt.append(mape)
        
    # masukkan ke acc-fix    
    acc_fix[f"n-{n} dt"] = [min(acc_dt) , acc_dt.index(min(acc_dt))+1]
    
    #Linier Regressor
    acc_lr =[]
    lr = LinearRegression()
    lr.fit(newX_train, newY_train)
    y_pred_lr = lr.predict(newX_test)
    y_pred_lr_shape = y_pred_lr.reshape(-1, 1)
    y_pred_lr_inverse = Mm.inverse_transform(y_pred_lr_shape)
    mape_lr = mean_absolute_percentage_error(y_pred_lr_inverse, newY_test_inverse)
    acc_lr.append(mape)
    
    # masukkan ke acc-fix    
    acc_fix[f"n-{n} lr"] = [min(acc_lr) , acc_lr.index(min(acc_lr))+1]
    
    #Dummy Regressor
    acc_dr = []
    dummy = DummyRegressor()
    dummy.fit(newX_train, newY_train)
    y_pred_dummy = dummy.predict(newX_test)
    y_pred_dummy_shape = y_pred_dummy.reshape(-1, 1)
    y_pred_dummy_inverse = Mm.inverse_transform(y_pred_dummy_shape)
    mape = mean_absolute_percentage_error(y_pred_dummy_inverse, newY_test_inverse)
    acc_dr.append(mape)
    
    
    # masukkan ke acc-fix    
    acc_fix[f"n-{n} dr"] = [min(acc_dr) , acc_dr.index(min(acc_dr))+1]



In [67]:
# import joblib
# filename = 'Mm.pkl'
# joblib.dump(Mm, filename) 

In [114]:
import joblib
filename = 'linearReg.pkl'
joblib.dump(lr, filename) 

['linearReg.pkl']

In [113]:
temp = 1
temp2 = {}
for m in acc_fix:
    if(acc_fix[m][0] > temp ):
        temp = acc_fix[m][0]
        temp2 = {}
        temp2[m] = acc_fix[m]
        # print(temp2)
    # else:
    #     temp2[m] = acc_fix[m]


print(f"Hasil Dari Berbagai Model yang memiliki MAPE terkecil adalah: {temp2}")
# acc_fix
# acc_fix[m][0]
# acc_fix[m]
# temp2
# m

Hasil Dari Berbagai Model yang memiliki MAPE terkecil adalah: {'n-29 lr': [5.582760990534218, 1]}
