In [1]:
# Import the modules used throughout the notebook
import json

import numpy as np
import pandas as pd
from scipy.optimize import fmin_tnc
from sklearn.metrics import confusion_matrix

In [2]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    The textbook form computes exp(-z), which overflows to inf (and emits a
    RuntimeWarning) once z is a large negative number. Here the exponential
    is only ever taken of a non-positive value, so it cannot overflow:

    * z >= 0:  1 / (1 + exp(-z))        (same expression as the textbook form)
    * z <  0:  exp(z) / (1 + exp(z))    (algebraically identical)

    Only NumPy ufuncs and arithmetic are used, so the container type of the
    input is kept: an ndarray gives an ndarray, a pandas Series gives a
    Series with the same index.

    Parameters
    ----------
    z : scalar or array-like accepted by NumPy ufuncs
        Linear score(s) to squash.

    Returns
    -------
    Value(s) in [0, 1] with the same shape as ``z``.
    """
    # exp(min(z, 0)) equals 1 for z >= 0 and exp(z) for z < 0.
    numerator = np.exp(np.minimum(z, 0))
    # exp(-|z|) is always in (0, 1], so the denominator stays in (1, 2].
    denominator = 1 + np.exp(-np.abs(z))
    return numerator / denominator

def h(x, theta):
    """Hypothesis of the logistic model: sigmoid of the product of x and theta.

    Parameters
    ----------
    x : feature matrix (or a single feature row)
    theta : coefficient vector multiplied with ``x`` via ``np.matmul``

    Returns
    -------
    The result of ``sigmoid`` applied to ``np.matmul(x, theta)``.
    """
    linear_score = np.matmul(x, theta)
    return sigmoid(linear_score)

def costf(theta, x, y):
    """Cross-entropy cost of the logistic model and its gradient.

    Parameters
    ----------
    theta : coefficient vector being optimised
    x : feature matrix with one row per sample
    y : 0/1 target vector aligned with the rows of ``x``

    Returns
    -------
    (J, grad) : tuple
        ``J`` is the mean cross-entropy, ``grad`` is its gradient with
        respect to ``theta``. The notebook passes this function directly to
        ``fmin_tnc``, which consumes the pair.
    """
    hf = h(x, theta)
    m = len(x)

    # A saturated hypothesis (exactly 0 or 1) would give log(0) = -inf, and
    # the product 0 * -inf then turns the whole cost into nan. Clip only the
    # values fed to the logarithms so the cost stays finite.
    eps = 1e-15
    hf_safe = np.clip(hf, eps, 1 - eps)

    positive_term = np.matmul(y.transpose(), np.log(hf_safe))
    negative_term = np.matmul((1 - y).transpose(), np.log(1 - hf_safe))
    J = (-1/m) * (positive_term + negative_term)

    # The gradient has no logarithm, so it uses the unclipped hypothesis.
    grad = (1/m) * np.matmul(x.transpose(), hf - y)
    return J, grad

def predict(x, theta, threshold=0.5):
    """Turn model probabilities into hard 0/1 class labels.

    The previous implementation walked ``x.index`` label by label and used an
    ``IndexError`` to detect the single-sample case. That only worked for
    pandas inputs with unique labels and returned floats for the vector case
    but an int for the scalar case. This version thresholds in one vectorised
    step and always produces integer labels.

    Parameters
    ----------
    x : feature matrix (many samples) or a single feature row
    theta : coefficient vector
    threshold : float, default 0.5
        Probabilities greater than or equal to this value are labelled 1.

    Returns
    -------
    int for a single sample; otherwise an integer array/Series of 0s and 1s
    with the same layout (and, for pandas input, the same index) as the
    probabilities returned by ``h``.
    """
    prob = h(x, theta)
    if np.ndim(prob) == 0:
        # Single sample: h returned a scalar probability.
        return int(prob >= threshold)
    # NaN compares False against the threshold and therefore maps to 0,
    # exactly as in the original element-wise loop.
    return (prob >= threshold).astype(int)

def testErr(h, y):
    err_sum = 0
    for i in y.index:
        if ((h[i] >= 0.5) and y[i] == 0):
            err_sum += 1
        elif ((h[i] < 0.5) and y[i] == 1):
            err_sum += 1
        else:
            err_sum += 0
    return err_sum/len(y)

In [3]:
# Load the dataset from CSV; x and y are separated in a later cell
dataset = pd.read_csv('dataset.csv')

In [4]:
# Show the loaded frame as this cell's output (the rendering below is truncated)
dataset

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,y
0,1,19963.48,25011.81,81344.03,14978.52,8265.522,9986.67,15974.253,34628.378,11431.210,0
1,1,355.81,264.14,246.59,260.66,262.384,743.47,277.258,716.574,253.159,1
2,1,249.16,306.11,258.71,261.69,259.481,286.35,261.513,252.038,257.435,1
3,1,279.62,248.04,266.63,270.31,253.855,257.62,272.099,255.964,250.311,1
4,1,270.18,256.78,261.48,745.72,263.940,295.45,277.875,265.371,265.497,1
...,...,...,...,...,...,...,...,...,...,...,...
195,1,280.35,266.56,250.75,279.13,261.110,242.82,253.727,736.150,275.473,1
196,1,698.70,255.60,272.88,256.07,256.106,724.84,272.856,248.905,714.726,1
197,1,252.26,244.09,786.63,392.90,223.839,264.82,215.754,257.958,292.069,1
198,1,66067.09,22694.44,8907.54,8897.55,13650.813,11733.00,30321.058,18043.500,9938.916,0


In [18]:
# Shuffle once with a fixed seed, then cut into 60 % train / 20 % validate /
# 20 % test. Positional .iloc slices are used instead of np.split, because
# np.split on a DataFrame goes through the deprecated DataFrame.swapaxes.
shuffled = dataset.sample(frac=1, random_state=42)
train_end = int(.6 * len(dataset))
validate_end = int(.8 * len(dataset))
train = shuffled.iloc[:train_end]
validate = shuffled.iloc[train_end:validate_end]
test = shuffled.iloc[validate_end:]

In [21]:
# Target column of every split
y_train, y_validate, y_test = (split['y'] for split in (train, validate, test))

# Behaviour 1: every column except the target is kept as a feature
x_train, x_validate, x_test = (
    split.drop(columns='y') for split in (train, validate, test)
)

In [30]:
# Fit theta for prediction: minimise costf on the training split, starting
# from an all-zero coefficient vector (one entry per feature column)
initial_theta = np.zeros(x_train.shape[1])
result = fmin_tnc(
    costf,
    x0=initial_theta,
    args=(x_train.values, y_train.values),
    maxfun=400,
)

In [31]:
# Share of training rows misclassified by the fitted coefficients
train_error = testErr(h(x_train, result[0]), y_train)
print("Train Error Perilaku 1: ", train_error)

Train Error Perilaku 1:  0.0


In [32]:
# Share of validation rows misclassified by the fitted coefficients
validate_error = testErr(h(x_validate, result[0]), y_validate)
print("Validate Error Perilaku 1: ", validate_error)

Validate Error Perilaku 1:  0.0


In [33]:
# Share of test rows misclassified by the fitted coefficients
test_error = testErr(h(x_test, result[0]), y_test)
print("Test Error Perilaku 1: ", test_error)

Test Error Perilaku 1:  0.0


In [35]:
# Hard 0/1 predictions for the test split
pred = predict(x_test, result[0])
# scikit-learn's signature is confusion_matrix(y_true, y_pred); passing the
# predictions first transposes the matrix and swaps false positives with
# false negatives.
conf_matrix = confusion_matrix(y_test, pred)

In [36]:
# Behaviour number shown in the report. The f-strings previously read a
# variable `i` that no earlier cell defines, so the cell failed on a fresh
# kernel and otherwise printed whatever stale value `i` happened to hold.
# 1 matches the "Perilaku 1" label used by the error reports above.
behaviour_id = 1

# confusion_matrix expects (y_true, y_pred); with the arguments swapped the
# matrix is transposed and fp/fn (hence precision/recall) are exchanged.
# labels=[0, 1] guarantees a 2x2 matrix even if one class is absent.
conf_matrix = confusion_matrix(y_test, pred, labels=[0, 1])
tn = conf_matrix[0,0]
fp = conf_matrix[0,1]
tp = conf_matrix[1,1]
fn = conf_matrix[1,0]

accuracy = (tp + tn) / (tp + fp + tn + fn)
# Report 0.0 instead of dividing by zero when a denominator is empty
# (no predicted positives, no actual positives, or both scores zero).
precision = tp / (tp + fp) if (tp + fp) else 0.0
recall    = tp / (tp + fn) if (tp + fn) else 0.0
f1score   = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0

print("#####################")
print(f'Akurasi Perilaku {behaviour_id} : {accuracy}')
print(f'F1 score Perilaku {behaviour_id} : {f1score}')

#####################
Akurasi Perilaku 4 : 1.0
F1 score Perilaku 4 : 1.0


In [57]:
# Holds the fitted coefficients that are later written to model.js
training_result = []

In [58]:
# Copy every fitted coefficient out of the optimiser result as a plain Python
# float. Rebinding the name (instead of appending to the existing list) keeps
# this cell idempotent: re-running it cannot duplicate entries. Iterating over
# result[0] itself removes the hard-coded length of 10, and float() keeps
# NumPy scalar objects out of the list that is serialised later.
training_result = [float(weight) for weight in result[0]]

In [59]:
# Inspect the coefficients that will be exported
training_result

[18.897194947369407,
 0.00016946528306475481,
 -0.00046466636484231535,
 -9.690433203706609e-05,
 -0.001000042935456584,
 -0.0016549588835906711,
 -0.0008089956243595116,
 -0.0007371388631707813,
 0.0004990419750629027,
 -0.00038991813654082883]

In [60]:
# Export the coefficients as a small ES module.
# json.dumps yields a valid JavaScript array literal regardless of how the
# installed NumPy prints its scalar types, and the `with` block closes the
# file even if the write fails.
model_literal = json.dumps([float(weight) for weight in training_result])
with open("model.js", "w") as f:
    f.write('const model = ' + model_literal + ';\n' + 'export { model };')