In [1]:
import pandas as pd
import numpy as np

In [2]:
def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))


def sigmoid_derivative(z):
    return sigmoid(z) * (1.0 - sigmoid(z))

In [3]:
def train(X, y, n_hidden, learning_rate, n_iter):
    m, n_input = X.shape
    W1 = np.random.randn(n_input, n_hidden)
    b1 = np.zeros((1, n_hidden))
    W2 = np.random.randn(n_hidden, 1)
    b2 = np.zeros((1, 1))
    for i in range(1, n_iter+1):
        Z2 = np.matmul(X, W1) + b1
        A2 = sigmoid(Z2)
        Z3 = np.matmul(A2, W2) + b2
        A3 = Z3

        dZ3 = A3 - y
        dW2 = np.matmul(A2.T, dZ3)
        db2 = np.sum(dZ3, axis=0, keepdims=True)

        dZ2 = np.matmul(dZ3, W2.T) * sigmoid_derivative(Z2)
        dW1 = np.matmul(X.T, dZ2)
        db1 = np.sum(dZ2, axis=0)

        W2 = W2 - learning_rate * dW2 / m
        b2 = b2 - learning_rate * db2 / m
        W1 = W1 - learning_rate * dW1 / m
        b1 = b1 - learning_rate * db1 / m

        if i % 100 == 0:
            cost = np.mean((y - A3) ** 2)
            print('Iteration %i, training loss: %f' % (i, cost))

    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    return model

In [4]:
def predict(x, model):
    W1 = model['W1']
    b1 = model['b1']
    W2 = model['W2']
    b2 = model['b2']
    A2 = sigmoid(np.matmul(x, W1) + b1)
    A3 = np.matmul(A2, W2) + b2
    return A3

In [5]:
d = pd.read_csv('CSM dataset.csv', sep=";")
df = d.dropna()
df.head()

Unnamed: 0,Movie,Year,Ratings,Genre,Gross,Budget,Screens,Sequel,Sentiment,Views,Likes,Dislikes,Comments,Aggregate Followers
0,13 Sins,2014.0,6.3,8.0,9130.0,4000000.0,45.0,1.0,0.0,3280543.0,4632.0,425.0,636.0,1120000.0
1,22 Jump Street,2014.0,7.1,1.0,192000000.0,50000000.0,3306.0,2.0,2.0,583289.0,3465.0,61.0,186.0,12350000.0
2,3 Days to Kill,2014.0,6.2,1.0,30700000.0,28000000.0,2872.0,1.0,0.0,304861.0,328.0,34.0,47.0,483000.0
3,300: Rise of an Empire,2014.0,6.3,1.0,106000000.0,110000000.0,3470.0,2.0,0.0,452917.0,2429.0,132.0,590.0,568000.0
4,A Haunted House 2,2014.0,4.7,8.0,17300000.0,3500000.0,2310.0,2.0,0.0,3145573.0,12163.0,610.0,1082.0,1923800.0


In [6]:
# Menghapus kolom pertama (No: 0) karena tidak terpakai
df = df.iloc[:,1:]

In [7]:
# Mengganti nama kolom agar mudah dalam membacanya
df.rename(columns=
            {'X1 transaction date':'Date',
            'X2 house age':'Age',
            'X3 distance to the nearest MRT station':'Dist_MRT',
            'X4 number of convenience stores':'Num_Stores',
            'X5 latitude':'Latitude',
            'X6 longitude':'Longitude',
            'Y house price of unit area':'Price'}, 
            inplace=True
            )

In [8]:
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()

X = df.iloc[:, :-1]
X = scaler.fit_transform(X)
y = df.iloc[:, -1]

In [9]:
from sklearn.model_selection import train_test_split

# splitting train dan test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 101)

In [10]:
y_train = y_train.values.reshape(-1, 1)

In [11]:
n_hidden = 20
learning_rate = 0.1
n_iter = 2000

model = train(X_train, y_train, n_hidden, learning_rate, n_iter)

  return 1.0 / (1 + np.exp(-z))


Iteration 100, training loss: 22882822982674.558594
Iteration 200, training loss: 22802414338307.558594
Iteration 300, training loss: 22760332486453.906250
Iteration 400, training loss: 22727449744467.839844
Iteration 500, training loss: 22699784045985.753906
Iteration 600, training loss: 22675999169660.097656
Iteration 700, training loss: 22655341990181.500000
Iteration 800, training loss: 22637282285347.371094
Iteration 900, training loss: 22621412649785.074219
Iteration 1000, training loss: 22607407298535.742188
Iteration 1100, training loss: 22594999887112.835938
Iteration 1200, training loss: 22583969552622.390625
Iteration 1300, training loss: 22574131307385.371094
Iteration 1400, training loss: 22565329047964.238281
Iteration 1500, training loss: 22557430268545.218750
Iteration 1600, training loss: 22550321950588.031250
Iteration 1700, training loss: 22543907300740.371094
Iteration 1800, training loss: 22538103121983.097656
Iteration 1900, training loss: 22532837670444.191406
It

In [12]:
y_pred = predict(X_test, model)
print(y_pred)
print(y_test)

[[3493405.35227438]
 [3493405.35227438]
 [5014192.53697563]
 [3493405.35227438]
 [ 615351.66422826]
 [ 552497.59569939]
 [ 150376.99676421]
 [1671164.18146546]
 [-567610.79995715]
 [ 357450.14978706]
 [1671164.18146546]
 [2161544.07121722]
 [3280210.65698329]
 [ 150376.99676421]
 [1759423.47228204]
 [ 520591.73577004]
 [2475130.06752976]
 [ 640756.88651597]
 [ 924269.78704246]
 [5014192.53697563]
 [5014192.53697563]
 [3280210.65698329]
 [3280210.65698329]
 [3875752.10148507]
 [5014192.53697563]
 [3280210.65698329]
 [1671164.18146546]
 [2067075.94763696]
 [1759423.47228204]
 [1300056.03659521]
 [1759423.47228204]
 [1759423.47228204]
 [2475130.06752976]
 [1622093.69214599]
 [3280210.65698329]
 [1759423.47228204]
 [3587863.13233821]
 [1759423.47228204]
 [2700274.95330379]
 [1671164.18146546]
 [1759423.47228204]
 [5014192.53697563]
 [5014192.53697563]
 [1671164.18146546]
 [3493405.35227438]
 [3280210.65698329]
 [-269101.20482631]
 [1946910.79689103]
 [2354964.91678382]
 [3280210.65698329]


  return 1.0 / (1 + np.exp(-z))


In [13]:
y_pred = y_pred.reshape(1, -1)[0]

In [14]:
y_test[np.isnan(y_test)] = 0
y_pred[np.isnan(y_pred)] = 0

In [15]:
RES_test=y_test-y_pred
pd.DataFrame({'Y_test':y_test,'y_pred':y_pred,'RES':RES_test})

Unnamed: 0,Y_test,y_pred,RES
56,301000.0,3493405.0,-3192405.0
90,370000.0,3493405.0,-3123405.0
149,5987.0,5014193.0,-5008206.0
35,88586.0,3493405.0,-3404819.0
213,3841.0,615351.7,-611510.7
39,13720000.0,552497.6,13167500.0
209,1520000.0,150377.0,1369623.0
47,5887700.0,1671164.0,4216536.0
119,4240000.0,-567610.8,4807611.0
197,6714000.0,357450.1,6356550.0


In [16]:
from sklearn import metrics

meanAbErr = metrics.mean_absolute_error(y_test, y_pred)
meanSqErr = metrics.mean_squared_error(y_test, y_pred)
rootMeanSqErr = np.sqrt(metrics.mean_squared_error(y_test, y_pred))

print('Mean Absolute Error:', meanAbErr)
print('Mean Square Error:', meanSqErr)
print('Root Mean Square Error:', rootMeanSqErr)

Mean Absolute Error: 3780196.209280363
Mean Square Error: 26079804472861.645
Root Mean Square Error: 5106838.9903013045
