# Importing Libraries dan Membuat Dataframe

In [1]:
#importing libraries
# untuk tensorflow yang dipakai menggunakan versi 1.15 untuk melakukan bacpropagation
import tensorflow as tf
import pandas as pd

In [2]:
#membentuk dataset
df = [("I feel like I am drowning. #depression #anxiety #failure #worthless", "fear"),
      ("#panic Panic attack from fear of starting new medication", "fear"),
      ("My bus was in a car crash... I'm still shaking a bit... This week was an absolute horror and this was the icing on the cake... #terrible", "fear"),
      ("Just got back from seeing @GaryDelaney in Burslem. AMAZING!! Face still hurts from laughing so much #hilarious", "joy"),
      ("It's the #FirstDayofFall and I'm so happy. Sipping my #PumpkinSpice flavoured coffee and #smiling! Happy Fall everyone! #amwriting", "joy"),
      ("Morning all! Of course it is sunny on this Monday morning to cheerfully welcome us back to work.:)", "joy")]

In [3]:
#dataset yang dibentuk akan di apply ke dalam dataframe menggunakan pandas
df = pd.DataFrame(df, columns=['Data', 'Label'])

In [4]:
df

Unnamed: 0,Data,Label
0,I feel like I am drowning. #depression #anxiet...,fear
1,#panic Panic attack from fear of starting new ...,fear
2,My bus was in a car crash... I'm still shaking...,fear
3,Just got back from seeing @GaryDelaney in Burs...,joy
4,It's the #FirstDayofFall and I'm so happy. Sip...,joy
5,Morning all! Of course it is sunny on this Mon...,joy


#DATA PREPROCESSING FOR CLEANING DATA

In [5]:
#mengimport libraries untuk cleaning data
#menggunakan dan mendownload stopwords WordNetLemmatizer dan re untuk preprocess data
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re

In [6]:
#download stopwords
nltk.download('stopwords')
#download wordnet
nltk.download('wordnet')
#download punctuation
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [7]:
#membentuk object cleanedWords untuk menampung kalimat/string yang akan di preprocess
cleanedWords = []
#inisialisasi WordNetLemmatizer
lemm = WordNetLemmatizer()
#inisialisasi stopwords
st_words = set(stopwords.words("english"))
# membuat variabel tx untuk menampung seluruh string yang ada di dalam dataset(df[Data])
for tx in df['Data']:
  #mengubah seluruh string yang memiliki huruf kapital menjadi huruf kecil
  tx = tx.lower()
  #membersihkan atau menghapus special character
  tx = re.sub("[^A-Za-z0-9]+"," ",tx)
  #tokenizing and lemmatizing
  #tokenizing: membagi text menjadi words (membagi kalimat menjadi kata)
  tx = nltk.word_tokenize(tx.lower())
  #lemmatizing: mengubah words atau kata-kata yang ada di dataset ke bentuk base form nya contohnya kata hurts yang di lemmatizing akan menghasilkan hurt.
  tx = [lemm.lemmatize(word) for word in tx]
  #remove stopwords
  tx = [word for word in tx if word not in st_words]
  #joining string yang sudah di proses
  tx = " ".join(tx)

  cleanedWords.append(tx)

In [8]:
#melihat hasil string yang sudah di preprocess
ranging_scale = 6
for i in range(ranging_scale):
    print(cleanedWords[i],end="\n")

feel like drowning depression anxiety failure worthless
panic panic attack fear starting new medication
bus wa car crash still shaking bit week wa absolute horror wa icing cake terrible
got back seeing garydelaney burslem amazing face still hurt laughing much hilarious
firstdayoffall happy sipping pumpkinspice flavoured coffee smiling happy fall everyone amwriting
morning course sunny monday morning cheerfully welcome u back work


In [9]:
#merubah label false dan joy ke 0 dan 1
df['Label'] = df['Label'].replace(['fear', 'joy'], [0,1])

In [10]:
df['Label'].head(6)

0    0
1    0
2    0
3    1
4    1
5    1
Name: Label, dtype: int64

# WORD2VEC

In [11]:
#melakukan import libraries gensim dan Word2vec serta mengapply cleanedWords ke dalam model Word2vec
import warnings as wr
import gensim
from gensim.models import Word2Vec
wr.filterwarnings(action='ignore')

w_model = gensim.models.Word2Vec(cleanedWords, min_count =1, window=5)

In [12]:
# print model untuk mengecek size, vocab dan alpha
print(w_model)

Word2Vec(vocab=25, size=100, alpha=0.025)


In [13]:
#list vocab yang terkandung di dalam model
words = list(w_model.wv.vocab)
print(words)

['f', 'e', 'l', ' ', 'i', 'k', 'd', 'r', 'o', 'w', 'n', 'g', 'p', 's', 'a', 'x', 't', 'y', 'u', 'h', 'c', 'm', 'b', 'z', 'v']


# MAKING BPNN MODEL

In [14]:
#melakukan vectorize untuk membuat bag of words
#bag of words: adalah untuk memetakan(mapping) kata ke dalam bentuk vektor
#atau dalam bentuk angka(1 dan 0) dan juga untuk mengecek apakah fitur(data)
#yang sudah kita preprocess mengandung kata di dalamnya, jika ada satu kata di dalam fitur tersedia maka akan terhitung 1 , jika tidak maka 0.
from sklearn.feature_extraction.text import CountVectorizer
vector = CountVectorizer(max_features=10000)
bag_words = vector.fit_transform(cleanedWords).toarray()

In [15]:
#splitting dataset menjadi training dan test set dari countvectorization
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(bag_words,np.asarray(df["Label"]), test_size=0.2)

In [16]:
x_train.shape

(4, 54)

In [17]:
y_train.shape

(4,)

In [18]:
x_test.shape

(2, 54)

In [19]:
y_test.shape

(2,)

In [20]:
#reshape data untuk di apply ke dalam encoder
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

In [21]:
#apply OneHotEncoder ke dalam y_train dan y_test yang sudah di resize
from sklearn.preprocessing import OneHotEncoder
en = OneHotEncoder()
y_train = en.fit_transform(y_train)
y_test = en.transform(y_test)
y_train = y_train.toarray()
y_test = y_test.toarray()

In [22]:
#membuat epoch dan learning rate
epochs = 2500
lr = 0.01

In [23]:
#membuat placeholder dengan float 32
input_tensor = tf.placeholder(tf.float32, name='input')
label_tensor = tf.placeholder(tf.float32, name='output')

In [24]:
#membentuk parameter dan arsitektur BPNN
#pemakaian angka 64 pada tiap weight dan bias merupakan angka default
parameters = {
    'W1': tf.Variable(
        tf.random.normal([
            54, 64
        ]),
        dtype=tf.float32
    ),
    'B1': tf.Variable(
        tf.random.normal([
            1, 64
        ]),
        dtype=tf.float32
    ),
    'W2': tf.Variable(
        tf.random.normal([
        64, 64
        ]),
        dtype=tf.float32
    ),
    'B2': tf.Variable(
        tf.random.normal([
        1, 64
        ]),
        dtype=tf.float32
    ),
    'W3': tf.Variable(
        tf.random.normal([
        64,2
        ]),
        dtype=tf.float32
    ),
    'B3': tf.Variable(
        tf.random.normal([
            1, 2
        ]),
        dtype=tf.float32
    )
}

In [25]:
#memuat function feed forward dengan activation function sigmoid dan softmax
def forward(x, parameters):
  W1 = parameters['W1']
  b1 = parameters['B1']
  W2 = parameters['W2']
  b2 = parameters['B2']
  W3 = parameters['W3']
  b3 = parameters['B3']

  a1 = tf.matmul(x, W1) + b1
  z1 = tf.nn.sigmoid(a1)
  a2 = tf.matmul(z1, W2) + b2
  z2 = tf.nn.softmax(a2)
  a3 = tf.matmul(z2, W3) + b3
  z3 = tf.nn.softmax(a3)

  return a3

In [27]:
saver = tf.train.Saver()

In [28]:
#training model dengan  acc, val_acc, val_loss, loss
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  output_tensor = forward(input_tensor, parameters)
  loss_tensor = tf.reduce_mean(0.5 * (label_tensor - output_tensor) ** 2)
  optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss_tensor)
  true_preds_tensor = tf.equal(
      tf.argmax(output_tensor, axis=1), tf.argmax(label_tensor, axis=1)
  )
  acc_tensor = tf.reduce_mean(tf.cast(true_preds_tensor, tf.float32))
  
  best_val_loss = 1000.0
  for epoch in range(epochs):
      sess.run(
          optimizer,
          feed_dict={
              input_tensor: x_train,
              label_tensor: y_train
          }
      )

      if epoch % 25 == 0:
        loss = sess.run(loss_tensor, feed_dict={
          input_tensor: x_train,
          label_tensor: y_train
        })

        acc = sess.run(acc_tensor, feed_dict={
        input_tensor: x_train,
        label_tensor: y_train
    })

        print(f'Epoch: {epoch}, loss : {loss}, acc: {acc}')

      if epoch % 125 == 0:
        val_loss = sess.run(loss_tensor, feed_dict={
          input_tensor: x_test,
          label_tensor: y_test
        })
        val_acc = sess.run(acc_tensor, feed_dict={
        input_tensor: x_test,
        label_tensor: y_test
        })

        print(f'\nVal_loss: {val_loss}, val_acc: {val_acc}')

        if val_loss < best_val_loss:
          best_val_loss = val_loss
          saver.save(sess, './model/best_model.ckpt')
          print("Model saved!\n")

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch: 0, loss : 2.0765209197998047, acc: 0.5

Val_loss: 1.4463896751403809, val_acc: 0.5
Model saved!

Epoch: 25, loss : 1.3390679359436035, acc: 0.5
Epoch: 50, loss : 0.7882785201072693, acc: 0.5
Epoch: 75, loss : 0.5345466136932373, acc: 0.5
Epoch: 100, loss : 0.3903392255306244, acc: 0.75
Epoch: 125, loss : 0.2968374788761139, acc: 0.75

Val_loss: 0.4107763469219208, val_acc: 0.5
Model saved!

Epoch: 150, loss : 0.23258736729621887, acc: 0.75
Epoch: 175, loss : 0.18650972843170166, acc: 0.75
Epoch: 200, loss : 0.1523474007844925, acc: 0.75
Epoch: 225, loss : 0.12633869051933289, acc: 0.75
Epoch: 250, loss : 0.10606130957603455, acc: 0.75

Val_loss: 0.20139645040035248, val_acc: 0.5
Model saved!

Epoch: 275, loss : 0.08985374122858047, acc: 0.75
Epoch: 300, loss : 0.07656005024909973, acc: 0.75
Epoch: 325, loss : 0.0654270127415657, acc: 1.0
Epoch: 350, loss : 0.05602692812681198, acc: 1.0


In [29]:
#bagian ini adalah bagian untuk melihat value yang dihasilkan dari model
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  logits_tensor = forward(input_tensor,parameters)
  loss_tensor = tf.reduce_mean(0.5 * (label_tensor - logits_tensor)**2)
  for s in range(epoch):
    sess.run(optimizer, feed_dict={
        input_tensor: x_train,
        label_tensor: y_train
    })
    
    train_predicted=sess.run(logits_tensor, feed_dict={
          input_tensor: x_train,
          label_tensor: y_train
        })
    predicted=sess.run(logits_tensor, feed_dict={
          input_tensor: x_test,
          label_tensor: y_test
        })
    
true_value = np.argmax(y_test, 1)
print("true value from test: ", true_value)
predicted = np.argmax(predicted, 1)
print("predicted true value from test: ", predicted)
true_value_from_train = np.argmax(y_train,1)
print("true value from train: ", true_value_from_train)
predicted_true_value_from_train = np.argmax(train_predicted,1)
print("predicted true value from train: ", predicted_true_value_from_train)

true value from test:  [1 0]
predicted true value from test:  [1 0]
true value from train:  [1 1 0 0]
predicted true value from train:  [1 1 0 0]


# PERFORMANCE OF BPNN


In [30]:
# mengimport library dari scikitlearn untuk accuracy score, percision dan recall score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score

In [31]:
#melihat hasil precsion dari training
Precision_from_training = precision_score(true_value_from_train, predicted_true_value_from_train)
print("Precision in training data: ", Precision_from_training)

Precision in training data:  1.0


In [32]:
#mealihat hasil dari recall training
recall_from_training = recall_score(true_value_from_train, predicted_true_value_from_train)
print("Recall in training data: ", recall_from_training)

Recall in training data:  1.0


In [33]:
#mealihat hasil dari accuracy training
accuracy_from_training = accuracy_score(true_value_from_train, predicted_true_value_from_train)
print("Accuracy in training data: ",accuracy_from_training)

Accuracy in training data:  1.0


In [34]:
#melihat hasil dari precision test
precision_test = precision_score(true_value, predicted)
print(f'Precision on test data: {precision_test:.3f}')

Precision on test data: 1.000


In [35]:
#melihat hasil dari recall test
recall_test = recall_score(true_value, predicted)
print(f'Precision on test data: {recall_test:.3f}')

Precision on test data: 1.000


In [36]:
#melihat accuracy dari test
accuracy_test = accuracy_score(true_value, predicted)
print(f'Precision on test data: {accuracy_test:.3f}')

Precision on test data: 1.000
