<a href="https://colab.research.google.com/github/marzukipilliang/BERT-and-K-Means-for-Predict-Risk-Matrix/blob/main/Ekstraksi_Data_JSON_Trello.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so that files
# stored under "MyDrive" can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import os
import json
import numpy as np

# Connect to Kaggle

In [3]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

# Then move kaggle.json into the folder where the API expects to find it.
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json
User uploaded file "kaggle.json" with length 71 bytes


In [4]:
# Folder on the mounted Google Drive that holds this notebook's input files
# (the Trello JSON export is read from here later on).
# The path is absolute: Drive is mounted at /content/drive, so the lookup no
# longer depends on the current working directory being /content. The f-string
# prefix was dropped because the literal has no placeholders.
# NOTE: `dir` shadows the built-in and `files` shadows the google.colab `files`
# module imported earlier; both names are kept so existing references keep working.
dir = '/content/drive/MyDrive/Colab Notebooks'
files = os.listdir(dir)

In [5]:
# Download the output files of the Kaggle kernel
# "marzukipilliang/803k-issues-keras-risk-matrix" into /content/dir.
# Among them is df_keras_risk_matrix.pickle, which is loaded later to fit the tokenizer.
!kaggle kernels output marzukipilliang/803k-issues-keras-risk-matrix -p /content/dir

Output file downloaded to /content/dir/categories_priority.png
Output file downloaded to /content/dir/df_keras_risk_matrix.pickle
Output file downloaded to /content/dir/keras_cluster.png
Output file downloaded to /content/dir/priority_chart.png
Output file downloaded to /content/dir/risk_matrix.png
Kernel log downloaded to /content/dir/803k-issues-keras-risk-matrix.log 


In [6]:
# Download the output files of the Kaggle kernel
# "marzukipilliang/803k-issues-keras-classify-risk" into /content/dir.
# Among them is lstm_classification.h5, the model file referenced by MODELPATH.
!kaggle kernels output marzukipilliang/803k-issues-keras-classify-risk -p /content/dir

Output file downloaded to /content/dir/lstm_classification.h5
Output file downloaded to /content/dir/lstm_matrix.png
Output file downloaded to /content/dir/model_history.png
Output file downloaded to /content/dir/naive_matrix.png
Output file downloaded to /content/dir/priority_chart.png
Output file downloaded to /content/dir/svm_matrix.png
Output file downloaded to /content/dir/train_label.png
Output file downloaded to /content/dir/validation_label.png
Kernel log downloaded to /content/dir/803k-issues-keras-classify-risk.log 


In [7]:
import tensorflow as tf
import tensorflow.python.keras
import seaborn as sns
import pickle
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from datetime import datetime
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Load LSTM Model

In [8]:
# Location of the saved LSTM classifier fetched from the Kaggle kernel output.
MODELPATH = '/content/dir/lstm_classification.h5'

In [9]:
# Load the saved Keras model from the .h5 file at MODELPATH.
lstm_model = tf.keras.models.load_model(MODELPATH)

In [10]:
# Risk priority labels. The position in the list is the class index that
# lstm_priority() takes from the model's argmax:
#   0 -> Extreme risk
#   1 -> High risk
#   2 -> Medium risk
#   3 -> Small (low) risk
CLASSES = [
    'Extreme',
    'High',
    'Medium',
    'Small',
]

In [11]:
# Length every tokenised text is padded or truncated to before prediction.
# NOTE(review): presumably this must equal the sequence length the LSTM was
# trained with — confirm against the training kernel.
MAX_LEN = 185

In [12]:
# Load the pickled DataFrame produced by the risk-matrix Kaggle kernel.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
risk_matrix_pickle = '/content/dir/df_keras_risk_matrix.pickle'
with open(risk_matrix_pickle, mode='rb') as fp:
    df = pickle.load(fp)

In [13]:
# Keras text tokenizer configured with num_words=50000 and 'oov' as the
# placeholder token for out-of-vocabulary words.
# NOTE(review): these settings presumably mirror the tokenizer used when the
# LSTM was trained — confirm against the training kernel.
tokenizer = Tokenizer(num_words=50000, oov_token='oov')

In [14]:
# Build the tokenizer vocabulary from the 'cleaned' column of the pickled DataFrame.
# NOTE: `df` is rebound to the Trello cards frame further down the notebook
# (which has no 'cleaned' column), so this cell fails if it is re-run after
# that point without reloading the pickle first.
tokenizer.fit_on_texts(df['cleaned'].tolist())

In [15]:
def lstm_priority(corpus):
    """Predict the risk-priority label(s) for one or more texts.

    The texts are tokenised with the module-level ``tokenizer``, padded or
    truncated (at the end) to ``MAX_LEN`` tokens, and scored by ``lstm_model``.
    The class with the highest score per text is mapped to its name in ``CLASSES``.

    Parameters
    ----------
    corpus : str or iterable of str
        A single text, or a collection of texts. A bare string is treated as
        one text rather than being iterated character by character.

    Returns
    -------
    str or list of str
        The label itself when exactly one text is given (the original
        behaviour), otherwise a list with one label per text, in input order.
        An empty ``corpus`` yields an empty list without calling the model.
    """
    # A bare string would otherwise be tokenised one character at a time.
    if isinstance(corpus, str):
        corpus = [corpus]
    else:
        corpus = list(corpus)

    # Nothing to classify: skip the model call entirely.
    if not corpus:
        return []

    tkn = tokenizer.texts_to_sequences(corpus)
    tkn_padded = pad_sequences(tkn, padding='post', truncating='post', maxlen=MAX_LEN)
    prd = lstm_model.predict(tkn_padded)

    # argmax over axis 1 gives one class index per input row. Converting each
    # element separately avoids int() on a whole array, which raises for more
    # than one row and is deprecated by NumPy even for a single row.
    labels = [CLASSES[int(idx)] for idx in np.argmax(prd, axis=1)]
    return labels[0] if len(labels) == 1 else labels

In [16]:
# Smoke test: classify one sample sentence and print the predicted label.
print(lstm_priority(['good performance']))

Small


# Parsing Trello Data

In [17]:
# Read the Trello board export (JSON) from the Drive folder.
# The context manager closes the file handle once parsing is done; the original
# code left it open. The encoding is explicit because JSON text is UTF-8.
with open(os.path.join(dir, 'IsoGsgt8.json'), encoding='utf-8') as f:
    data = json.load(f)

In [18]:
# Lookup tables from Trello ids to display names for lists, members and labels.
lists = {entry['id']: entry['name'] for entry in data['lists']}
users = {member['id']: member['fullName'] for member in data['members']}
labels = {label['id']: label['name'] for label in data['labels']}

# Flatten every card into a plain record with ids resolved to names.
# Member and label names follow the order of the board-level lookup tables.
parsed_cards = []
for card in data['cards']:
    record = {
        "name": card['name'],
        "list": lists[card['idList']],
        "description": card['desc'],
    }
    record["members"] = [
        full_name for member_id, full_name in users.items()
        if member_id in card['idMembers']
    ]
    record["labels"] = [
        label_name for label_id, label_name in labels.items()
        if label_id in card['idLabels']
    ]
    parsed_cards.append(record)

# Board metadata plus the flattened cards.
board_data = {"name": data['name'], "url": data['shortUrl']}
output = {"board_data": board_data, "cards": parsed_cards}

In [19]:
# Display the parsed cards (name, list, description, members, labels per card).
output['cards']

[{'name': 'Backlog',
  'list': 'Backlog',
  'description': 'A list of the things we think we want to do, maybe not quite ready for work, but high likelihood of being worked on.\n\nThis is the staging area where specs should get fleshed out.\n\nNo limit on the list size, but we should reconsider if it gets long.',
  'members': [],
  'labels': []},
 {'name': 'Term of References',
  'list': 'Backlog',
  'description': 'Pelayanan Terpatu Satu Pintu (PTSP) dilatar belakangi hal berikut:\n\n- Pentingnya peningkatan _service level_ Kementerian Agama Kota Bekasi dalam memberikan pelayanan prima kepada masyarakat.\n- Pentingnya meminimalisir kontak langsung masyarakat dengan petugas dimasa pandemi, layanan Kemenag Kota Bekasi dapat diakses kapan saja dan dimana saja.\n- Pentingnya penyajian data bagi pimpinan dan stakeholder terkait yang akurat dan riil time untuk menjadi rujukan pengambilan keputusan.\n- Pentingnya penyajian data bagi pimpinan dan stakeholder terkait yang akurat dan riil time 

In [20]:
# Turn the parsed Trello cards into a DataFrame, one row per card.
# NOTE: this rebinds `df`, which previously held the pickled DataFrame used to
# fit the tokenizer; the tokenizer-fitting cell cannot be re-run after this
# point without reloading the pickle.
df = pd.DataFrame(output['cards'])

In [21]:
# Show the column names of the cards DataFrame.
list(df.columns)

['name', 'list', 'description', 'members', 'labels']

In [22]:
# Select the descriptions of cards in the "Risk Register" list, skipping cards
# with an empty description and the card named "Risks".
is_risk_card = (
    df['description'].ne('')
    & df['list'].eq('Risk Register')
    & df['name'].ne('Risks')
)
risks = df.loc[is_risk_card, 'description'].tolist()

In [23]:
# One row per risk description, in a single column named 'risk'; the trailing
# expression displays the frame.
df_risk = pd.DataFrame(risks, columns=['risk'])
df_risk

Unnamed: 0,risk
0,Server delivery from vendors is expected to be...
1,The developer is on holiday for Christmas and ...
2,Requests for changes to flow from the Administ...
3,In this first sprint there is a possibility th...
4,The Product Owner was confirmed that the domai...
5,"The main link internet was not installed yet, ..."
6,The request for the Email Blast service was re...
7,The Product Owner requested a change to the Fl...
8,"SSL security cannot be activated, because the ..."
9,There is no captcha validation yet on the new ...


# Risk Priority Classification Using the LSTM Model

In [24]:
# Classify each risk description on its own (one model call per row) and store
# the predicted priority label in a new 'prediction' column.
df_risk['prediction'] = [lstm_priority([text]) for text in df_risk['risk']]



In [25]:
# Display the risks together with their predicted priority labels.
df_risk

Unnamed: 0,risk,prediction
0,Server delivery from vendors is expected to be...,Small
1,The developer is on holiday for Christmas and ...,Medium
2,Requests for changes to flow from the Administ...,Small
3,In this first sprint there is a possibility th...,Medium
4,The Product Owner was confirmed that the domai...,Small
5,"The main link internet was not installed yet, ...",Small
6,The request for the Email Blast service was re...,Small
7,The Product Owner requested a change to the Fl...,Medium
8,"SSL security cannot be activated, because the ...",Small
9,There is no captcha validation yet on the new ...,Small
