# Adil HTML Generation

## Download Model From GCP

In [None]:
!pip install --upgrade google-cloud-storage

Collecting google-cloud-storage
[?25l  Downloading https://files.pythonhosted.org/packages/12/75/78ed0d1ef691592b94e7a3d9f58153298166486342a97df82d3c5b66cc16/google_cloud_storage-1.38.0-py2.py3-none-any.whl (103kB)
[K     |████████████████████████████████| 112kB 3.9MB/s 
[?25hCollecting google-resumable-media<2.0dev,>=1.2.0
[?25l  Downloading https://files.pythonhosted.org/packages/f9/ad/bc80b0b33ccb5e21375ca1440da9dab99596948d5035e2f597fdcffb31f1/google_resumable_media-1.3.0-py2.py3-none-any.whl (75kB)
[K     |████████████████████████████████| 81kB 7.1MB/s 
Collecting google-cloud-core<2.0dev,>=1.4.1
  Downloading https://files.pythonhosted.org/packages/ad/fc/6e8c449185cb8862af353c1164100ff75e32d55ba1de3baf9eaa01b7d2a9/google_cloud_core-1.6.0-py2.py3-none-any.whl
Collecting google-crc32c<2.0dev,>=1.0; python_version >= "3.5"
  Downloading https://files.pythonhosted.org/packages/fc/ae/b6efa1019e18c6c791f0f5cd93b2ff40f8f06696dbf04db39ec0f5591b1e/google_crc32c-1.1.2-cp37-cp37m-manyl

In [None]:
from google.colab import files

# Ask for the GCP service-account key file through Colab's upload widget.
uploaded = files.upload()

# Fail here, with a clear message, instead of letting a cancelled upload show
# up later as a NameError on `service_account` in another cell.
if not uploaded:
  raise ValueError("No file was uploaded; a service-account key file is required.")

# If several files are uploaded, the last one becomes the credentials file.
for fn in uploaded.keys():
  service_account = fn
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

Saving adil-gcp-13783a2acc15.json to adil-gcp-13783a2acc15.json
User uploaded file "adil-gcp-13783a2acc15.json" with length 2293 bytes


In [None]:
import os

# Expose the uploaded key file's name through the environment. The
# storage.Client() calls in this notebook are built without explicit
# credentials, so they presumably pick the key up from this variable.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = service_account

In [None]:
# Sanity check: print the variable from a shell to confirm it is set.
!echo $GOOGLE_APPLICATION_CREDENTIALS

adil-gcp-13783a2acc15.json


### Download the model

### Download the zip model file

In [None]:
!mkdir saved_model

In [None]:
from google.cloud import storage

def download_model(bucket_name, source_name, destination_name):
  """Download one object from a Google Cloud Storage bucket to a local file.

  Args:
    bucket_name: Name of the bucket that holds the object.
    source_name: Name of the object inside the bucket.
    destination_name: Local path the object is written to.
  """
  source_blob = storage.Client().bucket(bucket_name).blob(source_name)
  source_blob.download_to_filename(destination_name)

  report = "File {} Successfully Downloaded to {}"
  print(report.format(source_name, destination_name))

In [None]:
# Fetch the zipped model archive into the local saved_model/ directory.
download_model(
    bucket_name="adil-model",
    source_name="adil_model_v2.zip",
    destination_name="saved_model/adil_model_v2.zip",
)

File adil_model_v2.zip Successfully Downloaded to saved_model/adil_model_v2.zip


In [None]:
!unzip saved_model/adil_model_v2.zip

Archive:  saved_model/adil_model_v2.zip
   creating: saved_model/adil_model/
  inflating: saved_model/adil_model/saved_model.pb  
   creating: saved_model/adil_model/variables/
  inflating: saved_model/adil_model/variables/variables.data-00000-of-00001  
  inflating: saved_model/adil_model/variables/variables.index  
  inflating: saved_model/adil_model/keras_metadata.pb  
   creating: saved_model/adil_model/assets/


### Download Sample TXT File

In [None]:
# Fetch one plain-text document to run the generator on.
download_model(
    bucket_name="adil-plaintext",
    source_name="11e44c4ebb124b60b246313231363135.0.txt",
    destination_name="sample.txt",
)

File 11e44c4ebb124b60b246313231363135.0.txt Successfully Downloaded to sample.txt


## Predict Words

### Load Model

In [None]:
import tensorflow as tf
import numpy as np

# Record the TensorFlow version this notebook is running against.
print(tf.__version__)

2.5.0


In [None]:
# Load the Keras model from the saved_model/adil_model directory
# (presumably the directory produced by unzipping the downloaded archive).
model = tf.keras.models.load_model('saved_model/adil_model')

In [None]:
# Print the layers, output shapes and parameter counts of the loaded model.
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
text_vectorization_11 (TextV (None, None)              0         
_________________________________________________________________
embedding_10 (Embedding)     (None, None, 32)          320000    
_________________________________________________________________
conv1d_10 (Conv1D)           (None, None, 32)          1056      
_________________________________________________________________
global_average_pooling1d_10  (None, 32)                0         
_________________________________________________________________
dense_20 (Dense)             (None, 128)               4224      
_________________________________________________________________
dense_21 (Dense)             (None, 5)                 645       
Total params: 325,925
Trainable params: 325,925
Non-trainable params: 0
_______________________________________________

### Predict The String

In [None]:
# Adjacent string literals are joined with an explicit trailing space on each
# piece. A backslash continuation inside a single literal drops the newline
# without adding a space, which glued words together (e.g. "DAERAH,DAN",
# "PELAKSANAANCUTI") and fed the model tokens that are not in the text.
word = (
    "TATA CARA PENGUNDURAN DIRI KEPALA DAERAH, WAKIL KEPALA DAERAH, "
    "DAN PEGAWAI NEGERI YANG AKAN MENJADI BAKAL CALON ANGGOTA DPR, "
    "DPD, DPRD PROVINSI, DAN DPRD KABUPATEN/KOTA, SERTA PELAKSANAAN "
    "CUTI PEJABAT NEGARA DALAM KAMPANYE PEMILU"
)

# The model takes raw strings; wrap the single sample in a batch of one.
prediction = model.predict(np.array([word]))
prediction

array([[1.5225636e-23, 3.0849716e-01, 6.9150287e-01, 4.7737520e-18,
        1.5663561e-12]], dtype=float32)

## HTML Generator

### Load TXT File

In [None]:
# Read the whole document and cut it into paragraphs at blank lines.
with open("sample.txt", 'r') as txt_file:
  content_data = txt_file.read().split("\n\n")

# Trim leading and trailing whitespace from every paragraph in one pass.
np_content = np.char.strip(np.array(content_data))
np_content[:5]

array(['PERATURAN PRESIDEN REPUBLIK INDONESIA \nNOMOR 20 TAHUN 2011',
       'TENTANG',
       'PERUBAHAN ATAS PERATURAN PRESIDEN NOMOR 96 TAHUN 2006 \nTENTANG TUNJANGAN DAN HAK-HAK LAINNYA BAGI HAKIM AD HOC \nPADA PENGADILAN HUBUNGAN INDUSTRIAL\n \nDENGAN RAHMAT TUHAN YANG MAHA ESA \nPRESIDEN REPUBLIK INDONESIA,',
       'Menimbang   :   bahwa dalam upaya peningkatan kinerja bagi Hakim Ad Hoc di \nlingkungan Pengadilan Hubungan Industrial pada Pengadilan Negeri \ndan Mahkamah Agung, maka dipandang perlu untuk mengatur \nkembali besarnya tunjangan bagi Hakim Ad Hoc pada Pengadilan \nHubungan Industrial, dengan Peraturan Presiden;',
       'Mengingat    :  1.Pasal 4 ayat (1) Undang-Undang Dasar Negara Republik \nIndonesia Tahun 1945;'],
      dtype='<U309')

In [None]:
# Class names, indexed by the position of the model's output unit.
labels = ["no_label", "body", "subtitle", "title", "section"]

# Maps each non-empty paragraph to its predicted label.
content_label = {}
for paragraph in np_content:
  if paragraph == '':
    continue
  # Predict one paragraph at a time (batch of one) and keep the top class.
  scores = model.predict(np.array([paragraph]))
  best = int(np.argmax(scores, axis=1)[0])
  content_label[paragraph] = labels[best]

In [None]:
import html

from bs4 import BeautifulSoup

head_template = []
body_template = []
section = 0      # becomes 1 once a <section> is open and not yet closed
title_count = 1  # stays 1 until the first block labelled "title" is emitted

for content in np_content:
  if content == '':
    continue

  label = content_label[content]
  # Escape &, < and > so document text can never be parsed as markup.
  text = html.escape(content, quote=False)
  flat_text = text.replace("\n", "")

  if title_count == 1:
    # Every block up to and including the first "title" block feeds both the
    # <title> element and a title heading in the body.
    head_template.append("  " + flat_text)
    body_template.append("<h1 data-block-type=\"title\"> \n" + flat_text + "\n </h1>")
    if label == "title":
      title_count += 1

  elif label == "title":
    body_template.append("<h1 data-block-type=\"title\"> \n" + flat_text + "\n </h1>")

  elif label == "section":
    # A new section heading closes the previous section, if one is open.
    opening = "<section> \n <h1> \n" + flat_text + "\n </h1>"
    if section == 1:
      opening = "</section> \n " + opening
    body_template.append(opening)
    section = 1

  elif label == "body":
    body_template.append("<p> \n" + text.replace("\n", "\n    ") + "\n </p>")

  elif label == "subtitle":
    body_template.append("<h1 data-block-type=\"subtitle\"> \n" + text.replace("\n", "\n    ") + "\n </h1>")

  elif label == "no_label":
    body_template.append("<p> \n" + text.replace("\n", "\n      ") + "\n </p>")

# Close the last section explicitly instead of relying on the parser to
# repair the unbalanced markup.
if section == 1:
  body_template.append("</section>")

html_template = (
    ["<html> \n <head> \n <title>"]
    + head_template
    + ["</title> \n <meta name=\"status\" content=\"aktif\"/> \n </head> \n <body> \n <article>"]
    + body_template
    + ["\n </article> \n </body> \n </html>"]
)
html_template = '\n '.join(html_template)

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so output can differ between machines.
soup = BeautifulSoup(html_template, "html.parser")
html_template = soup.prettify()

print(html_template)

<html>
 <head>
  <title>
   PERATURAN PRESIDEN REPUBLIK INDONESIA NOMOR 20 TAHUN 2011
  </title>
  <meta content="aktif" name="status"/>
 </head>
 <body>
  <article>
   <h1 data-block-type="title">
    PERATURAN PRESIDEN REPUBLIK INDONESIA NOMOR 20 TAHUN 2011
   </h1>
   <p>
    TENTANG
   </p>
   <h1 data-block-type="subtitle">
    PERUBAHAN ATAS PERATURAN PRESIDEN NOMOR 96 TAHUN 2006 
    TENTANG TUNJANGAN DAN HAK-HAK LAINNYA BAGI HAKIM AD HOC 
    PADA PENGADILAN HUBUNGAN INDUSTRIAL
     
    DENGAN RAHMAT TUHAN YANG MAHA ESA 
    PRESIDEN REPUBLIK INDONESIA,
   </h1>
   <p>
    Menimbang   :   bahwa dalam upaya peningkatan kinerja bagi Hakim Ad Hoc di 
    lingkungan Pengadilan Hubungan Industrial pada Pengadilan Negeri 
    dan Mahkamah Agung, maka dipandang perlu untuk mengatur 
    kembali besarnya tunjangan bagi Hakim Ad Hoc pada Pengadilan 
    Hubungan Industrial, dengan Peraturan Presiden;
   </p>
   <p>
    Mengingat    :  1.Pasal 4 ayat (1) Undang-Undang Dasar Negara Repub

### Save HTML

In [None]:
# Write the prettified markup to a local file for the upload step.
with open("sample.html", "w") as html_file:
    html_text = str(html_template)
    html_file.write(html_text)

### Upload to Bucket


In [None]:
from google.cloud import storage

def upload_html(bucket_name, source_name, destination_name):
  """Upload a local file to a Google Cloud Storage bucket.

  Args:
    bucket_name: Name of the bucket to upload into.
    source_name: Path of the local file to upload.
    destination_name: Object name the file is stored under in the bucket.
  """
  target_blob = storage.Client().bucket(bucket_name).blob(destination_name)
  target_blob.upload_from_filename(source_name)

  report = "File {} uploaded to Bucket: {}, with filename: {}"
  print(report.format(source_name, bucket_name, destination_name))

In [None]:
# Keyword arguments pin each value to the right parameter. The positional
# call passed the local file name as the bucket and the bucket name as the
# source file.
upload_html(
    bucket_name="adil-dataset",
    source_name="sample.html",
    destination_name="peraturan.html",
)