## Import Library

Menghubungkan ke Google Drive

In [None]:
from google.colab import drive

# Mount Google Drive under /content/drive; force_remount=True requests a fresh mount.
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np

# preprocessing
import re, string
from spacy.lang.id import Indonesian
import spacy

# Prediksi Klasifikasi
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer
import joblib

# Deteksi Objek
import os
import cv2
import torch
from PIL import Image

## Dataset

In [None]:
# Training texts; column '0' of this frame is used further down to fit the TF-IDF vectorizer.
df_train = pd.read_csv("/content/drive/MyDrive/skripsi/teks/X_train_baru_2.csv")

In [None]:
# Read the JSON export of one document. Each page arrives as a column
# (page_1, page_2, ...), so flip the frame to get one row per page.
dfJson = pd.read_json(
    "/content/drive/MyDrive/skripsi/test/JSON/BAR e-Rekon/210511_019166_Berita_Acara_Rekonsiliasi.json"
)
dfJsonT = dfJson.T

In [None]:
# Show the per-page frame: one row per page with its `text` and `image_path`.
dfJsonT

Unnamed: 0,text,image_path
page_1,BERITA ACARA REKONSILIASI\nNomor: BAR-1586/WPB...,images/Baru\210511_019166_Berita_Acara_Rekonsi...
page_2,Laporan Hasil Rekonsiliasi\nUAKPA: BADAN PUSAT...,images/Baru\210511_019166_Berita_Acara_Rekonsi...
page_3,Lampiran |\n\nBerita Acara Rekonsiliasi\n\nNom...,images/Baru\210511_019166_Berita_Acara_Rekonsi...


In [None]:
# Merge all pages of the document into a single-row dataframe.
# Pages are joined with a newline so the last word of one page and the first
# word of the next are not fused into a single token (whitespace is collapsed
# later during cleaning, so the separator has no other effect).
# Missing page text is treated as empty, and empty pages are skipped so that a
# document without any text still yields '' for the emptiness check below.
page_texts = dfJsonT['text'].fillna('').astype(str)
dfJsonBaru = '\n'.join(page_text for page_text in page_texts if page_text)
# NOTE(review): image paths are still concatenated without a delimiter, as
# before; the result is not a usable path. Confirm whether `imgPath` is needed.
imagePath = ''.join(dfJsonT.image_path)
dt = {'teks' : [dfJsonBaru],
      'imgPath': [imagePath]}
data = pd.DataFrame(dt)

In [None]:
# Show the merged single-row frame (`teks`, `imgPath`).
data

Unnamed: 0,teks,imgPath
0,BERITA ACARA REKONSILIASI\nNomor: BAR-1586/WPB...,images/Baru\210511_019166_Berita_Acara_Rekonsi...


In [None]:
# Stop early when the whole document contains no text.
# Text made only of whitespace (spaces, newlines) counts as empty as well,
# which a plain comparison with '' would let through.
import sys

if not data['teks'].iloc[0].strip():
  sys.exit("Dokumen tidak valid. Mohon unggah dokumen yang sesuai")

## Preprocessing Data Input Dokumen

*cleaning text*

In [None]:
def casefolding(text):
  """Normalize raw document text into lowercase ASCII words separated by single spaces.

  Steps, in order: lowercase; replace ASCII punctuation with spaces; delete
  digits; collapse whitespace; drop non-ASCII characters; collapse whitespace
  again and trim both ends.

  Args:
    text: Raw text as a str.

  Returns:
    The cleaned string, without leading, trailing or repeated spaces.
  """
  text = text.lower()
  # Punctuation becomes a space (rather than being deleted) so that
  # "bar-1586/wpb" splits into separate words.
  text = re.sub('[%s]' % re.escape(string.punctuation), ' ', text)
  text = re.sub(r'\d+', '', text)
  # Collapse before the ASCII filter: Unicode whitespace such as NBSP is turned
  # into a plain space here instead of being deleted, which would glue words.
  text = re.sub(r'\s+', ' ', text)
  text = text.encode('ascii', 'ignore').decode('ascii')
  # Removing a non-ASCII symbol that sat between two spaces (e.g. a dash glyph)
  # leaves a double space, and punctuation at the edges leaves stray spaces,
  # so normalize once more at the very end.
  text = re.sub(r'\s+', ' ', text).strip()

  return text

*tokenize*

In [None]:
def tokenisasi(text):
  """Split text into tokens on whitespace.

  Runs of whitespace and whitespace at either end are ignored, so no
  empty-string tokens are produced.

  Args:
    text: The string to tokenize.

  Returns:
    A list of non-empty tokens; an empty list for empty or blank input.
  """
  return text.split()

*stopword removal/stopword elimination*

In [None]:
def stopword_elim(text):
    """Remove Indonesian stopwords from the given text.

    Args:
        text: Cleaned text whose words are separated by spaces.

    Returns:
        The remaining tokens joined by single spaces.
    """
    # The stopword set is defined on the language class itself, so there is no
    # need to build a pipeline object on every call just to read it.
    stopwords = Indonesian.Defaults.stop_words
    tokens_nostopword = [w for w in tokenisasi(text) if w not in stopwords]
    return " ".join(tokens_nostopword)

Memanggil masing-masing fungsi preprocessing secara berurutan (*casefolding*, lalu *stopword removal*)

In [None]:
def preprocess(text):
  """Clean raw text with `casefolding`, then drop stopwords from the result."""
  cleaned = casefolding(text)
  return stopword_elim(cleaned)

In [None]:
# Run the full preprocessing on every document text and keep the result in a new column.
data['clean_text'] = data['teks'].apply(preprocess)

In [None]:
# Show the frame again, now including the `clean_text` column.
data

Unnamed: 0,teks,imgPath,clean_text
0,BERITA ACARA REKONSILIASI\nNomor: BAR-1586/WPB...,images/Baru\210511_019166_Berita_Acara_Rekonsi...,berita acara rekonsiliasi nomor bar wpb kp kam...


## Prediksi Klasifikasi

In [None]:
# Training texts as a NumPy unicode-string array (astype already returns a new array).
X_train = df_train['0'].values.astype('U')

In [None]:
# Cleaned text of the new document as a NumPy unicode-string array.
X_baru = data['clean_text'].values.astype('U')

*Feature extraction* menggunakan pembobotan TF-IDF

In [None]:
# Fit the TF-IDF vocabulary and weights on the training texts only, then
# project the new document into that same feature space as a dense array.
# NOTE(review): the vectorizer is re-fitted here rather than loaded from disk;
# presumably this mirrors how the saved model was trained. Confirm.
tfidf_vectorizer = TfidfVectorizer(use_idf=True, encoding='latin-1')
tfidf_vectorizer.fit(X_train)

X_baru_tfidf = tfidf_vectorizer.transform(X_baru).toarray()

Prediksi dengan model yang sudah ada

In [None]:
# Location of the saved text-classification model that `prediksi` loads.
teks_path = "/content/drive/MyDrive/skripsi/teks/model_mnb_baru"

In [None]:
def prediksi(path, features=None):
  """Load a saved classifier and return its predicted class for the first input row.

  Args:
    path: Location of the model file to load with joblib.
    features: Feature matrix to classify. When omitted, the notebook-level
      `X_baru_tfidf` is used, so existing `prediksi(path)` calls keep working.

  Returns:
    The first element of the model's prediction array.
  """
  if features is None:
    features = X_baru_tfidf
  # SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
  # Only load model files from a trusted source.
  loaded_model = joblib.load(path)
  y_pred_mnb = loaded_model.predict(features)
  return y_pred_mnb[0]

In [None]:
# Predicted document class for the uploaded document.
pred = prediksi(teks_path)

In [None]:
# Show the predicted class label.
pred

2

In [None]:
# Check that the document is of the expected type. Class 2 is the only accepted
# label (the final summary cell reports it as "BAR e-Rekon"); anything else
# prints the rejection message and stops.
import sys

is_expected_type = (pred == 2)
if not is_expected_type:
  text = "Dokumen tidak valid. Mohon unggah dokumen yang sesuai"
  print(text)
  sys.exit()

## Deteksi Tanda Tangan dan Stempel

In [None]:
# Relative image path of every page, in column (page) order.
imgPath = dfJson.loc["image_path"].tolist()

In [None]:
def fullpath(image_path, base_dir="/content/drive/MyDrive/skripsi/test/image/BAR e-Rekon/"):
  """Turn relative page-image paths into absolute, forward-slash paths.

  Args:
    image_path: Iterable of relative image paths; backslash separators are allowed.
    base_dir: Directory prefix put in front of every path. It is concatenated
      as-is, so it should end with a path separator.

  Returns:
    A list with one path per input entry, in the same order, with every
    backslash replaced by "/".
  """
  # Iterate the argument itself (not a notebook-level variable) so the function
  # works for any list of paths it is given.
  return [(base_dir + relative).replace("\\", "/") for relative in image_path]

In [None]:
# Absolute paths of all page images of the document.
path_list = fullpath(imgPath)

In [None]:
# Print the resolved paths to verify them before running detection.
print(path_list)

['/content/drive/MyDrive/skripsi/test/image/BAR e-Rekon/images/Baru/210511_019166_Berita_Acara_Rekonsiliasi_1.jpeg', '/content/drive/MyDrive/skripsi/test/image/BAR e-Rekon/images/Baru/210511_019166_Berita_Acara_Rekonsiliasi_2.jpeg', '/content/drive/MyDrive/skripsi/test/image/BAR e-Rekon/images/Baru/210511_019166_Berita_Acara_Rekonsiliasi_3.jpeg']


In [None]:
# Switch to the YOLOv5 project directory; the relative weights path given to
# torch.hub.load further down is resolved from the current directory.
%cd /content/drive/MyDrive/skripsi/YOLOv5/Signature-Verification_System_using_YOLOv5-and-CycleGAN/Training/YOLOv5/yolov5

/content/drive/MyDrive/skripsi/YOLOv5/Signature-Verification_System_using_YOLOv5-and-CycleGAN/Training/YOLOv5/yolov5


In [None]:
!git clone https://github.com/ultralytics/yolov5
!cd yolov5 && pip install -r requirements.txt
!cd ..

fatal: destination path 'yolov5' already exists and is not an empty directory.
Collecting gitpython>=3.1.30 (from -r requirements.txt (line 5))
  Downloading GitPython-3.1.32-py3-none-any.whl (188 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.5/188.5 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Collecting ultralytics>=8.0.111 (from -r requirements.txt (line 18))
  Downloading ultralytics-8.0.136-py3-none-any.whl (605 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m605.4/605.4 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython>=3.1.30->-r requirements.txt (line 5))
  Downloading gitdb-4.0.10-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1-

In [None]:
# Load the trained detection model through torch.hub using the custom weights file.
# force_reload=True refreshes the cached hub repository on each run.
fine_tune = torch.hub.load(
    'ultralytics/yolov5',
    'custom',
    path='runs/train/exp/weights/best.pt',
    force_reload=True,
)

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip
    import torch
    ckpt = torch.load("model.pt")  # applies to both official and custom models
    torch.save(ckpt, "updated-model.pt")

YOLOv5 🚀 2023-7-17 Python-3.10.12 torch-2.0.1+cu118 CPU

Fusing layers... 
Model summary: 346 layers, 87313087 parameters, 0 gradients
Adding AutoShape... 


In [None]:
def detect_objects(path, min_confidence=0.4):
  """Run the detection model on each page image and report whether a QR code was found.

  Uses the notebook-level `fine_tune` model.

  Args:
    path: Sequence of image file paths; the position of a file in the sequence
      is reported as its page number.
    min_confidence: A "QRcode" detection counts only when its confidence is
      strictly greater than this value.

  Returns:
    A list with one dict per processed image, {"QRcode": bool, "page": int},
    where "page" is the 1-based position of the image in `path`.
  """
  detections = []
  # Page numbers follow the position in `path`, so a skipped file cannot shift
  # the numbering of the pages that come after it.
  for page, filename in enumerate(path, start=1):
    # Only JPEG pages are analysed; the check ignores case and also accepts "jpg".
    if not filename.lower().endswith(('jpeg', 'jpg')):
      continue

    # Close the file handle as soon as the RGB copy has been created.
    with Image.open(filename) as source:
      image = source.convert("RGB")

    results = fine_tune(image)

    has_qrcode = False
    # Each row ends with (confidence, class id); with zero rows the flag stays False.
    for row in results.xyxy[0]:
      *_box, confidence, class_id = row.tolist()
      if fine_tune.names[int(class_id)] == "QRcode" and confidence > min_confidence:
        has_qrcode = True
        break

    detections.append({"QRcode": has_qrcode, "page": page})

  return detections

In [None]:
# Run object detection on every page image of the document.
detect = detect_objects(path_list)

In [None]:
# Show the per-page detection results.
detect

[{'QRcode': True, 'page': 1},
 {'QRcode': False, 'page': 2},
 {'QRcode': True, 'page': 3}]

### Output

In [None]:
# Pages on which a QR code was detected; the overall flag is set when there is at least one.
p = [entry['page'] for entry in detect if entry['QRcode']]
q = len(p) > 0

detection = {
    "Jenis": pred,
    "QRcode": q,
    "page": p,
}

# Build the final message: wrong document type, valid with an electronic
# signature (listing the pages), or correct type without a signature.
if detection['Jenis'] != 2:
  text = "Dokumen tidak valid. Mohon unggah dokumen yang sesuai"
elif detection['QRcode']:
  pages = ', '.join(str(page_no) for page_no in detection['page'])
  text = "Dokumen Valid. Dokumen merupakan BAR e-Rekon dan terdapat tanda tangan elektronik di halaman " + pages
else:
  text = "Dokumen sesuai, namun tidak terdapat tanda tangan elektronik"

print(text)

Dokumen Valid. Dokumen merupakan BAR e-Rekon dan terdapat tanda tangan elektronik di halaman 1, 3
