In [None]:

# 1. INSTALASI LIBRARY
print("Menginstall Gradio & Sastrawi...")
!pip install -q gradio Sastrawi

import gradio as gr
import pandas as pd
import os
import re
import string
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [None]:

# 2. PERSIAPAN DATA (DOWNLOAD CEPAT)
# download dulu datanya supaya Chatbot INSTAN (tidak loading saat dibuka)
print("Menyiapkan Data...")
if not os.path.exists('data'):
    os.makedirs('data')

# Hapus data lama & download baru dari GitHub
!rm -f data/data_aturan.csv
!wget -q -O data/data_aturan.csv https://raw.githubusercontent.com/rhnrafif/datamining_1/main/data/data_aturan.csv

print("Data Siap digunakan!")


In [None]:

# 3. OTAK CHATBOT (NLP CLASS)
class HRDChatbot:
    """Retrieval chatbot that answers from a CSV of questions and answers.

    The CSV must provide the columns 'pertanyaan' (question) and 'jawaban'
    (answer). Each row is turned into one document (question + answer),
    preprocessed, and vectorized with TF-IDF. A user query is preprocessed the
    same way and matched against the documents by cosine similarity.

    Attributes:
        df: The loaded DataFrame, extended with the 'knowledge_base' and
            'clean_knowledge' columns. Empty when loading failed.
        stemmer: Sastrawi stemmer (used via ``.stem(text)``).
        stopword: Sastrawi stop-word remover (used via ``.remove(text)``).
        vectorizer: The TfidfVectorizer fitted on 'clean_knowledge'.
        tfidf_matrix: TF-IDF matrix of the knowledge base, or None when the
            DataFrame is empty.
        min_score: Minimum cosine similarity required to return an answer.
    """

    # Columns the CSV has to contain for the bot to work.
    REQUIRED_COLUMNS = ('pertanyaan', 'jawaban')

    # Slang dictionary (normalization): informal word -> formal replacement.
    SLANG_MAP = {
        "ngerokok": "merokok",
        "gimana": "bagaimana",
        "gak": "tidak",
        "gapapa": "tidak apa apa",
        "bikin": "buat",
        "telat": "terlambat",
        "absen": "presensi",
    }

    # Matches a slang word only as a whole word, optionally followed by a
    # common enclitic (e.g. "telatnya"). This prevents corrupting unrelated
    # words that merely contain a slang word ("tegak", "absensi").
    SLANG_PATTERN = re.compile(
        r"\b(" + "|".join(sorted(map(re.escape, SLANG_MAP), key=len, reverse=True)) + r")"
        r"(?=(?:nya|lah|kah|pun|ku|mu)?\b)"
    )

    def __init__(self, csv_path, min_score=0.15):
        """Load the knowledge base from ``csv_path`` and fit the TF-IDF model.

        Args:
            csv_path: Path of the CSV file to load.
            min_score: Similarity threshold below which ``get_answer`` returns
                the "not found" message. Defaults to 0.15.

        If the file cannot be read or lacks the required columns, a warning is
        printed and the bot starts with an empty knowledge base instead of
        raising.
        """
        self.min_score = min_score

        # Load data; fall back to an empty frame so the notebook keeps running.
        try:
            self.df = pd.read_csv(csv_path)
        except Exception as exc:  # not a bare except: keep Ctrl-C / SystemExit working
            print(f"PERINGATAN: gagal memuat '{csv_path}' ({exc}); chatbot berjalan tanpa data.")
            self.df = pd.DataFrame(columns=list(self.REQUIRED_COLUMNS))

        missing = [col for col in self.REQUIRED_COLUMNS if col not in self.df.columns]
        if missing:
            print(f"PERINGATAN: kolom {missing} tidak ada di '{csv_path}'; chatbot berjalan tanpa data.")
            self.df = pd.DataFrame(columns=list(self.REQUIRED_COLUMNS))

        # NLP tools
        self.stemmer = StemmerFactory().create_stemmer()
        self.stopword = StopWordRemoverFactory().create_stop_word_remover()

        # Knowledge base: question and answer joined into one document.
        # fillna("") keeps missing cells from becoming the literal token "nan".
        self.df['knowledge_base'] = (
            self.df['pertanyaan'].fillna("").astype(str)
            + " "
            + self.df['jawaban'].fillna("").astype(str)
        )
        self.df['clean_knowledge'] = self.df['knowledge_base'].apply(self.preprocessing)

        # Fit the TF-IDF model (skipped when there is nothing to fit on).
        self.vectorizer = TfidfVectorizer()
        if not self.df.empty:
            self.tfidf_matrix = self.vectorizer.fit_transform(self.df['clean_knowledge'])
        else:
            self.tfidf_matrix = None

    def preprocessing(self, text):
        """Normalize ``text`` for vectorization and return the cleaned string.

        Steps, in order: lowercase, strip digits, strip ASCII punctuation,
        remove stop words, replace slang words with their formal form, stem.
        """
        text = str(text).lower()
        text = re.sub(r'\d+', '', text)
        text = text.translate(str.maketrans('', '', string.punctuation))
        text = self.stopword.remove(text)

        # Slang normalization runs after stop-word removal, so replacements
        # such as "tidak" are kept in the text.
        text = self.SLANG_PATTERN.sub(lambda m: self.SLANG_MAP[m.group(1)], text)

        return self.stemmer.stem(text)

    def get_answer(self, user_input):
        """Return the stored answer most similar to ``user_input``.

        Returns an apology message when the knowledge base is empty, and a
        "not found" message when the best cosine similarity is below
        ``min_score``.
        """
        if self.df.empty or self.tfidf_matrix is None:
            return "Maaf, database sedang gangguan."

        clean_input = self.preprocessing(user_input)
        input_vec = self.vectorizer.transform([clean_input])
        # Single query -> take the only row of the similarity matrix.
        scores = cosine_similarity(input_vec, self.tfidf_matrix)[0]

        best_idx = int(scores.argmax())
        best_score = scores[best_idx]

        if best_score < self.min_score:
            # The emoji is written as an escape so it survives re-encoding of the file.
            return "Maaf, saya tidak menemukan aturan terkait hal tersebut. Silakan hubungi HRD langsung ya. \U0001F60A"
        return self.df.iloc[best_idx]['jawaban']

# Create the shared bot instance from the downloaded rules CSV.
bot_path = "data/data_aturan.csv"
hrd_bot = HRDChatbot(csv_path=bot_path)


In [None]:

# 4. TAMPILAN ANTARMUKA (GRADIO UI)
def respond(message, history):
    """Answer ``message`` using the module-level ``hrd_bot``.

    Wrapper with the (message, history) signature so it can be passed as
    ``fn`` to ``gr.ChatInterface``; ``history`` is accepted but not used.
    """
    answer = hrd_bot.get_answer(message)
    return answer

# Build the chatbot UI.
demo = gr.ChatInterface(
    fn=respond,
    # The brain emoji is written as an escape: the literal was stored as
    # mojibake and would render as garbage in the page title.
    title="\U0001F9E0 Asisten HRD Virtual",
    description="Tanyakan apa saja seputar aturan kantor (Cuti, Gaji, Seragam, dll).",
    theme=gr.themes.Soft(),
    examples=["Berapa jatah cuti setahun?", "Aturan seragam hari jumat", "Sanksi kalau telat", "Boleh merokok ga?"],
    cache_examples=False,
)

# Tell the user where to find the public link in the launch output.
print("\n" + "="*40)
print("JALANKAN LINK DI BAWAH INI:")
print("Look for the link: Running on public URL: https://.......gradio.live")
print("="*40 + "\n")

# share=True is the key: it requests a public URL (no LocalTunnel needed).
demo.launch(share=True)