In [None]:
# 1. Install Library
!pip install -q streamlit Sastrawi

import os

# 2. Buat Folder Data
if not os.path.exists('data'):
    os.makedirs('data')

# 3. DOWNLOAD DATA DARI GITHUB (PAKAI WGET)
# Ini triknya: Kita download pake command line, jauh lebih cepat daripada Python
print("üöÄ Sedang mendownload data dari GitHub...")
!wget -q -O data/data_aturan.csv https://raw.githubusercontent.com/rhnrafif/datamining_1/main/data/data_aturan.csv

# Cek apakah berhasil
if os.path.exists('data/data_aturan.csv'):
    print("‚úÖ Data BERHASIL didownload dan disimpan di 'data/data_aturan.csv'")
    print("   Bot sekarang bisa baca file ini secara instan!")
else:
    print("‚ùå Gagal download. Cek URL GitHub kamu.")

In [None]:
%%writefile nlp_helper.py
import pandas as pd
import re
import string
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class HRDChatbot:
    """Retrieval chatbot that answers HR-policy questions from a CSV file.

    The CSV must contain the columns ``pertanyaan`` (question) and
    ``jawaban`` (answer). Each row's question and answer are concatenated,
    preprocessed and indexed with TF-IDF; a user question is answered with
    the ``jawaban`` of the row whose indexed text has the highest cosine
    similarity to the preprocessed question.
    """

    # Reply used when no row is similar enough to the user's question.
    FALLBACK_ANSWER = "Maaf, saya tidak menemukan aturan terkait hal tersebut. Hubungi HRD langsung ya. :)"

    # Informal words and the standard forms they are normalised to.
    _SLANG = {
        "ngerokok": "merokok",
        "gimana": "bagaimana",
        "gak": "tidak",
        "gapapa": "tidak apa apa",
        "bikin": "buat",
        "telat": "terlambat",
        "absen": "presensi",
    }
    # Match slang only as a whole word, optionally followed by one of the
    # particles -nya/-lah/-kah. A plain substring replace would corrupt longer
    # words that merely contain a slang word ("absensi" -> "presensisi",
    # "tegak" -> "tetidak").
    _SLANG_PATTERN = re.compile(
        r"\b(" + "|".join(map(re.escape, _SLANG)) + r")(nya|lah|kah)?\b"
    )
    # Translation table that deletes every ASCII punctuation character.
    _STRIP_PUNCTUATION = str.maketrans('', '', string.punctuation)

    def __init__(self, csv_path, min_score=0.15):
        """Load the knowledge base and build the TF-IDF index.

        Args:
            csv_path: Path of the CSV file with ``pertanyaan`` and
                ``jawaban`` columns.
            min_score: Minimum cosine similarity a row must reach for its
                answer to be returned; below it the fallback reply is used.

        Raises:
            Exception: If the CSV cannot be read. The underlying error is
                attached as ``__cause__``.
        """
        # Read the local file directly.
        try:
            self.df = pd.read_csv(csv_path)
        except Exception as e:
            raise Exception(f"File tidak ditemukan di {csv_path}. Error: {e}") from e

        self.min_score = min_score
        self.stemmer = StemmerFactory().create_stemmer()
        self.stopword = StopWordRemoverFactory().create_stop_word_remover()

        # Combine question + answer. Missing cells become empty strings so
        # they do not end up in the index as the literal token "nan".
        questions = self.df['pertanyaan'].fillna('').astype(str)
        answers = self.df['jawaban'].fillna('').astype(str)
        self.df['knowledge_base'] = questions + " " + answers

        # Preprocess every row and fit the TF-IDF index on the result.
        self.df['clean_knowledge'] = self.df['knowledge_base'].apply(self.preprocessing)
        self.vectorizer = TfidfVectorizer()
        self.tfidf_matrix = self.vectorizer.fit_transform(self.df['clean_knowledge'])

    def preprocessing(self, text):
        """Normalise ``text`` for indexing or querying.

        Steps, in order: lowercase, drop digits, drop ASCII punctuation,
        remove stop words, replace known slang words with their standard
        form, then stem.

        Args:
            text: Any value; it is converted with ``str()`` first.

        Returns:
            The normalised string.
        """
        text = str(text).lower()
        text = re.sub(r'\d+', '', text)
        text = text.translate(self._STRIP_PUNCTUATION)
        text = self.stopword.remove(text)

        # Replace slang in a single pass, keeping any matched particle suffix.
        text = self._SLANG_PATTERN.sub(
            lambda m: self._SLANG[m.group(1)] + (m.group(2) or ""),
            text,
        )

        return self.stemmer.stem(text)

    def get_answer(self, user_input):
        """Return the best-matching answer for ``user_input``.

        Args:
            user_input: The user's question as free text.

        Returns:
            The ``jawaban`` value of the most similar row, or
            ``FALLBACK_ANSWER`` when the best similarity is below
            ``min_score``.
        """
        clean_input = self.preprocessing(user_input)
        input_vec = self.vectorizer.transform([clean_input])
        similarity = cosine_similarity(input_vec, self.tfidf_matrix)

        # similarity has a single row, so the flat argmax is the row index
        # of the best match in the dataframe.
        best_idx = int(similarity.argmax())
        best_score = similarity[0, best_idx]

        if best_score < self.min_score:
            return self.FALLBACK_ANSWER
        return self.df.iloc[best_idx]['jawaban']

In [None]:
%%writefile dashboard.py
import streamlit as st
from nlp_helper import HRDChatbot
import os

# Page configuration: browser tab title, tab icon and a full-width layout
st.set_page_config(
    page_title="Asisten Bot Nih",
    page_icon="üè¢",
    layout="wide",
)

@st.cache_resource
def load_bot():
    """Create the chatbot from the local rules CSV.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the returned
    object instead of rebuilding it on every script rerun.

    Returns:
        An ``HRDChatbot`` built from ``data/data_aturan.csv``, or ``None``
        when that file does not exist.
    """
    dataset_path = "data/data_aturan.csv"
    if not os.path.exists(dataset_path):
        return None
    return HRDChatbot(dataset_path)

bot = load_bot()

# load_bot() returns None when the CSV is missing. Show a readable error and
# halt this script run instead of crashing on `bot.df` below.
if bot is None:
    st.error("Data aturan tidak ditemukan di 'data/data_aturan.csv'. Jalankan langkah download terlebih dahulu.")
    st.stop()

# --- SIDEBAR (DATASET VIEWER) ---
with st.sidebar:
    st.header("üìÇüß† Knowledge Base")
    st.write("Data aturan yang dipelajari bot:")
    # Show only the question/answer columns, not the derived helper columns.
    st.dataframe(bot.df[['pertanyaan', 'jawaban']], hide_index=True)

# --- MAIN UI ---
st.title("Bot Sample :)")
st.markdown("Tanyakan apa saja mengenai **Cuti, Jam Kerja, Seragam, atau Gaji**.")
st.divider()

# Seed the chat history with a greeting the first time the session runs
if "messages" not in st.session_state:
    greeting = {
        "role": "assistant",
        "content": "Halo! Saya bot Sample. Ada yang bisa saya bantu tentang aturan kantor?",
        "avatar": "üëª",
    }
    st.session_state.messages = [greeting]

# Replay every stored message; the avatar is chosen from the message role
for entry in st.session_state.messages:
    avatar_icon = "üëª" if entry["role"] == "assistant" else "üßë‚Äçüíº"

    with st.chat_message(entry["role"], avatar=avatar_icon):
        st.markdown(entry["content"])

# --- USER INPUT ---
prompt = st.chat_input("Ketik pertanyaan Anda di sini...")
if prompt:
    # 1. Show the new user message, then record it in the history
    with st.chat_message("user", avatar="üßë‚Äçüíº"):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # 2. Look up the answer while a spinner is displayed
    with st.spinner("Mencari aturan terkait..."):
        jawaban = bot.get_answer(prompt)

    # 3. Show the bot's reply, then record it in the history
    with st.chat_message("assistant", avatar="üëª"):
        st.markdown(jawaban)
    st.session_state.messages.append({"role": "assistant", "content": jawaban})


In [None]:
# Launch cell: print this machine's public IPv4 address, then start the app
# and a tunnel to it.
# The printed message tells the user to copy the IP as the tunnel password.
print("Copy IP ini untuk password tunnel:")
# `-O -` makes wget write the response body to stdout instead of a file.
!wget -q -O - ipv4.icanhazip.com
print("----------------------------------")
# Run Streamlit in the background (`&`) and, in the same shell, start
# localtunnel on port 8501.
# NOTE(review): 8501 is presumably the port Streamlit listens on — confirm.
!streamlit run dashboard.py & npx localtunnel --port 8501