In [57]:
# Import necessary libraries
import os
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFDirectoryLoader
import time
from dotenv import load_dotenv
from langdetect import detect

# Load environment variables
load_dotenv()

True

In [113]:
# inisialisasi model
def init_groq():
    groq_api_key = os.getenv("GROQ_API_KEY")
    if not groq_api_key:
        print("❌ GROQ_API_KEY not found!")
        return None
    
    try:
        return ChatGroq(
            groq_api_key=groq_api_key,
            model_name="gemma2-9b-it",
            temperature=0.5,
            max_tokens=1024
        )
    except Exception as e:
        print(f"Error initializing Groq: {str(e)}")
        return None

# Initialize Groq
llm = init_groq()

# Membuat Vector Embedding

In [103]:
def create_vector_embedding():
    try:
        print("Creating vector embeddings...")
        
        # periksa apakah directory sudah ada ?
        if not os.path.exists("draft_ruu_kuhp_final"):
            os.makedirs("draft_ruu_kuhp_final",exist_ok=True)
            print("📁 Created 'draft_ruu_kuhp_final' directory. Please add your PDF files there.")
            return None, None
        
        # Initialize embeddings
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2", # Use OpenAI embedding model
            model_kwargs={'device': 'cpu'},
            encode_kwargs={
                'normalize_embeddings': True,
                'batch_size': 32
            }
        )
        
        # Load documents
        loader = PyPDFDirectoryLoader("draft_ruu_kuhp_final")
        docs = loader.load()
        
        if not docs:
            print("❌ No documents found in directory!")
            return None, None
        
        print(f"Found {len(docs)} documents")
        
        # Split documents
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=512,
            chunk_overlap= 50 # Slight overlap for semantic continuity
        )
        final_documents = text_splitter.split_documents(docs)
        
        print(f"Created {len(final_documents)} document chunks")
        
        # membuat vector store
        vectors = FAISS.from_documents(
            final_documents,
            embeddings
        )
        
        print("✅ Vector store created successfully!")
        return vectors, docs
        
    except Exception as e:
        print(f"Error during embedding creation: {str(e)}")
        return None, None

In [104]:
# Setup Prompt
prompt = ChatPromptTemplate.from_template("""
You are a highly knowledgeable legal expert and AI assistant specializing in Indonesian criminal law (KUHP). 
    Your role is to:
    - Answer legal questions about the KUHP professionally and wisely.
    - Discuss cases and suggest applicable 'pasal' (articles) with precise legal interpretations.
    - Provide clear and concise legal guidance, with accurate references to the KUHP.
    - Provide a step-by-step guide and suggest possible solutions for resolving the legal case based on the relevant laws.

    Language Guidelines:
    - Always respond in the user's preferred language. If the user does not specify, respond in formal Indonesian.
    - Use professional and respectful language, avoiding casual expressions.
    - For complex legal terms, provide explanations in simple terms without losing the formality.

<context>
{context}
</context>

Question: {input}
                                          
Based on the context and query, provide a wise, professional, and multilingual legal response. 
Include the relevant 'pasal' references when appropriate and explain them if needed.
""")

# Membuat query

In [106]:
def detect_language(query):
    """Detects the language of the user's query."""
    try:
        lang = detect(query)
        if lang == "id":
            return "Indonesian"
        elif lang == "en":
            return "English"
        else:
            return "Indonesian"  # Default to Indonesian for unsupported languages
    except Exception:
        return "Indonesian"

def process_query(user_prompt, vectors, llm, prompt):
    """
    Process a user query using the retrieval-augmented generation pipeline.
    
    Args:
        user_prompt (str): The query entered by the user.
        vectors: The initialized FAISS vector store.
        llm: The initialized language model.
        prompt: The ChatPromptTemplate.

    Returns:
        dict: The response containing the answer and related documents.
    """
    # Check if vector store and LLM are initialized
    if vectors is None:
        print("❌ Vector store not initialized! Please create embeddings first.")
        return None

    if llm is None:
        print("❌ Language model not initialized! Please check your GROQ_API_KEY.")
        return None

    try:
        # Detect language
        preferred_language = detect_language(user_prompt)
        print(f"🌐 Detected Language: {preferred_language}")

        # Create chains
        document_chain = create_stuff_documents_chain(llm, prompt)
        retriever = vectors.as_retriever(
            search_kwargs={"k": 10}  # Retrieve top 10 relevant documents
        )
        retrieval_chain = create_retrieval_chain(retriever, document_chain)

        # Prepare input for the retrieval chain
        input_data = {
            "input": user_prompt,
            "preferred_language": preferred_language
        }

        # Get response
        start = time.process_time()
        response = retrieval_chain.invoke(input_data)
        response_time = time.process_time() - start

        # Display results
        print(f"⏱️ Response time: {response_time:.2f} seconds")
        print("\nAnswer:")
        print(response['answer'])

        print("\nRelated Document Excerpts:")
        for i, doc in enumerate(response['context']):
            print(f"\nDocument {i+1}:")
            print(doc.page_content)
            if hasattr(doc, 'metadata') and doc.metadata:
                print(f"Source: {doc.metadata.get('source', 'Unknown')}")  # Show source if available
            print("-" * 50)

        return response

    except Exception as e:
        print(f"Error processing query: {str(e)}")
        return None

In [63]:
vectors, docs = create_vector_embedding()

# response = process_query(user_prompt, vectors, llm, prompt)

Creating vector embeddings...
Found 231 documents
Created 1182 document chunks
✅ Vector store created successfully!


In [114]:
user_prompt = "saya terkenap pelecehan seksual tapi tidak bisa memberikan bukti, apa yang harus saya lakukan?"
response = process_query(user_prompt, vectors, llm, prompt)

🌐 Detected Language: Indonesian
⏱️ Response time: 0.30 seconds

Answer:
Saya memahami situasi Anda sangat sulit. Menjadi korban pelecehan seksual memang berat, apalagi tanpa bukti yang kuat. 

Meskipun KUHP tidak secara spesifik mencantumkan pasal untuk "pelecehan seksual tanpa bukti", beberapa pasal bisa relevan tergantung pada bentuk pelecehan yang Anda alami.

Berikut beberapa langkah yang bisa Anda ambil:

1. **Laporkan ke pihak berwenang:**

Meskipun tanpa bukti fisik, Anda tetap bisa melaporkan kejadian tersebut ke polisi. 

- **Pasal 351 KUHP:**  Mencantumkan tentang penganiayaan, yang bisa mencakup pelecehan seksual. 
- **Pasal 294 KUHP:**  Mencantumkan tentang perbuatan cabul, yang bisa mencakup tindakan pelecehan seksual.

Saat melaporkan, jelaskan secara detail kejadian yang Anda alami, termasuk waktu, tempat, pelaku, dan apa yang terjadi. 

2. **Cari bantuan profesional:**

- **Konsultasikan dengan pengacara:** Pengacara dapat membantu Anda memahami hak-hak Anda dan proses 