In [1]:
import os
# Show the kernel's current working directory; the path handling in later cells is relative to it.
print(os.getcwd())

d:\AL FITRA\STUDI UMM\SEMESTER 6\TEORI\PENALARAN KOMPUTER\SOURCE CODE\PENALARAN KOMPUTER\CBR_Penalararan_Komputer\CBR\notebooks


In [2]:
%cd ..

d:\AL FITRA\STUDI UMM\SEMESTER 6\TEORI\PENALARAN KOMPUTER\SOURCE CODE\PENALARAN KOMPUTER\CBR_Penalararan_Komputer\CBR


In [3]:
# NOTE(review): `datasets` is not imported by any cell visible in this notebook — confirm it is
# still required, and pin a version here so a fresh environment reproduces the same run.
%pip install datasets

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
# Part 1: Import Libraries and Initialize
import os
import json
import re
import logging
from datetime import datetime
import nltk
import numpy as np
import pandas as pd
from tqdm import tqdm
from nltk.tokenize import word_tokenize
from typing import List
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, CrossEncoder
from rank_bm25 import BM25Okapi
import torch

# Define base directory (aligned with previous scripts).
# BASE_DIR is the parent of the *current working directory*, so its value depends on
# where the kernel is when this cell runs (an earlier cell changes directory with `%cd ..`).
BASE_DIR = os.path.dirname(os.getcwd())

# Define paths (all rooted at BASE_DIR)
PATH_CSV = os.path.join(BASE_DIR, 'data', 'processed', 'cases.csv')      # case base
PATH_QUERIES = os.path.join(BASE_DIR, 'data', 'eval', 'queries.json')    # evaluation queries
PATH_RESULTS = os.path.join(BASE_DIR, 'data', 'results')                 # prediction CSV output folder
LOG_DIR = os.path.join(BASE_DIR, 'logs')
LOG_PATH = os.path.join(LOG_DIR, 'prediction.log')

# Path-length guard for Windows
MAX_PATH_LENGTH = 260

def validate_path(path):
    """Return `path` unchanged if it fits within MAX_PATH_LENGTH characters.

    Raises:
        ValueError: if `path` is longer than MAX_PATH_LENGTH characters.
    """
    fits = len(path) <= MAX_PATH_LENGTH
    if fits:
        return path
    raise ValueError(f"Path {path} exceeds Windows maximum length of {MAX_PATH_LENGTH} characters")

# The log directory must exist before the FileHandler below can open its file, so it is
# created first. Logging is not configured yet at this point, so failures simply propagate.
validate_path(LOG_DIR)
os.makedirs(LOG_DIR, exist_ok=True)

# Initialize logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(LOG_PATH, mode='a', encoding='utf-8'),  # Append mode
        logging.StreamHandler()
    ],
    force=True  # replace any handlers already attached to the root logger
)
logger = logging.getLogger(__name__)
logger.info(f"Starting prediction process at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Ensure directories exist. This runs after logging is configured so the messages below
# are emitted at INFO level to both the log file and the stream handler.
for path in [LOG_DIR, os.path.dirname(PATH_CSV), os.path.dirname(PATH_QUERIES), PATH_RESULTS]:
    try:
        validate_path(path)
        os.makedirs(path, exist_ok=True)
        logger.info(f"Directory ensured: {path}")
    except ValueError as e:
        logger.error(f"Path validation failed: {e}")
        raise
    except Exception as e:
        logger.error(f"Failed to create directory {path}: {e}")
        raise

# Part 2: Setup Environment
def setup_environment():
    """Ensure the NLTK tokenizer data used by `word_tokenize` is available.

    Both the legacy ``punkt`` package and the ``punkt_tab`` package are checked, and
    whichever is missing is downloaded. Newer NLTK releases load ``punkt_tab`` for
    `word_tokenize`; since `preprocess_text` catches tokenizer errors and returns
    'empty', a missing resource would otherwise silently blank every document.
    """
    for resource in ('punkt', 'punkt_tab'):
        try:
            nltk.data.find(f'tokenizers/{resource}')
        except LookupError:
            logger.info(f"Downloading NLTK {resource} tokenizer")
            # quiet=True: a failed download does not raise here; it surfaces later
            # as logged preprocessing errors.
            nltk.download(resource, quiet=True)

# Part 3: Preprocess Text
def preprocess_text(text):
    """Normalize a piece of legal text into a space-joined token string.

    Steps, in order: cast to str and lowercase; drop a fixed set of boilerplate legal
    terms; rewrite "uu no" / "uu nomor" to the single token ``undang_undang_nomor``;
    strip the number from "pasal <n>"; replace punctuation with spaces; collapse
    whitespace; tokenize with NLTK.

    Args:
        text: Any value; it is converted with ``str()`` first.

    Returns:
        str: The tokens joined by single spaces, or the literal 'empty' when no tokens
        remain or when any step raises (the error is logged, not propagated).
    """
    try:
        text = str(text).lower()
        text = re.sub(r'\b(?:putusan|nomor_perkara|tahun|pengadilan|hakim)\b', '', text)
        # Word boundaries plus the optional "mor" keep this from rewriting inside longer
        # words: without them "uu nomor" became "undang_undang_nomormor" and
        # "uu notaris" became "undang_undang_nomortaris".
        text = re.sub(r'\buu\s+no(?:mor)?\b', 'undang_undang_nomor', text)
        text = re.sub(r'pasal\s+\d+', 'pasal', text)
        text = re.sub(r'[^\w\s]', ' ', text)
        text = re.sub(r'\s+', ' ', text).strip()
        tokens = word_tokenize(text)
        return ' '.join(tokens) if tokens else 'empty'
    except Exception as e:
        logger.error(f"Error preprocessing text: {e}")
        return 'empty'

# Part 4: Load Data
def load_data():
    """Load the case base (PATH_CSV) and the evaluation queries (PATH_QUERIES).

    Returns:
        tuple: ``(df, texts, case_ids, queries, case_solutions)`` where
            - ``df`` is the DataFrame read from PATH_CSV,
            - ``texts`` is the preprocessed 'ringkasan_fakta' column as a list of str,
            - ``case_ids`` is the 'case_id' column as a list,
            - ``queries`` is the parsed JSON content of PATH_QUERIES,
            - ``case_solutions`` maps each query item's 'case_id' to its 'solution'
              ('' when the item has no 'solution' key).

    Raises:
        FileNotFoundError: if either file is missing (logged and printed first).
        ValueError: if the CSV has no rows or lacks a required column.
        Exception: any other failure is logged, printed and re-raised.
    """
    required_columns = ('ringkasan_fakta', 'case_id')
    try:
        df = pd.read_csv(PATH_CSV)
        # Check the schema explicitly: indexing a missing column would raise a bare
        # KeyError before the descriptive error below could ever be reached.
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns or len(df) == 0:
            raise ValueError("Dataset is empty or missing required columns ('ringkasan_fakta', 'case_id').")
        texts = df['ringkasan_fakta'].fillna('').apply(preprocess_text).tolist()
        case_ids = df['case_id'].tolist()
        with open(PATH_QUERIES, 'r', encoding='utf-8') as f:
            queries = json.load(f)
        case_solutions = {item['case_id']: item.get('solution', '') for item in queries}
        logger.info(f"Loaded {len(df)} cases from {PATH_CSV} and {len(queries)} queries from {PATH_QUERIES}")
        return df, texts, case_ids, queries, case_solutions
    except FileNotFoundError as e:
        logger.error(f"File not found: {e}")
        print(f"Error: {e}")
        raise
    except Exception as e:
        logger.error(f"Error loading data: {e}")
        print(f"Error loading data: {e}")
        raise

# Part 5: Setup TF-IDF
def setup_tfidf(texts: List[str]) -> tuple:
    """Fit a TF-IDF vectorizer on `texts` and return it with the resulting matrix.

    The vectorizer uses unigrams and bigrams, keeps at most 2000 features and drops
    the stop words listed below.

    Returns:
        tuple: ``(vectorizer, tfidf_matrix)``.

    Raises:
        Exception: any fitting error is logged, printed and re-raised.
    """
    # Stop-word list, written as whitespace-separated words and split into a list.
    stop_words = (
        "dan di dari ke pada dengan untuk yang ini itu "
        "adalah tersebut sebagai oleh atau tetapi karena jika "
        "dalam bagi tentang melalui serta maka lagi sudah "
        "belum hanya saja bahwa apa siapa bagaimana kapan "
        "dimana kenapa sejak hingga agar supaya meskipun walau "
        "kecuali terhadap antara selain setiap sebelum sesudah"
    ).split()
    try:
        vectorizer = TfidfVectorizer(
            max_features=2000,
            ngram_range=(1, 2),
            stop_words=stop_words,
        )
        tfidf_matrix = vectorizer.fit_transform(texts)
    except Exception as e:
        logger.error(f"Error setting up TF-IDF: {e}")
        print(f"Error setting up TF-IDF: {e}")
        raise
    else:
        logger.info(f"TF-IDF matrix shape: {tfidf_matrix.shape}")
        return vectorizer, tfidf_matrix

# Part 6: Extract Features
def extract_features(query_vec, doc_vec, query_text: str, doc_text: str) -> np.ndarray:
    """Build the feature vector for one (query, document) pair.

    The vector is the concatenation of the dense query TF-IDF row, the dense document
    TF-IDF row, and three scalars: cosine similarity, word overlap, and coverage.

    Args:
        query_vec: Single-row matrix exposing ``.toarray()`` (the query's TF-IDF row).
        doc_vec: Single-row matrix exposing ``.toarray()`` (the document's TF-IDF row).
        query_text: Preprocessed query text; split on whitespace for the overlap.
        doc_text: Preprocessed document text; split on whitespace for the overlap.

    Returns:
        np.ndarray: 1-D array of length ``len(query row) + len(doc row) + 3``. On any
        error the failure is logged and a zero vector of that same length is returned.
    """
    try:
        query_dense = query_vec.toarray()[0]
        doc_dense = doc_vec.toarray()[0]
        cos_sim = cosine_similarity([query_dense], [doc_dense])[0][0]
        query_words = set(query_text.split())
        doc_words = set(doc_text.split())
        # Share of the query's distinct words that also occur in the document.
        overlap = len(query_words & doc_words) / max(len(query_words), 1)
        # NOTE(review): coverage is an exact copy of overlap, so the last two features
        # carry the same information — confirm whether a document-side ratio was intended.
        coverage = overlap
        return np.concatenate([query_dense, doc_dense, [cos_sim, overlap, coverage]])
    except Exception as e:
        logger.error(f"Error extracting features: {e}")
        # Size the fallback from the actual inputs: the fitted vocabulary can be smaller
        # than max_features, and a fixed 4003-long vector would not line up with the
        # other feature rows.
        try:
            width = int(query_vec.shape[-1]) + int(doc_vec.shape[-1]) + 3
        except Exception:
            width = 4003  # 2000 + 2000 + 3, used only when the input shapes are unavailable
        return np.zeros(width)

# Part 7: Prepare Training Data
def prepare_training_data(queries, case_ids, texts, vectorizer, tfidf_matrix, n_negatives=5, random_state=42):
    """Build a balanced pairwise training set from the labelled queries.

    For every query whose 'case_id' is present in `case_ids`, up to `n_negatives` other
    cases are sampled without replacement. Each (positive, negative) pair contributes
    two rows: ``pos - neg`` labelled 1 and ``neg - pos`` labelled 0, so the classes are
    always balanced.

    Args:
        queries: Iterable of dicts with at least the keys 'query' and 'case_id'.
        case_ids: List of case identifiers, aligned with `texts` and `tfidf_matrix` rows.
        texts: Preprocessed case texts.
        vectorizer: Fitted vectorizer exposing ``transform``.
        tfidf_matrix: Matrix of case vectors, indexable by row.
        n_negatives: Maximum number of negative cases sampled per query (default 5).
        random_state: Seed for the negative sampling, so repeated runs draw the same
            negatives. Pass None to draw from NumPy's global random state instead.

    Returns:
        tuple: ``(X_train, y_train)`` as NumPy arrays; `y_train` has an integer dtype.
    """
    rng = np.random.RandomState(random_state) if random_state is not None else np.random
    X_train = []
    y_train = []
    for item in queries:
        query = preprocess_text(item['query'])
        query_vec = vectorizer.transform([query])
        true_id = item['case_id']
        try:
            true_idx = case_ids.index(true_id)
        except ValueError:
            logger.warning(f"Case ID {true_id} not found in dataset. Skipping query: {item['query'][:50]}...")
            continue
        pos_features = extract_features(query_vec, tfidf_matrix[true_idx], query, texts[true_idx])
        neg_indices = [i for i in range(len(case_ids)) if i != true_idx]
        neg_samples = rng.choice(neg_indices, size=min(n_negatives, len(neg_indices)), replace=False)
        for neg_idx in neg_samples:
            neg_features = extract_features(query_vec, tfidf_matrix[neg_idx], query, texts[neg_idx])
            # Mirrored difference vectors: one row per ordering of the pair.
            X_train.append(pos_features - neg_features)
            y_train.append(1)
            X_train.append(neg_features - pos_features)
            y_train.append(0)
    X_train = np.array(X_train)
    # Explicit integer dtype: an empty label list would otherwise become a float array.
    y_train = np.array(y_train, dtype=int)
    if y_train.size == 0:
        logger.warning("No training pairs were generated: no query matched a case ID in the dataset.")
    logger.info(f"Training data shape: {X_train.shape}")
    logger.info(f"Class distribution: {np.bincount(y_train)}")
    return X_train, y_train

# Part 8: Train Models
def train_models(X_train, y_train):
    """Grid-search a Logistic Regression and a linear SVM over C with 5-fold stratified CV.

    Both searches score by F1 and share the same folds and C grid. The best parameters
    and best CV score of each search are logged and printed.

    Returns:
        tuple: ``(logreg, svm)`` — the two fitted GridSearchCV objects.

    Raises:
        Exception: any training error is logged, printed and re-raised.
    """
    try:
        folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        c_grid = {'C': [0.1, 1, 10]}

        def _search(estimator, long_name, short_name):
            # Fit one grid search, then report its best parameters and score.
            search = GridSearchCV(estimator, c_grid, cv=folds, scoring='f1', n_jobs=-1)
            search.fit(X_train, y_train)
            params_msg = f"Best {long_name} Parameters: {search.best_params_}"
            score_msg = f"Best CV F1 Score ({short_name}): {search.best_score_:.2f}"
            logger.info(params_msg)
            logger.info(score_msg)
            print(params_msg)
            print(score_msg)
            return search

        logreg = _search(
            LogisticRegression(max_iter=1000, class_weight='balanced', solver='liblinear'),
            "Logistic Regression",
            "LogReg",
        )
        svm = _search(
            LinearSVC(max_iter=1000, class_weight='balanced', tol=1e-3),
            "SVM",
            "SVM",
        )
        return logreg, svm
    except Exception as e:
        logger.error(f"Error training models: {e}")
        print(f"Error training models: {e}")
        raise

# Part 9: Setup IndoBERT and BM25
def setup_indobert_and_bm25(texts: List[str]):
    """Load the bi-encoder and cross-encoder, embed the corpus and build a BM25 index.

    Models are placed on CUDA when available, otherwise on CPU. BM25 is built over the
    whitespace-split `texts`.

    Returns:
        tuple: ``(bi_encoder, cross_encoder, doc_embeddings, bm25)``.

    Raises:
        Exception: any setup error is logged, printed and re-raised.
    """
    try:
        if torch.cuda.is_available():
            device = 'cuda'
        else:
            device = 'cpu'
        logger.info(f"Loading IndoBERT model on {device}")

        bi_encoder = SentenceTransformer('indobenchmark/indobert-base-p1', device=device)
        cross_encoder = CrossEncoder(
            'cross-encoder/ms-marco-MiniLM-L-6-v2',
            device=device,
            max_length=512,
        )

        doc_embeddings = bi_encoder.encode(
            texts,
            convert_to_numpy=True,
            normalize_embeddings=True,
            show_progress_bar=False,
            batch_size=16,
        )

        tokenized_corpus = [doc.split() for doc in texts]
        bm25 = BM25Okapi(tokenized_corpus)

        logger.info(f"Document embeddings shape: {doc_embeddings.shape}")
        return bi_encoder, cross_encoder, doc_embeddings, bm25
    except Exception as e:
        logger.error(f"Error setting up Indo-BERT/BM25: {e}")
        print(f"Error setting up Indo-BERT/BM25: {e}")
        raise

# Part 10: Retrieval Functions
def _rank_cases_with_model(query, vectorizer, tfidf_matrix, case_ids, texts, score_fn, k):
    """Score every case against `query` with `score_fn` and return the top-k case IDs.

    `score_fn` receives the stacked feature matrix (one row per case) and must return
    one score per row; higher means more relevant.
    """
    query = preprocess_text(query)
    query_vec = vectorizer.transform([query])
    n_docs = tfidf_matrix.shape[0]
    if n_docs == 0:
        return []
    features = np.vstack([
        extract_features(query_vec, tfidf_matrix[i], query, texts[i])
        for i in range(n_docs)
    ])
    # One batched model call instead of one call per document.
    scores = np.asarray(score_fn(features), dtype=float)
    # sorted() is stable, so equal scores keep their corpus order.
    ranking = sorted(range(n_docs), key=lambda i: scores[i], reverse=True)
    return [case_ids[i] for i in ranking[:k]]


def logreg_retrieve(query: str, vectorizer, tfidf_matrix, case_ids, texts, logreg, k: int = 5) -> List[str]:
    """Retrieve the top-k cases ranked by Logistic Regression positive-class probability.

    Returns:
        List[str]: Up to `k` case IDs, best first; an empty list if anything fails
        (the error is logged, not propagated).
    """
    try:
        return _rank_cases_with_model(
            query, vectorizer, tfidf_matrix, case_ids, texts,
            lambda X: logreg.predict_proba(X)[:, 1],  # Probability of positive class
            k,
        )
    except Exception as e:
        logger.error(f"Error in LogReg retrieval for query '{str(query)[:50]}...': {e}")
        return []


def svm_retrieve(query: str, vectorizer, tfidf_matrix, case_ids, texts, svm, k: int = 5) -> List[str]:
    """Retrieve the top-k cases ranked by the SVM decision-function value.

    Returns:
        List[str]: Up to `k` case IDs, best first; an empty list if anything fails
        (the error is logged, not propagated).
    """
    try:
        return _rank_cases_with_model(
            query, vectorizer, tfidf_matrix, case_ids, texts,
            svm.decision_function,
            k,
        )
    except Exception as e:
        logger.error(f"Error in SVM retrieval for query '{str(query)[:50]}...': {e}")
        return []

def indobert_retrieve(query: str, bi_encoder, cross_encoder, doc_embeddings, bm25, case_ids, texts, k: int = 10, alpha: float = 0.7, top_n: int = 5) -> List[str]:
    """Retrieve cases with hybrid dense + BM25 scoring, then rerank with the cross-encoder.

    The query embedding's cosine similarity to every document and the max-normalized
    BM25 score are blended as ``alpha * dense + (1 - alpha) * bm25``. The `k` best
    candidates are rescored by the cross-encoder and the best `top_n` are returned.

    Args:
        query: Raw query text (preprocessed inside).
        bi_encoder: Model exposing ``encode`` for the query embedding.
        cross_encoder: Model exposing ``predict`` on ``[query, text]`` pairs.
        doc_embeddings: Matrix of document embeddings, one row per case.
        bm25: Index exposing ``get_scores`` for a token list.
        case_ids: Case identifiers aligned with `texts` and `doc_embeddings`.
        texts: Preprocessed case texts.
        k: Number of first-stage candidates passed to the reranker.
        alpha: Weight of the dense similarity in the blended score.
        top_n: Number of case IDs returned after reranking (default 5).

    Returns:
        List[str]: Up to `top_n` case IDs, best first; an empty list when `k` or
        `top_n` is not positive or when anything fails (the error is logged).
    """
    try:
        # A zero k would make the `[-k:]` slice below select the whole corpus.
        if k <= 0 or top_n <= 0:
            return []
        query = preprocess_text(query)
        query_vec = bi_encoder.encode([query], convert_to_numpy=True, normalize_embeddings=True)[0]
        sim_scores = cosine_similarity([query_vec], doc_embeddings)[0]
        bm25_scores = np.asarray(bm25.get_scores(query.split()), dtype=float)
        # Scale by the maximum; the epsilon avoids dividing by zero when no term matches.
        bm25_scores = bm25_scores / (np.max(bm25_scores) + 1e-10)
        combined = alpha * sim_scores + (1 - alpha) * bm25_scores
        top_k_idx = np.argsort(combined)[-k:][::-1]
        rerank_pairs = [[query, texts[i]] for i in top_k_idx]
        rerank_scores = cross_encoder.predict(rerank_pairs)
        reranked_idx = np.argsort(rerank_scores)[::-1][:top_n]
        return [case_ids[top_k_idx[i]] for i in reranked_idx]
    except Exception as e:
        logger.error(f"Error in IndoBERT retrieval for query '{str(query)[:50]}...': {e}")
        return []

# Part 11: Predict Outcome
def predict_outcome(query: str, retrieve_fn, case_solutions, true_solution=None):
    """Predict a solution for `query` by majority vote over the retrieved cases.

    Args:
        query: Query text handed to `retrieve_fn`.
        retrieve_fn: Callable taking the query and returning case IDs, best first.
        case_solutions: Mapping from case ID to its solution.
        true_solution: Optional reference solution; when given (and at least one
            retrieved case has a usable solution) per-query metrics are computed.

    Returns:
        tuple: ``(predicted, top_ids, metrics)``. `predicted` is the most frequent
        usable solution among the retrieved cases, or 'Tidak ditemukan' when there is
        none. `metrics` is a dict with accuracy/precision/recall/f1, or ``{}``.
        On any error the failure is logged and ``('Tidak ditemukan', [], {})`` is returned.
    """
    try:
        top_5_ids = retrieve_fn(query)
        solutions = [case_solutions.get(cid, '') for cid in top_5_ids]
        # Drop empty/None values, the literal string 'nan', and float NaN (s != s).
        filtered = [
            s for s in solutions
            if s and s != 'nan' and not (isinstance(s, float) and s != s)
        ]
        if filtered:
            # dict.fromkeys keeps first-seen order and max() returns the first maximum,
            # so a tie goes to the highest-ranked case. Iterating a set here made the
            # winner depend on hash order, which can differ between runs.
            predicted = max(dict.fromkeys(filtered), key=filtered.count)
        else:
            predicted = 'Tidak ditemukan'
        metrics = {}
        if true_solution and filtered:
            # Binary view over the retrieved list: which retrieved solutions equal the
            # reference vs. which equal the prediction.
            y_true = [1 if true_solution == s else 0 for s in solutions]
            y_pred = [1 if predicted == s else 0 for s in solutions]
            metrics = {
                'accuracy': accuracy_score(y_true, y_pred),
                'precision': precision_score(y_true, y_pred, average='binary', zero_division=0),
                'recall': recall_score(y_true, y_pred, average='binary', zero_division=0),
                'f1': f1_score(y_true, y_pred, average='binary', zero_division=0)
            }
        return predicted, top_5_ids, metrics
    except Exception as e:
        logger.error(f"Error predicting outcome for query '{str(query)[:50]}...': {e}")
        return 'Tidak ditemukan', [], {}

# Part 12: Main Function
def main():
    """Run the full prediction pipeline and write one results CSV per retriever.

    Loads the data, fits TF-IDF, trains the Logistic Regression and SVM rankers, sets up
    the IndoBERT/BM25 retriever, then uses every case's 'ringkasan_fakta' as a query for
    all three retrievers. Results are written to PATH_RESULTS as
    ``logreg_predictions.csv``, ``svm_predictions.csv`` and ``indobert_predictions.csv``.

    Any exception is logged (with traceback) and printed; it is not re-raised.
    """
    try:
        setup_environment()
        df, texts, case_ids, queries, case_solutions = load_data()
        vectorizer, tfidf_matrix = setup_tfidf(texts)
        X_train, y_train = prepare_training_data(queries, case_ids, texts, vectorizer, tfidf_matrix)
        logreg, svm = train_models(X_train, y_train)
        bi_encoder, cross_encoder, doc_embeddings, bm25 = setup_indobert_and_bm25(texts)

        # One retrieval callable per model; insertion order fixes the processing order.
        retrievers = {
            'logreg': lambda q: logreg_retrieve(q, vectorizer, tfidf_matrix, case_ids, texts, logreg),
            'svm': lambda q: svm_retrieve(q, vectorizer, tfidf_matrix, case_ids, texts, svm),
            'indobert': lambda q: indobert_retrieve(q, bi_encoder, cross_encoder, doc_embeddings, bm25, case_ids, texts),
        }
        results = {name: [] for name in retrievers}

        # NOTE(review): every query is itself a row of the case base being searched, so a
        # case can retrieve itself — confirm this is the intended evaluation setup.
        for i in tqdm(range(len(df)), desc='Predicting'):
            query_text = df.loc[i, 'ringkasan_fakta']
            case_id = df.loc[i, 'case_id']
            true_solution = case_solutions.get(case_id, '')
            for name, retrieve_fn in retrievers.items():
                predicted, top_ids, metrics = predict_outcome(
                    query_text, retrieve_fn, case_solutions, true_solution
                )
                results[name].append({
                    'query_id': case_id,
                    'predicted_solution': predicted,
                    # str() so numeric case IDs do not make join() raise and abort the run.
                    'top_5_case_ids': ', '.join(str(cid) for cid in top_ids),
                    'metrics': metrics
                })

        filenames = {name: f'{name}_predictions.csv' for name in retrievers}
        for name, filename in filenames.items():
            pd.DataFrame(results[name]).to_csv(
                os.path.join(PATH_RESULTS, filename), index=False, encoding='utf-8'
            )
        for filename in filenames.values():
            logger.info(f"Saved results to {PATH_RESULTS}/{filename}")
        for filename in filenames.values():
            print(f"✅ Saved to {PATH_RESULTS}/{filename}")
    except Exception as e:
        # exc_info keeps the traceback in the log; the exception itself is swallowed here.
        logger.error(f"An error occurred in main: {e}", exc_info=True)
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()




2025-06-25 18:54:39,260 - INFO - Starting prediction process at 2025-06-25 18:54:39
2025-06-25 18:54:39,286 - INFO - Loaded 50 cases from d:\AL FITRA\STUDI UMM\SEMESTER 6\TEORI\PENALARAN KOMPUTER\SOURCE CODE\PENALARAN KOMPUTER\CBR_Penalararan_Komputer\data\processed\cases.csv and 5 queries from d:\AL FITRA\STUDI UMM\SEMESTER 6\TEORI\PENALARAN KOMPUTER\SOURCE CODE\PENALARAN KOMPUTER\CBR_Penalararan_Komputer\data\eval\queries.json
2025-06-25 18:54:39,293 - INFO - TF-IDF matrix shape: (50, 138)
2025-06-25 18:54:39,306 - INFO - Training data shape: (50, 279)
2025-06-25 18:54:39,307 - INFO - Class distribution: [25 25]
2025-06-25 18:54:41,479 - INFO - Best Logistic Regression Parameters: {'C': 0.1}
2025-06-25 18:54:41,481 - INFO - Best CV F1 Score (LogReg): 1.00
2025-06-25 18:54:41,520 - INFO - Best SVM Parameters: {'C': 0.1}
2025-06-25 18:54:41,521 - INFO - Best CV F1 Score (SVM): 1.00
2025-06-25 18:54:41,522 - INFO - Loading IndoBERT model on cpu
2025-06-25 18:54:41,526 - INFO - Load pret

Best Logistic Regression Parameters: {'C': 0.1}
Best CV F1 Score (LogReg): 1.00
Best SVM Parameters: {'C': 0.1}
Best CV F1 Score (SVM): 1.00


2025-06-25 18:54:48,911 - INFO - Document embeddings shape: (50, 768)
Predicting:   0%|          | 0/50 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:   2%|▏         | 1/50 [00:00<00:13,  3.61it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:   4%|▍         | 2/50 [00:00<00:10,  4.71it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:   6%|▌         | 3/50 [00:00<00:10,  4.50it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:   8%|▊         | 4/50 [00:00<00:10,  4.30it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  10%|█         | 5/50 [00:01<00:09,  4.71it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  12%|█▏        | 6/50 [00:01<00:09,  4.57it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  14%|█▍        | 7/50 [00:01<00:09,  4.77it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  16%|█▌        | 8/50 [00:01<00:09,  4.52it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  18%|█▊        | 9/50 [00:02<00:09,  4.36it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  20%|██        | 10/50 [00:02<00:08,  4.66it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  22%|██▏       | 11/50 [00:02<00:08,  4.66it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  24%|██▍       | 12/50 [00:02<00:08,  4.67it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  26%|██▌       | 13/50 [00:02<00:08,  4.43it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  28%|██▊       | 14/50 [00:03<00:07,  4.63it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  30%|███       | 15/50 [00:03<00:07,  4.62it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  32%|███▏      | 16/50 [00:03<00:07,  4.47it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  34%|███▍      | 17/50 [00:03<00:06,  4.80it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  36%|███▌      | 18/50 [00:03<00:06,  4.69it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  38%|███▊      | 19/50 [00:04<00:06,  4.97it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  40%|████      | 20/50 [00:04<00:06,  4.66it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  42%|████▏     | 21/50 [00:04<00:06,  4.53it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  44%|████▍     | 22/50 [00:04<00:05,  4.88it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  46%|████▌     | 23/50 [00:04<00:05,  4.63it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  48%|████▊     | 24/50 [00:05<00:05,  4.65it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  50%|█████     | 25/50 [00:05<00:05,  4.57it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  52%|█████▏    | 26/50 [00:05<00:05,  4.37it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  54%|█████▍    | 27/50 [00:05<00:04,  4.75it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  56%|█████▌    | 28/50 [00:06<00:04,  4.61it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  58%|█████▊    | 29/50 [00:06<00:04,  4.59it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  60%|██████    | 30/50 [00:06<00:04,  4.46it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  62%|██████▏   | 31/50 [00:06<00:04,  4.39it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  64%|██████▍   | 32/50 [00:07<00:04,  3.98it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  66%|██████▌   | 33/50 [00:07<00:03,  4.32it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  68%|██████▊   | 34/50 [00:07<00:03,  4.36it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  70%|███████   | 35/50 [00:07<00:03,  4.25it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  72%|███████▏  | 36/50 [00:07<00:03,  4.59it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  74%|███████▍  | 37/50 [00:08<00:02,  4.44it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  76%|███████▌  | 38/50 [00:08<00:02,  4.69it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  78%|███████▊  | 39/50 [00:08<00:02,  4.62it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  80%|████████  | 40/50 [00:08<00:02,  4.72it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  82%|████████▏ | 41/50 [00:09<00:02,  4.41it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  84%|████████▍ | 42/50 [00:09<00:01,  4.65it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  86%|████████▌ | 43/50 [00:09<00:01,  4.49it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  88%|████████▊ | 44/50 [00:09<00:01,  4.40it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  90%|█████████ | 45/50 [00:09<00:01,  4.09it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  92%|█████████▏| 46/50 [00:10<00:00,  4.39it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  94%|█████████▍| 47/50 [00:10<00:00,  4.23it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  96%|█████████▌| 48/50 [00:10<00:00,  4.55it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting:  98%|█████████▊| 49/50 [00:10<00:00,  4.36it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Predicting: 100%|██████████| 50/50 [00:11<00:00,  4.52it/s]
2025-06-25 18:54:59,978 - INFO - Saved results to d:\AL FITRA\STUDI UMM\SEMESTER 6\TEORI\PENALARAN KOMPUTER\SOURCE CODE\PENALARAN KOMPUTER\CBR_Penalararan_Komputer\data\results/logreg_predictions.csv
2025-06-25 18:54:59,979 - INFO - Saved results to d:\AL FITRA\STUDI UMM\SEMESTER 6\TEORI\PENALARAN KOMPUTER\SOURCE CODE\PENALARAN KOMPUTER\CBR_Penalararan_Komputer\data\results/svm_predictions.csv
2025-06-25 18:54:59,980 - INFO - Saved results to d:\AL FITRA\STUDI UMM\SEMESTER 6\TEORI\PENALARAN KOMPUTER\SOURCE CODE\PENALARAN KOMPUTER\CBR_Penalararan_Komputer\data\results/indobert_predictions.csv


✅ Saved to d:\AL FITRA\STUDI UMM\SEMESTER 6\TEORI\PENALARAN KOMPUTER\SOURCE CODE\PENALARAN KOMPUTER\CBR_Penalararan_Komputer\data\results/logreg_predictions.csv
✅ Saved to d:\AL FITRA\STUDI UMM\SEMESTER 6\TEORI\PENALARAN KOMPUTER\SOURCE CODE\PENALARAN KOMPUTER\CBR_Penalararan_Komputer\data\results/svm_predictions.csv
✅ Saved to d:\AL FITRA\STUDI UMM\SEMESTER 6\TEORI\PENALARAN KOMPUTER\SOURCE CODE\PENALARAN KOMPUTER\CBR_Penalararan_Komputer\data\results/indobert_predictions.csv
