# In [None]:
import os
import json
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.metrics import silhouette_score, davies_bouldin_score, adjusted_rand_score
from scipy.spatial.distance import cdist
import joblib

# ✅ Read parameters from JSON file
# The file supplies "dataset_path" (the test spreadsheet) and, optionally,
# "model" (defaults to "kmeans"); both are consumed further down the script.
PARAMS_FILE = "test_params.json"
try:
    # Explicit encoding: JSON text is UTF-8, whereas open()'s default follows
    # the platform locale.
    with open(PARAMS_FILE, "r", encoding="utf-8") as f:
        params = json.load(f)
except FileNotFoundError:
    print("❌ Error: JSON parameters file not found.")
    # `exit` is injected by the `site` module for interactive sessions and
    # may be absent (e.g. `python -S`); raising SystemExit always works.
    raise SystemExit(1) from None
except (json.JSONDecodeError, UnicodeDecodeError) as err:
    print(f"❌ Error: JSON parameters file is not valid JSON: {err}")
    raise SystemExit(1) from None

# The lookups below need a JSON object; a top-level list/string/number has
# no .get() and would otherwise fail with an AttributeError.
if not isinstance(params, dict):
    print("❌ Error: JSON parameters file must contain a JSON object.")
    raise SystemExit(1)

# `or ""` maps an explicit null to the empty path so the later existence
# check reports it as missing instead of failing on a non-path value.
dataset_path = params.get("dataset_path") or ""
# str() guards against a non-string value, on which .lower() would fail.
selected_model = str(params.get("model", "kmeans")).lower()

# ✅ Ensure dataset exists
# isfile() rather than exists(): a directory at this path cannot be read as a
# workbook, so it is reported here with the same "not found" message.
if not os.path.isfile(dataset_path):
    print(f"❌ Error: Dataset file '{dataset_path}' not found.")
    # SystemExit instead of the `exit` helper, which the `site` module only
    # provides for interactive use and which may not be defined.
    raise SystemExit(1)

# ✅ Load dataset
df = pd.read_excel(dataset_path, engine="openpyxl")

# The rest of the script vectorises this column, so fail early without it.
if "Description" not in df.columns:
    print("❌ Error: 'Description' column is required in the dataset.")
    raise SystemExit(1)

# ✅ Load the pre-trained TF-IDF vectorizer
tfidf_path = "mlmodels/tfidf_vectorizer.pkl"
if not os.path.isfile(tfidf_path):
    print(f"❌ Error: TF-IDF vectorizer '{tfidf_path}' not found. Please retrain the model.")
    # SystemExit instead of the `exit` helper, which the `site` module only
    # provides for interactive use and which may not be defined.
    raise SystemExit(1)

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load artifacts produced by a trusted training run.
tfidf = joblib.load(tfidf_path)  # Load the same TF-IDF used in training

# ✅ Transform test data using the same TF-IDF vocabulary
# Blank spreadsheet cells load as NaN and numeric cells as numbers, but the
# vectorizer only accepts strings, so normalise first. This works on a
# separate Series so the saved results keep the original column values.
descriptions = df["Description"].fillna("").astype(str)
X_test = tfidf.transform(descriptions)

# ✅ Load the pre-trained model
# NOTE(security): selected_model is taken from the JSON parameters file and
# interpolated into this path, and joblib.load unpickles whatever it finds
# there (unpickling can execute arbitrary code). Keep the parameters file and
# the mlmodels/ directory under trusted control.
model_path = f"mlmodels/{selected_model}_model.pkl"
if not os.path.isfile(model_path):
    print(f"❌ Error: Model file '{model_path}' not found.")
    # SystemExit instead of the `exit` helper, which the `site` module only
    # provides for interactive use and which may not be defined.
    raise SystemExit(1)

model = joblib.load(model_path)

# ✅ Predict Clusters
# Not every clustering estimator can label unseen rows: DBSCAN and
# AgglomerativeClustering (both imported by this script) offer fit_predict()
# but no predict(). Say so explicitly instead of a generic failure.
if not hasattr(model, "predict"):
    print(
        f"❌ Error: Model prediction failed. '{type(model).__name__}' has no "
        "predict() method and cannot assign clusters to new data."
    )
    raise SystemExit(1)

try:
    # The matrix is densified exactly as before — presumably the model was
    # fitted on dense arrays; TODO confirm against the training script.
    predictions = model.predict(X_test.toarray())
except Exception as err:
    # Top-level boundary: report the underlying cause (e.g. a feature-count
    # mismatch) rather than discarding it, then stop with a failure status.
    print(
        "❌ Error: Model prediction failed. Check compatibility. "
        f"({type(err).__name__}: {err})"
    )
    raise SystemExit(1) from None

df['Predicted_Cluster'] = predictions

# ✅ Save results
# index=False keeps the DataFrame's row index out of the CSV.
df.to_csv("test_results.csv", index=False)

print("✅ Testing complete. Results saved successfully.")
