**Connect Through SQLite Database**

In [1]:
import sqlite3
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from mlxtend.frequent_patterns import apriori, association_rules
import warnings
warnings.filterwarnings('ignore')

In [2]:
#Establish a connection
connection = sqlite3.connect('Symptoms Database.db')
# Create a cursor object
cursor = connection.cursor()

In [3]:
# Fetch data from the database
query = "SELECT * FROM ai_symptom_picker"
data = pd.read_sql_query(query, connection)
# Display the first few rows of the dataframe
print(data.head())

   gender age                                            summary  \
0    male  28  {"diseases": [], "procedures": [], "no_symptom...   
1    male  27  {"diseases": [], "procedures": [], "no_symptom...   
2  female  26  {"diseases": [], "procedures": [], "no_symptom...   
3    male  42  {"diseases": [], "procedures": [], "no_symptom...   
4  female  40  {"diseases": [], "procedures": [], "no_symptom...   

     search_term  
0    มีเสมหะ, ไอ  
1  ไอ, น้ำมูกไหล  
2        ปวดท้อง  
3      น้ำมูกไหล  
4         ตาแห้ง  


In [4]:
Symptoms = data
Symptoms

Unnamed: 0,gender,age,summary,search_term
0,male,28,"{""diseases"": [], ""procedures"": [], ""no_symptom...","มีเสมหะ, ไอ"
1,male,27,"{""diseases"": [], ""procedures"": [], ""no_symptom...","ไอ, น้ำมูกไหล"
2,female,26,"{""diseases"": [], ""procedures"": [], ""no_symptom...",ปวดท้อง
3,male,42,"{""diseases"": [], ""procedures"": [], ""no_symptom...",น้ำมูกไหล
4,female,40,"{""diseases"": [], ""procedures"": [], ""no_symptom...",ตาแห้ง
...,...,...,...,...
995,male,38,"{""diseases"": [], ""procedures"": [], ""no_symptom...","บวม, ปวดข้อ"
996,male,33,"{""diseases"": [], ""procedures"": [], ""no_symptom...",เจ็บคอ
997,male,45,"{""diseases"": [], ""procedures"": [], ""no_symptom...",ไข้
998,female,73,"{""diseases"": [], ""procedures"": [], ""no_symptom...",ไข้


In [5]:
Symptoms.head()

Unnamed: 0,gender,age,summary,search_term
0,male,28,"{""diseases"": [], ""procedures"": [], ""no_symptom...","มีเสมหะ, ไอ"
1,male,27,"{""diseases"": [], ""procedures"": [], ""no_symptom...","ไอ, น้ำมูกไหล"
2,female,26,"{""diseases"": [], ""procedures"": [], ""no_symptom...",ปวดท้อง
3,male,42,"{""diseases"": [], ""procedures"": [], ""no_symptom...",น้ำมูกไหล
4,female,40,"{""diseases"": [], ""procedures"": [], ""no_symptom...",ตาแห้ง


# **Association Rules** For extracting relevant symptoms and match them with another symptom set

In [6]:
class SymptomRecommender:
    """
    A recommendation system for symptoms based on association rules and similarity.
    Works like Netflix - recommends next possible symptoms based on current symptoms.
    """
    
    def __init__(self, symptoms_data):
        self.data = symptoms_data
        self.symptoms_encoded = None
        self.rules = None
        self.symptom_vectors = None
        self.all_symptoms_list = []
        
    def preprocess_data(self):
        """Preprocess and encode symptoms into binary matrix"""
        # Use search_term column (Thai symptoms)
        if 'search_term' in self.data.columns:
            self.data['symptoms_clean'] = self.data['search_term'].fillna('')
        elif 'symptoms_note_clean' in self.data.columns:
            self.data['symptoms_clean'] = self.data['symptoms_note_clean'].fillna('')
        else:
            raise ValueError("No symptom column found. Expected 'search_term' or 'symptoms_note_clean'")
        
        # Split symptoms into list (symptoms are separated by commas)
        self.data['symptoms_list'] = self.data['symptoms_clean'].str.split(',')
        
        # Get unique symptoms
        all_symptoms = set()
        for symptoms in self.data['symptoms_list']:
            if isinstance(symptoms, list):
                cleaned_symptoms = [s.strip() for s in symptoms if s and s.strip()]
                all_symptoms.update(cleaned_symptoms)
        
        self.all_symptoms_list = sorted(list(all_symptoms))
        
        # Create one-hot encoded matrix
        symptoms_encoded = pd.DataFrame(0, index=self.data.index, columns=self.all_symptoms_list)
        
        for idx, symptoms in enumerate(self.data['symptoms_list']):
            if isinstance(symptoms, list):
                for symptom in symptoms:
                    if symptom and symptom.strip():
                        symptoms_encoded.loc[idx, symptom.strip()] = 1
        
        self.symptoms_encoded = symptoms_encoded.fillna(0)
        return self.symptoms_encoded
    
    def build_association_rules(self, min_support=0.01, min_confidence=0.3):
        """Build association rules from symptom patterns"""
        symptoms_bool = self.symptoms_encoded.astype(bool)
        
        # Find frequent itemsets
        frequent_itemsets = apriori(
            symptoms_bool, 
            min_support=min_support, 
            use_colnames=True,
            max_len=5
        )
        
        if len(frequent_itemsets) == 0:
            print("Warning: No frequent itemsets found. Try lowering min_support.")
            return None
        
        # Generate association rules
        self.rules = association_rules(
            frequent_itemsets, 
            metric="lift",
            min_threshold=1.0
        )
        
        print(f"Generated {len(self.rules)} association rules")
        return self.rules
    
    def build_symptom_vectors(self):
        """Create symptom co-occurrence vectors for similarity calculation"""
        # Calculate symptom co-occurrence matrix
        co_occurrence = self.symptoms_encoded.T.dot(self.symptoms_encoded)
        
        # Normalize by symptom frequency
        symptom_freq = self.symptoms_encoded.sum(axis=0)
        self.symptom_vectors = co_occurrence.div(symptom_freq, axis=0).fillna(0)
        
        return self.symptom_vectors
    
    def recommend_symptoms(self, input_symptoms, age=None, gender=None, top_n=10, method='hybrid'):
        """
        Recommend next possible symptoms based on input symptoms.
        
        Parameters:
        -----------
        input_symptoms : str or list
            Current symptoms (e.g., "ท้องแสบ" or ["ท้องแสบ", "ปวดท้อง"])
        age : int, optional
            Patient age
        gender : str, optional
            Patient gender (Male/Female)
        top_n : int
            Number of recommendations to return
        method : str
            'rules', 'similarity', or 'hybrid' (default)
        
        Returns:
        --------
        list : Recommended symptoms with scores
        """
        # Parse input symptoms
        if isinstance(input_symptoms, str):
            input_symptoms = [s.strip() for s in input_symptoms.split(',') if s.strip()]
        
        # Validate input symptoms exist in our data
        valid_symptoms = [s for s in input_symptoms if s in self.all_symptoms_list]
        
        if not valid_symptoms:
            return {
                'recommendations': [],
                'message': f"No matching symptoms found in database. Input: {input_symptoms}"
            }
        
        recommendations = {}
        
        # Method 1: Association Rules
        if method in ['rules', 'hybrid'] and self.rules is not None:
            rule_recs = self._recommend_from_rules(valid_symptoms)
            for symptom, score in rule_recs.items():
                recommendations[symptom] = recommendations.get(symptom, 0) + score
        
        # Method 2: Similarity-based
        if method in ['similarity', 'hybrid']:
            sim_recs = self._recommend_from_similarity(valid_symptoms)
            for symptom, score in sim_recs.items():
                recommendations[symptom] = recommendations.get(symptom, 0) + score * 0.5
        
        # Remove input symptoms from recommendations
        for symptom in valid_symptoms:
            recommendations.pop(symptom, None)
        
        # Filter by demographics if provided
        if age or gender:
            recommendations = self._filter_by_demographics(recommendations, age, gender)
        
        # Sort by score
        sorted_recs = sorted(recommendations.items(), key=lambda x: x[1], reverse=True)[:top_n]
        
        return {
            'input_symptoms': valid_symptoms,
            'recommendations': [
                {'symptom': symptom, 'score': round(score, 3)} 
                for symptom, score in sorted_recs
            ],
            'patient_info': {
                'age': age,
                'gender': gender
            }
        }
    
    def _recommend_from_rules(self, input_symptoms):
        """Recommend based on association rules"""
        recommendations = {}
        
        for _, rule in self.rules.iterrows():
            antecedents = set(rule['antecedents'])
            consequents = set(rule['consequents'])
            
            # Check if input symptoms match antecedents
            if antecedents.intersection(set(input_symptoms)):
                # Recommend consequents
                for symptom in consequents:
                    if symptom not in input_symptoms:
                        score = rule['lift'] * rule['confidence']
                        recommendations[symptom] = max(recommendations.get(symptom, 0), score)
        
        return recommendations
    
    def _recommend_from_similarity(self, input_symptoms):
        """Recommend based on symptom similarity"""
        recommendations = {}
        
        if self.symptom_vectors is None:
            return recommendations
        
        for input_symptom in input_symptoms:
            if input_symptom in self.symptom_vectors.index:
                symptom_vec = self.symptom_vectors.loc[input_symptom]
                
                # Find similar symptoms
                for other_symptom in self.all_symptoms_list:
                    if other_symptom != input_symptom and other_symptom in self.symptom_vectors.index:
                        similarity = symptom_vec[other_symptom]
                        recommendations[other_symptom] = max(
                            recommendations.get(other_symptom, 0), 
                            similarity
                        )
        
        return recommendations
    
    def _filter_by_demographics(self, recommendations, age, gender):
        """Filter recommendations based on patient demographics"""
        # This is a placeholder - implement based on your data structure
        # You can analyze which symptoms are more common for specific age/gender groups
        return recommendations
    
    def fit(self):
        """Train the recommendation system"""
        print("Building recommendation system...")
        self.preprocess_data()
        self.build_association_rules()
        self.build_symptom_vectors()
        print("Recommendation system ready!")
        return self


In [7]:
# Initialize and train the recommender
print("="*60)
print("INITIALIZING SYMPTOM RECOMMENDER")
print("="*60)
print(f"Data shape: {Symptoms.shape}")
print(f"Columns: {list(Symptoms.columns)}")
print(f"\nSample symptoms from search_term:")
print(Symptoms['search_term'].head(10).tolist())

INITIALIZING SYMPTOM RECOMMENDER
Data shape: (1000, 4)
Columns: ['gender', 'age', 'summary', 'search_term']

Sample symptoms from search_term:
['มีเสมหะ, ไอ', 'ไอ, น้ำมูกไหล', 'ปวดท้อง', 'น้ำมูกไหล', 'ตาแห้ง', 'ปวดกระดูก', 'น้ำมูกไหล, คันจมูกจามบ่อย, ไอ,', 'ปวดท้อง', 'คันคอ, ไอ', 'ไอ']


In [8]:
recommender = SymptomRecommender(Symptoms)
recommender.fit()

Building recommendation system...
Generated 12 association rules
Recommendation system ready!


<__main__.SymptomRecommender at 0x1e9941b64d0>

In [9]:
print(f"\nTotal unique symptoms found: {len(recommender.all_symptoms_list)}")
print(f"Sample symptoms: {recommender.all_symptoms_list[:10]}")


Total unique symptoms found: 268
Sample symptoms: ['Animal bite', 'Blurry vision', 'Decreased stool caliber', 'Dizzy', 'DizzyBlack out', 'Dry throat', 'Ear discharge', 'Ear dischargeEar pain', 'Ear pain', 'Eye discharge']


In [10]:
# Example Usage
print("\n" + "="*60)
print("SYMPTOM RECOMMENDATION SYSTEM")
print("="*60)


SYMPTOM RECOMMENDATION SYSTEM


In [11]:
# Example 2: Multiple symptoms
print("\n" + "-"*60)
print("\nExample 2: Male, 26 years old, ปวดท้อง")
try:
    result2 = recommender.recommend_symptoms(
        input_symptoms="ปวดท้อง",
        age=26,
        gender="Female",
        top_n=10
    )

    if isinstance(result2, dict) and 'recommendations' in result2:
        if result2['recommendations']:
            print(f"\nInput symptoms: {result2.get('input_symptoms', 'N/A')}")
            print("\nRecommended next symptoms:")
            for i, rec in enumerate(result2['recommendations'], 1):
                print(f"  {i}. {rec['symptom']:<40} (score: {rec['score']:.3f})")
        else:
            print(f"No recommendations found. {result2.get('message', 'No matching symptoms in database')}")
    else:
        print(f"Unexpected result format: {result2}")
except Exception as e:
    print(f"Error in Example 2: {str(e)}")


------------------------------------------------------------

Example 2: Male, 26 years old, ปวดท้อง

Input symptoms: ['ปวดท้อง']

Recommended next symptoms:
  1. จุกแน่นท้อง                              (score: 0.071)
  2. ท้องเสีย                                 (score: 0.061)
  3. คลื่นไส้                                 (score: 0.020)
  4. คลื่นไส้อาเจียน                          (score: 0.020)
  5. อาเจียน                                  (score: 0.020)
  6. อาเจียนคลื่นไส้                          (score: 0.020)
  7. แสบท้อง                                  (score: 0.020)
  8. ถ่ายเหลว                                 (score: 0.010)
  9. ท้องผูก                                  (score: 0.010)
  10. ปวดท้องน้อย                              (score: 0.010)


In [12]:
# Example 2: Multiple symptoms
print("\n" + "-"*60)
print("\nExample 2: Female, 35 years old, ไอ, มีเสมหะคัดจมูก")
try:
    result2 = recommender.recommend_symptoms(
        input_symptoms="ไอ, มีเสมหะคัดจมูก",
        age=26,
        gender="Female",
        top_n=10
    )

    if isinstance(result2, dict) and 'recommendations' in result2:
        if result2['recommendations']:
            print(f"\nInput symptoms: {result2.get('input_symptoms', 'N/A')}")
            print("\nRecommended next symptoms:")
            for i, rec in enumerate(result2['recommendations'], 1):
                print(f"  {i}. {rec['symptom']:<40} (score: {rec['score']:.3f})")
        else:
            print(f"No recommendations found. {result2.get('message', 'No matching symptoms in database')}")
    else:
        print(f"Unexpected result format: {result2}")
except Exception as e:
    print(f"Error in Example 2: {str(e)}")


------------------------------------------------------------

Example 2: Female, 35 years old, ไอ, มีเสมหะคัดจมูก

Input symptoms: ['ไอ', 'มีเสมหะคัดจมูก']

Recommended next symptoms:
  1. มีเสมหะ                                  (score: 0.495)
  2. น้ำมูกไหล                                (score: 0.471)
  3. เจ็บคอ                                   (score: 0.029)
  4. คัดจมูก                                  (score: 0.017)
  5. มีเสมหะน้ำมูกไหล                         (score: 0.017)
  6. น้ำมูกไหลมีเสมหะ                         (score: 0.014)
  7. ไข้                                      (score: 0.014)
  8. ไอกลางคืน                                (score: 0.014)
  9. กลืนเจ็บ                                 (score: 0.009)
  10. คอแห้ง                                   (score: 0.006)


In [13]:
# Example 3: Complex case
print("\n" + "-"*60)
print("\nExample 3: Male, 42 years old, Fever")
try:
    result3 = recommender.recommend_symptoms(
        input_symptoms="Fever",
        age=42,
        gender="Male",
        top_n=10,
        method='hybrid'  # Use both rules and similarity
    )

    if isinstance(result3, dict) and 'recommendations' in result3:
        if result3['recommendations']:
            print(f"\nInput symptoms: {result3.get('input_symptoms', 'N/A')}")
            print("\nRecommended next symptoms:")
            for i, rec in enumerate(result3['recommendations'], 1):
                print(f"  {i}. {rec['symptom']:<40} (score: {rec['score']:.3f})")
        else:
            print(f"No recommendations found. {result3.get('message', 'No matching symptoms in database')}")
    else:
        print(f"Unexpected result format: {result3}")
except Exception as e:
    print(f"Error in Example 3: {str(e)}")


------------------------------------------------------------

Example 3: Male, 42 years old, Fever

Input symptoms: ['Fever']

Recommended next symptoms:
  1. Headache                                 (score: 0.050)
  2. Eye pain                                 (score: 0.025)
  3. Headachecough                            (score: 0.025)
  4. Runny nose                               (score: 0.025)
  5. Sore throat                              (score: 0.025)
  6. Sore throatStuffy nose                   (score: 0.025)
  7. cough                                    (score: 0.025)
  8. coughLightheaded                         (score: 0.025)
  9. exertion fatique                         (score: 0.025)
  10. sneezing                                 (score: 0.025)


In [14]:
# Function for interactive use
def get_symptom_recommendations(symptoms, age=None, gender=None, top_n=10):
    """
    Easy-to-use function for getting symptom recommendations
    
    Usage:
        get_symptom_recommendations("ท้องแสบ", age=26, gender="Male")
    """
    return recommender.recommend_symptoms(symptoms, age, gender, top_n)

In [15]:
print("\n" + "="*60)
print("You can now use: get_symptom_recommendations('your_symptom', age, gender)")
print("="*60)


You can now use: get_symptom_recommendations('your_symptom', age, gender)


In [16]:
# Test the interactive function
test_result = get_symptom_recommendations("ดาแท้ง", age=40, gender="Female", top_n=5)

if isinstance(test_result, dict) and test_result.get('recommendations'):
    print(f"\n✅ Function Test: get_symptom_recommendations('ดาแท้ง', age=40, gender='Female', top_n=5)")
    print(f"   Input: {test_result.get('input_symptoms', 'N/A')}")
    print("   Top 5 Recommendations:")
    for i, rec in enumerate(test_result['recommendations'], 1):
        print(f"     {i}. {rec['symptom']} (score: {rec['score']:.3f})")
else:
    print(f"   Result: {test_result.get('message', 'No recommendations')}")

   Result: No matching symptoms found in database. Input: ['ดาแท้ง']


In [17]:
print("\n" + "="*60)
print("✅ System Ready! Use get_symptom_recommendations() for quick queries")
print("="*60)


✅ System Ready! Use get_symptom_recommendations() for quick queries


# ============================================================================
# EVALUATION METRICS
# ============================================================================

In [18]:
class RecommenderEvaluator:
    """
    Evaluate the symptom recommendation system using various metrics.
    Similar to how Netflix evaluates recommendation quality.
    """
    
    def __init__(self, recommender, data):
        self.recommender = recommender
        self.data = data
        
    def train_test_split(self, test_size=0.2, random_state=42):
        """Split data into train and test sets"""
        np.random.seed(random_state)
        indices = np.random.permutation(len(self.data))
        test_size_n = int(len(self.data) * test_size)
        
        test_indices = indices[:test_size_n]
        train_indices = indices[test_size_n:]
        
        return train_indices, test_indices
    
    def precision_at_k(self, recommendations, actual_symptoms, k=10):
        """
        Precision@K: How many recommended symptoms are actually relevant?
        """
        if not recommendations or not actual_symptoms:
            return 0.0
        
        top_k_recs = [r['symptom'] for r in recommendations[:k]]
        relevant = len(set(top_k_recs) & set(actual_symptoms))
        
        return relevant / min(k, len(top_k_recs)) if top_k_recs else 0.0
    
    def recall_at_k(self, recommendations, actual_symptoms, k=10):
        """
        Recall@K: How many actual symptoms did we recommend?
        """
        if not actual_symptoms:
            return 0.0
        
        top_k_recs = [r['symptom'] for r in recommendations[:k]]
        relevant = len(set(top_k_recs) & set(actual_symptoms))
        
        return relevant / len(actual_symptoms)
    
    def mean_average_precision(self, recommendations, actual_symptoms, k=10):
        """
        MAP@K: Average precision across different positions
        """
        if not actual_symptoms:
            return 0.0
        
        top_k_recs = [r['symptom'] for r in recommendations[:k]]
        
        score = 0.0
        num_hits = 0.0
        
        for i, rec in enumerate(top_k_recs):
            if rec in actual_symptoms:
                num_hits += 1.0
                score += num_hits / (i + 1.0)
        
        return score / min(len(actual_symptoms), k) if actual_symptoms else 0.0
    
    def ndcg_at_k(self, recommendations, actual_symptoms, k=10):
        """
        NDCG@K: Normalized Discounted Cumulative Gain
        Measures ranking quality (position matters)
        """
        if not actual_symptoms:
            return 0.0
        
        top_k_recs = [r['symptom'] for r in recommendations[:k]]
        
        # DCG
        dcg = 0.0
        for i, rec in enumerate(top_k_recs):
            if rec in actual_symptoms:
                dcg += 1.0 / np.log2(i + 2)  # +2 because log2(1) = 0
        
        # IDCG (ideal DCG)
        idcg = sum(1.0 / np.log2(i + 2) for i in range(min(len(actual_symptoms), k)))
        
        return dcg / idcg if idcg > 0 else 0.0
    
    def hit_rate_at_k(self, recommendations, actual_symptoms, k=10):
        """
        Hit Rate@K: Did we recommend at least one relevant symptom?
        """
        if not actual_symptoms:
            return 0.0
        
        top_k_recs = [r['symptom'] for r in recommendations[:k]]
        return 1.0 if set(top_k_recs) & set(actual_symptoms) else 0.0
    
    def coverage(self, all_recommendations):
        """
        Coverage: What percentage of all symptoms can the system recommend?
        """
        recommended_symptoms = set()
        for recs in all_recommendations:
            recommended_symptoms.update([r['symptom'] for r in recs])
        
        total_symptoms = len(self.recommender.all_symptoms_list)
        return len(recommended_symptoms) / total_symptoms if total_symptoms > 0 else 0.0
    
    def diversity(self, recommendations, k=10):
        """
        Diversity: How different are the recommendations from each other?
        """
        if len(recommendations) < 2:
            return 0.0
        
        top_k_recs = [r['symptom'] for r in recommendations[:k]]
        
        if len(top_k_recs) < 2:
            return 0.0
        
        # Calculate pairwise diversity
        diversity_sum = 0.0
        count = 0
        
        for i in range(len(top_k_recs)):
            for j in range(i + 1, len(top_k_recs)):
                sym1, sym2 = top_k_recs[i], top_k_recs[j]
                if sym1 in self.recommender.symptom_vectors.index and \
                   sym2 in self.recommender.symptom_vectors.index:
                    sim = self.recommender.symptom_vectors.loc[sym1, sym2]
                    diversity_sum += (1 - sim)
                    count += 1
        
        return diversity_sum / count if count > 0 else 0.0
    
    def evaluate(self, test_indices=None, k_values=[5, 10, 20], sample_size=100):
        """
        Comprehensive evaluation of the recommendation system
        """
        if test_indices is None:
            _, test_indices = self.train_test_split()
        
        all_metrics = {k: {
            'precision': [],
            'recall': [],
            'map': [],
            'ndcg': [],
            'hit_rate': []
        } for k in k_values}
        
        all_recommendations = []
        diversity_scores = []
        
        print("\nEvaluating recommendation system...")
        print("="*60)
        
        successful_tests = 0
        failed_tests = 0
        
        for idx in test_indices[:sample_size]:
            row = self.data.iloc[idx]
            symptoms_list = row['symptoms_list']
            
            # Skip invalid rows
            if not isinstance(symptoms_list, list) or len(symptoms_list) < 2:
                continue
            
            # Clean symptoms list
            symptoms_list = [s.strip() for s in symptoms_list if s and str(s).strip()]
            
            if len(symptoms_list) < 2:
                continue
            
            # Use first symptom as input, rest as ground truth
            # For single-symptom testing (more realistic)
            input_symptoms = [symptoms_list[0]]
            actual_symptoms = symptoms_list[1:]
            
            # Get recommendations
            try:
                result = self.recommender.recommend_symptoms(
                    input_symptoms=input_symptoms,
                    top_n=max(k_values),
                    method='hybrid'
                )
                
                if not isinstance(result, dict) or 'recommendations' not in result:
                    failed_tests += 1
                    continue
                    
                recommendations = result['recommendations']
                
                if not recommendations:
                    failed_tests += 1
                    continue
                
                all_recommendations.append(recommendations)
                successful_tests += 1
                
                # Calculate diversity
                div = self.diversity(recommendations, k=10)
                diversity_scores.append(div)
                
                # Calculate metrics for each k
                for k in k_values:
                    all_metrics[k]['precision'].append(
                        self.precision_at_k(recommendations, actual_symptoms, k)
                    )
                    all_metrics[k]['recall'].append(
                        self.recall_at_k(recommendations, actual_symptoms, k)
                    )
                    all_metrics[k]['map'].append(
                        self.mean_average_precision(recommendations, actual_symptoms, k)
                    )
                    all_metrics[k]['ndcg'].append(
                        self.ndcg_at_k(recommendations, actual_symptoms, k)
                    )
                    all_metrics[k]['hit_rate'].append(
                        self.hit_rate_at_k(recommendations, actual_symptoms, k)
                    )
                    
            except Exception as e:
                failed_tests += 1
                continue
        
        print(f"\n✓ Successful tests: {successful_tests}")
        print(f"✗ Failed tests: {failed_tests}")
        
        if successful_tests == 0:
            print("\n⚠️  WARNING: No successful recommendations generated!")
            print("   This might indicate:")
            print("   - No association rules found (try lowering min_support)")
            print("   - Symptoms in test set don't match training data")
            print("   - Data format issues")
            return None
        
        # Calculate coverage
        coverage_score = self.coverage(all_recommendations)
        
        # Print results
        print("\n📊 EVALUATION RESULTS")
        print("="*60)
        
        for k in k_values:
            if all_metrics[k]['precision']:
                print(f"\n🎯 Metrics @ K={k}:")
                print(f"  Precision@{k}:    {np.mean(all_metrics[k]['precision']):.4f}")
                print(f"  Recall@{k}:       {np.mean(all_metrics[k]['recall']):.4f}")
                print(f"  MAP@{k}:          {np.mean(all_metrics[k]['map']):.4f}")
                print(f"  NDCG@{k}:         {np.mean(all_metrics[k]['ndcg']):.4f}")
                print(f"  Hit Rate@{k}:     {np.mean(all_metrics[k]['hit_rate']):.4f}")
        
        print(f"\n📈 System Metrics:")
        print(f"  Coverage:         {coverage_score:.4f}")
        if diversity_scores:
            print(f"  Avg Diversity:    {np.mean(diversity_scores):.4f}")
        
        print("\n" + "="*60)
        
        # Return summary
        return {
            'metrics_by_k': all_metrics,
            'coverage': coverage_score,
            'diversity': np.mean(diversity_scores) if diversity_scores else 0.0,
            'successful_tests': successful_tests,
            'failed_tests': failed_tests,
            'summary': {
                f'precision@{k}': np.mean(all_metrics[k]['precision']) if all_metrics[k]['precision'] else 0.0
                for k in k_values
            }
        }
    
    def cross_validate(self, n_folds=5, k=10, sample_per_fold=50):
        """
        K-fold cross validation
        """
        print(f"\n🔄 Running {n_folds}-Fold Cross Validation...")
        print("="*60)
        
        # Get valid indices (rows with at least 2 symptoms)
        valid_indices = []
        for idx in range(len(self.data)):
            row = self.data.iloc[idx]
            symptoms_list = row['symptoms_list']
            if isinstance(symptoms_list, list):
                symptoms_list = [s.strip() for s in symptoms_list if s and str(s).strip()]
                if len(symptoms_list) >= 2:
                    valid_indices.append(idx)
        
        if len(valid_indices) < n_folds:
            print(f"⚠️  Not enough valid samples ({len(valid_indices)}) for {n_folds}-fold CV")
            return None
        
        # Shuffle valid indices
        np.random.seed(42)
        valid_indices = np.array(valid_indices)
        np.random.shuffle(valid_indices)
        
        fold_size = len(valid_indices) // n_folds
        all_fold_results = []
        
        for fold in range(n_folds):
            print(f"\nFold {fold + 1}/{n_folds}")
            start_idx = fold * fold_size
            end_idx = start_idx + fold_size if fold < n_folds - 1 else len(valid_indices)
            
            test_indices = valid_indices[start_idx:end_idx]
            
            # Evaluate this fold
            fold_metrics = {
                'precision': [],
                'recall': [],
                'ndcg': []
            }
            
            successful = 0
            attempted = 0
            
            for idx in test_indices[:sample_per_fold]:
                attempted += 1
                row = self.data.iloc[idx]
                symptoms_list = row['symptoms_list']
                
                # Clean symptoms
                symptoms_list = [s.strip() for s in symptoms_list if s and str(s).strip()]
                
                if len(symptoms_list) < 2:
                    continue
                
                input_symptoms = [symptoms_list[0]]
                actual_symptoms = symptoms_list[1:]
                
                try:
                    result = self.recommender.recommend_symptoms(
                        input_symptoms=input_symptoms,
                        top_n=k,
                        method='hybrid'
                    )
                    
                    if not isinstance(result, dict) or 'recommendations' not in result:
                        continue
                        
                    recommendations = result['recommendations']
                    
                    if not recommendations:
                        continue
                    
                    prec = self.precision_at_k(recommendations, actual_symptoms, k)
                    rec = self.recall_at_k(recommendations, actual_symptoms, k)
                    ndcg = self.ndcg_at_k(recommendations, actual_symptoms, k)
                    
                    fold_metrics['precision'].append(prec)
                    fold_metrics['recall'].append(rec)
                    fold_metrics['ndcg'].append(ndcg)
                    successful += 1
                    
                except Exception as e:
                    continue
            
            print(f"  Attempted: {attempted}, Successful: {successful}")
            
            if fold_metrics['precision']:
                fold_result = {
                    'precision': np.mean(fold_metrics['precision']),
                    'recall': np.mean(fold_metrics['recall']),
                    'ndcg': np.mean(fold_metrics['ndcg']),
                    'n_samples': successful
                }
                all_fold_results.append(fold_result)
                
                print(f"  Precision@{k}: {fold_result['precision']:.4f}")
                print(f"  Recall@{k}:    {fold_result['recall']:.4f}")
                print(f"  NDCG@{k}:      {fold_result['ndcg']:.4f}")
            else:
                print(f"  ⚠️  No successful predictions in this fold")
        
        if not all_fold_results:
            print("\n⚠️  WARNING: No successful folds!")
            print("   Try increasing sample_per_fold or checking data quality")
            return None
        
        # Calculate average across folds
        avg_results = {
            'precision': np.mean([f['precision'] for f in all_fold_results]),
            'recall': np.mean([f['recall'] for f in all_fold_results]),
            'ndcg': np.mean([f['ndcg'] for f in all_fold_results]),
            'precision_std': np.std([f['precision'] for f in all_fold_results]),
            'recall_std': np.std([f['recall'] for f in all_fold_results]),
            'ndcg_std': np.std([f['ndcg'] for f in all_fold_results]),
            'n_folds_successful': len(all_fold_results)
        }
        
        print(f"\n📊 Cross-Validation Results (K={k}):")
        print("="*60)
        print(f"  Successful Folds: {avg_results['n_folds_successful']}/{n_folds}")
        print(f"  Precision@{k}: {avg_results['precision']:.4f} ± {avg_results['precision_std']:.4f}")
        print(f"  Recall@{k}:    {avg_results['recall']:.4f} ± {avg_results['recall_std']:.4f}")
        print(f"  NDCG@{k}:      {avg_results['ndcg']:.4f} ± {avg_results['ndcg_std']:.4f}")
        print("="*60)
        
        return avg_results

In [19]:
# Initialize evaluator
evaluator = RecommenderEvaluator(recommender, Symptoms)

In [20]:
# Pre-evaluation diagnostics
print("\n" + "="*70)
print("PRE-EVALUATION DIAGNOSTICS")
print("="*70)


PRE-EVALUATION DIAGNOSTICS


In [21]:
# Test with a few random symptoms
print("\nTesting recommendation generation with sample symptoms:")
test_symptoms = recommender.all_symptoms_list[:5]  # Take first 5 symptoms
for sym in test_symptoms:
    result = recommender.recommend_symptoms(sym, top_n=3)
    if result['recommendations']:
        print(f"✓ '{sym}' → {len(result['recommendations'])} recommendations")
    else:
        print(f"✗ '{sym}' → No recommendations")


Testing recommendation generation with sample symptoms:
✓ 'Animal bite' → 3 recommendations
✓ 'Blurry vision' → 3 recommendations
✓ 'Decreased stool caliber' → 3 recommendations
✓ 'Dizzy' → 3 recommendations
✓ 'DizzyBlack out' → 3 recommendations


In [22]:
print(f"\nAssociation Rules: {len(recommender.rules) if recommender.rules is not None else 0}")
print(f"Symptom Vectors: {recommender.symptom_vectors.shape if recommender.symptom_vectors is not None else 'None'}")


Association Rules: 12
Symptom Vectors: (268, 268)


In [23]:
# Run evaluation
print("\n" + "="*70)
print("RUNNING EVALUATION METRICS")
print("="*70)


RUNNING EVALUATION METRICS


In [24]:
evaluation_results = evaluator.evaluate(k_values=[5, 10, 20], sample_size=100)


Evaluating recommendation system...

✓ Successful tests: 43
✗ Failed tests: 0

📊 EVALUATION RESULTS

🎯 Metrics @ K=5:
  Precision@5:    0.1721
  Recall@5:       0.7442
  MAP@5:          0.5222
  NDCG@5:         0.5841
  Hit Rate@5:     0.7674

🎯 Metrics @ K=10:
  Precision@10:    0.1023
  Recall@10:       0.8779
  MAP@10:          0.5411
  NDCG@10:         0.6274
  Hit Rate@10:     0.8837

🎯 Metrics @ K=20:
  Precision@20:    0.0547
  Recall@20:       0.9302
  MAP@20:          0.5461
  NDCG@20:         0.6418
  Hit Rate@20:     0.9302

📈 System Metrics:
  Coverage:         0.4440
  Avg Diversity:    0.9595



In [25]:
# Run cross-validation
cv_results = evaluator.cross_validate(n_folds=5, k=10)


🔄 Running 5-Fold Cross Validation...

Fold 1/5
  Attempted: 50, Successful: 50
  Precision@10: 0.0900
  Recall@10:    0.8500
  NDCG@10:      0.6111

Fold 2/5
  Attempted: 50, Successful: 50
  Precision@10: 0.1040
  Recall@10:    0.9350
  NDCG@10:      0.6460

Fold 3/5
  Attempted: 50, Successful: 50
  Precision@10: 0.1040
  Recall@10:    0.9300
  NDCG@10:      0.6945

Fold 4/5
  Attempted: 50, Successful: 50
  Precision@10: 0.1040
  Recall@10:    0.8800
  NDCG@10:      0.6605

Fold 5/5
  Attempted: 50, Successful: 50
  Precision@10: 0.1040
  Recall@10:    0.9114
  NDCG@10:      0.6791

📊 Cross-Validation Results (K=10):
  Successful Folds: 5/5
  Precision@10: 0.1012 ± 0.0056
  Recall@10:    0.9013 ± 0.0321
  NDCG@10:      0.6583 ± 0.0288
