In [None]:
import os
import re
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict

# ================== CONFIG ==================
# Input folders holding the "enhanced" and "primary" recommendation CSVs.
# process_folder() also writes its plots, summary CSV and text report into
# the folder it is given.
ENHANCED_DIR = "/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0922/enhanced_analysis/"
PRIMARY_DIR  = "/home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0922/primary_analysis"
# Column names read from every recommendation CSV.
GENRE_COL = "genres_all"
USER_COL  = "user_id"

# If True, divide by ALL users in each file (users with zero matches count as 0).
# If False, divide by only users with ≥1 match.
INCLUDE_ZEROS = True


# ================== COLOR PALETTE ==================
# Bar colors for the binned plots. 'original' is the baseline; the remaining
# keys are looked up as '<folder_type>_<rank>' where rank is the 1-based
# position of a dataset after sorting by size (only ranks 1-4 are defined).
GIRLY_COLORS = {
    'original': '#1f77b4',      # Blue
    'enhanced_1': '#ff7f0e',    # Orange
    'enhanced_2': '#2ca02c',    # Green
    'enhanced_3': '#d62728',    # Red
    'enhanced_4': '#9467bd',    # Purple
    'primary_1': '#8c564b',     # Brown
    'primary_2': '#e377c2',     # Pink
    'primary_3': '#7f7f7f',     # Gray
    'primary_4': '#bcbd22'      # Olive
}


# ================== FILENAME PATTERNS ==================
# All patterns are case-insensitive and must match the whole filename.
#   ORIGINAL_<k>recommendation.csv                 -> group 1 = k
#   enhanced_<genre>_<size>_<k>recommendation.csv  -> groups = genre, size, k
#   primary_p_<genre>_<size>_<k>recommendation.csv -> groups = genre, size, k
# <genre> may itself contain underscores (e.g. "Science_Fiction").
# NOTE(review): the captured run log lists files named
# "improved_<genre>_<size>_<k>recommendation.csv" that match none of these
# patterns — confirm whether that prefix should be accepted as well.
RE_ORIGINAL = re.compile(r"^ORIGINAL_(\d+)recommendation\.csv$", re.IGNORECASE)
RE_ENHANCED = re.compile(r"^enhanced_([^_]+(?:_[^_]+)*)_([0-9]+)_(\d+)recommendation\.csv$", re.IGNORECASE)
RE_PRIMARY  = re.compile(r"^primary_p_([^_]+(?:_[^_]+)*)_([0-9]+)_(\d+)recommendation\.csv$", re.IGNORECASE)

def parse_file_meta(filename):
    """
    Classify a recommendation CSV by its filename.

    The name is tried against the ORIGINAL, enhanced and primary patterns,
    in that order. On the first match a dict is returned with keys:
      'source': 'original' | 'enhanced' | 'primary'
      'genre':  str, or None for original files
      'size':   int, or None for original files
      'k':      int
    If no pattern matches, None is returned.
    """
    baseline_hit = RE_ORIGINAL.match(filename)
    if baseline_hit is not None:
        return {"source": "original", "genre": None, "size": None, "k": int(baseline_hit.group(1))}

    # Both synthetic patterns expose the same three groups: genre, size, k.
    for source, pattern in (("enhanced", RE_ENHANCED), ("primary", RE_PRIMARY)):
        hit = pattern.match(filename)
        if hit is not None:
            genre, size, top_k = hit.groups()
            return {"source": source, "genre": genre, "size": int(size), "k": int(top_k)}

    return None

# ================== GENRE TOKENIZATION ==================
def tokenize_genres(cell):
    """
    Turn one genre cell into a set of lower-cased, whitespace-stripped tokens.

    The cell is split on ';', ',' or '|'. Empty pieces are discarded, and a
    missing value (NaN/None) yields an empty set. Callers test membership in
    the returned set, so matching is on whole tokens rather than substrings.
    """
    if pd.isna(cell):
        return set()
    pieces = (piece.strip() for piece in re.split(r"[;,|]", str(cell)))
    return {piece.lower() for piece in pieces if piece}

def user_genre_counts(df, target_genre):
    """
    Count, per user, the rows whose GENRE_COL contains ``target_genre``.

    Matching is case-insensitive and on whole tokens (see tokenize_genres).
    Returns a Series indexed by USER_COL values. When INCLUDE_ZEROS is set,
    every user present in ``df`` appears, with 0 for users that have no
    matching row; otherwise only users with at least one match appear.
    """
    wanted = str(target_genre).lower()

    def _has_genre(cell):
        return wanted in tokenize_genres(cell)

    hits = df[df[GENRE_COL].apply(_has_genre)]
    per_user = hits.groupby(USER_COL).size()
    if not INCLUDE_ZEROS:
        return per_user
    # Re-index on every user seen in the file so non-matching users count as 0.
    return per_user.reindex(df[USER_COL].unique(), fill_value=0)

def avg_genre_per_user_for_file(csv_path, target_genre):
    """
    Return the mean per-user count of ``target_genre`` rows in one CSV.

    This is best-effort: a missing column, an empty result, or any exception
    while reading or processing the file is reported on stdout and yields 0.0
    instead of raising.
    """
    try:
        frame = pd.read_csv(csv_path)
        columns_present = USER_COL in frame.columns and GENRE_COL in frame.columns
        if not columns_present:
            print(f"⚠️ Missing columns in {csv_path}: {frame.columns.tolist()}")
            return 0.0
        per_user = user_genre_counts(frame, target_genre)
        if per_user.empty:
            return 0.0
        return per_user.mean()
    except Exception as exc:
        print(f"❌ Error processing {csv_path}: {exc}")
        return 0.0

def get_user_genre_counts_for_binning(csv_path, target_genre):
    """
    Return the per-user ``target_genre`` counts for one CSV, for binning.

    Args:
        csv_path: Path of the recommendation CSV to read.
        target_genre: Genre token to count (matched case-insensitively).

    Returns:
        The Series produced by user_genre_counts (indexed by user id). If a
        required column is missing or the file cannot be read or processed,
        the problem is printed and an empty int64 Series is returned so
        callers can simply test ``.empty``.
    """
    try:
        df = pd.read_csv(csv_path)
        if USER_COL not in df.columns or GENRE_COL not in df.columns:
            print(f"⚠️ Missing columns in {csv_path}")
            # Explicit dtype: a bare pd.Series() is deprecated on pandas 1.x
            # and its default dtype differs between pandas versions.
            return pd.Series(dtype="int64")
        return user_genre_counts(df, target_genre)
    except Exception as e:
        # Deliberately broad: one unreadable file must not abort the whole run.
        print(f"❌ Error in binning for {csv_path}: {str(e)}")
        return pd.Series(dtype="int64")

def create_user_bins(all_users, n_bins=10):
    """
    Partition the sorted users into ``n_bins`` consecutive slices.

    Every bin except the last holds ``len(all_users) // n_bins`` users; the
    last bin additionally absorbs the remainder. With fewer users than bins
    the leading bins are therefore empty and all users land in the last one.
    """
    ordered = sorted(all_users)
    total = len(ordered)
    width = total // n_bins
    final_idx = n_bins - 1
    return [
        ordered[idx * width:(total if idx == final_idx else (idx + 1) * width)]
        for idx in range(n_bins)
    ]

def calculate_bin_averages(user_counts_dict, user_bins, target_genre):
    """
    Compute the mean genre count of every user bin, for each dataset.

    ``user_counts_dict`` maps a dataset name to a per-user count Series.
    Users of a bin that are absent from a Series contribute 0. An empty
    Series, or an empty bin, yields 0.0. ``target_genre`` is accepted but
    not used here.

    Returns a dict: dataset name -> list with one average per bin.
    """
    averages_by_dataset = {}
    for name, counts in user_counts_dict.items():
        if counts.empty:
            averages_by_dataset[name] = [0.0] * len(user_bins)
            continue

        per_bin = []
        for members in user_bins:
            looked_up = [counts.get(member, 0) for member in members]
            if looked_up:
                per_bin.append(np.mean(looked_up))
            else:
                per_bin.append(0.0)
        averages_by_dataset[name] = per_bin

    return averages_by_dataset

class ComprehensiveTextAnalysis:
    """Accumulate per-genre / per-K bin averages and render one text report.

    Usage: create an instance, call add_analysis() once per (genre, k)
    combination, then call save_comprehensive_report() to write the file
    ``0922_<analysis_type>_comprehensive_analysis.txt`` into ``output_dir``.
    """

    def __init__(self, analysis_type, output_dir):
        """Remember the analysis label and derive the report path.

        Args:
            analysis_type: Label used in the file name and headings
                (e.g. "enhanced" or "primary").
            output_dir: Directory the report is written to.
        """
        self.analysis_type = analysis_type
        self.output_dir = output_dir
        self.text_filename = f"0922_{analysis_type}_comprehensive_analysis.txt"
        self.text_path = os.path.join(output_dir, self.text_filename)
        self.analyses = []

    def add_analysis(self, genre, k, bin_averages, user_bins):
        """Queue one genre/K analysis for the report.

        Args:
            genre: Genre name shown in headings.
            k: Top-K value shown in headings.
            bin_averages: dict mapping dataset name -> list of per-bin
                averages; the baseline, when present, is keyed "ORIGINAL".
            user_bins: list of user-id lists, one per bin.
        """
        self.analyses.append({
            'genre': genre,
            'k': k,
            'bin_averages': bin_averages,
            'user_bins': user_bins
        })

    @staticmethod
    def _percent_changes(values, baseline_values):
        """Return the % change vs. baseline for every bin whose baseline is > 0."""
        return [
            ((val - baseline) / baseline) * 100
            for val, baseline in zip(values, baseline_values)
            if baseline > 0
        ]

    @staticmethod
    def _dataset_size(dataset_name):
        """Return the integer after the last '_' in a dataset name, or 0."""
        if '_' in dataset_name:
            try:
                return int(dataset_name.split('_')[-1])
            except ValueError:
                return 0
        return 0

    def save_comprehensive_report(self):
        """Write every queued analysis plus a summary to the report file.

        Returns:
            The path of the written text file.
        """
        # The report is full of emoji; force UTF-8 so writing does not fail
        # on platforms whose default locale encoding cannot represent them.
        with open(self.text_path, 'w', encoding='utf-8') as f:
            # Write header
            f.write("🌸" * 80 + "\n")
            f.write(f"0922 EXPERIMENT - COMPREHENSIVE {self.analysis_type.upper()} ANALYSIS REPORT\n")
            f.write("🌸" * 80 + "\n\n")

            f.write(f"Analysis Date: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            # NOTE(review): "0922" may denote September 22 rather than
            # August 22 — confirm the intended experiment date.
            f.write("Experiment: 0922 (August 22, 2025)\n")
            f.write(f"Analysis Type: {self.analysis_type.title()}\n")
            f.write(f"Total Genre-K Combinations Analyzed: {len(self.analyses)}\n")

            if self.analyses:
                # User totals are taken from the first analysis' bins.
                first_bins = self.analyses[0]['user_bins']
                total_users = sum(len(bin_users) for bin_users in first_bins)
                f.write(f"Total Users Analyzed: {total_users:,}\n")
                f.write(f"User Binning: {len(first_bins)} equal bins\n")

            f.write("\n")

            # Table of Contents
            f.write("💕 TABLE OF CONTENTS 💕\n")
            f.write("-" * 40 + "\n")
            for i, analysis in enumerate(self.analyses, 1):
                genre = analysis['genre']
                k = analysis['k']
                f.write(f"{i:2d}. {genre} Genre - Top-{k} Analysis\n")
            f.write("\n" + "=" * 80 + "\n\n")

            # Individual analyses
            for i, analysis in enumerate(self.analyses, 1):
                self._write_individual_analysis(f, i, analysis)

            # Summary section
            self._write_summary_section(f)

        return self.text_path

    def _write_individual_analysis(self, f, section_num, analysis):
        """Write the section for one genre/K analysis to the open file ``f``."""
        genre = analysis['genre']
        k = analysis['k']
        bin_averages = analysis['bin_averages']
        user_bins = analysis['user_bins']

        f.write(f"🌺 SECTION {section_num}: {genre.upper()} GENRE - TOP-{k} ANALYSIS 🌺\n")
        f.write("=" * 80 + "\n\n")

        f.write("Experiment: 0922\n")
        f.write(f"Genre: {genre}\n")
        f.write(f"Top-K: {k}\n")
        f.write(f"Number of User Bins: {len(user_bins)}\n")
        f.write(f"Users per Analysis: {sum(len(bin_users) for bin_users in user_bins):,}\n\n")

        # Dataset overview
        f.write("📊 DATASETS ANALYZED:\n")
        f.write("-" * 40 + "\n")
        for dataset_name, averages in bin_averages.items():
            overall_avg = np.mean(averages)
            f.write(f"• {dataset_name}: Overall Average = {overall_avg:.4f}\n")
        f.write("\n")

        # Bin-by-bin analysis (condensed)
        f.write("🔍 BIN-BY-BIN SUMMARY:\n")
        f.write("=" * 50 + "\n")

        has_baseline = "ORIGINAL" in bin_averages
        # Every non-baseline dataset, ordered by the size encoded in its name.
        # Kept separate from the baseline so that no dataset is dropped from
        # the table when "ORIGINAL" is missing.
        comparison_datasets = sorted(
            (name for name in bin_averages if name != "ORIGINAL"),
            key=self._dataset_size,
        )

        # Condensed bin analysis table
        f.write(f"{'Bin':<4} | {'ORIGINAL':<10}")
        for dataset in comparison_datasets:
            f.write(f" | {dataset:<12}")
        f.write("\n")
        f.write("-" * (15 + 15 * (len(comparison_datasets) + 1)) + "\n")

        for bin_idx in range(len(user_bins)):
            bin_num = bin_idx + 1
            f.write(f"{bin_num:<4} | ")

            if has_baseline:
                baseline_val = bin_averages["ORIGINAL"][bin_idx]
                f.write(f"{baseline_val:<10.4f}")
            else:
                # No baseline: show N/A rather than a misleading 0.0000.
                baseline_val = 0
                f.write(f"{'N/A':<10}")

            for dataset_name in comparison_datasets:
                value = bin_averages[dataset_name][bin_idx]
                if baseline_val > 0:
                    change_pct = ((value - baseline_val) / baseline_val) * 100
                    f.write(f" | {value:6.4f}({change_pct:+5.1f}%)")
                else:
                    f.write(f" | {value:6.4f}(  N/A  )")
            f.write("\n")

        f.write("\n")

        # Comparative analysis summary
        if has_baseline and comparison_datasets:
            f.write("⚖️ PERFORMANCE SUMMARY:\n")
            f.write("-" * 30 + "\n")

            baseline_values = bin_averages["ORIGINAL"]

            for dataset_name in comparison_datasets:
                changes = self._percent_changes(bin_averages[dataset_name], baseline_values)
                if not changes:
                    # Baseline is 0 in every bin: nothing comparable.
                    continue

                # A bin counts as improved only beyond a +0.1% tolerance.
                improvements = sum(1 for change in changes if change > 0.1)
                avg_change = sum(changes) / len(changes)
                emoji = "🎉" if avg_change > 5 else "😊" if avg_change > 0 else "😢" if avg_change < -5 else "😐"

                f.write(f"🆚 {dataset_name}: {avg_change:+6.2f}% avg{emoji} ")
                f.write(f"({improvements}/{len(user_bins)} bins improved)\n")

        f.write("\n" + "🌸" * 80 + "\n\n")

    def _write_summary_section(self, f):
        """Write the cross-analysis summary (overview, best/worst) to ``f``."""
        f.write("🌈 0922 EXPERIMENT COMPREHENSIVE SUMMARY 🌈\n")
        f.write("=" * 80 + "\n\n")

        if not self.analyses:
            f.write("No analyses to summarize.\n")
            return

        # Overall statistics
        f.write("📋 EXPERIMENT OVERVIEW:\n")
        f.write("-" * 25 + "\n")

        unique_genres = sorted({a['genre'] for a in self.analyses})
        unique_ks = sorted({a['k'] for a in self.analyses})

        f.write("• Experiment Date: August 22, 2025\n")
        f.write(f"• Total analyses: {len(self.analyses)}\n")
        f.write(f"• Genres analyzed: {len(unique_genres)} ({', '.join(unique_genres)})\n")
        f.write(f"• K values analyzed: {len(unique_ks)} ({', '.join(map(str, unique_ks))})\n\n")

        # Best and worst performers
        f.write("🏆 TOP PERFORMERS (0922 EXPERIMENT):\n")
        f.write("-" * 40 + "\n")

        best_improvements = []

        for analysis in self.analyses:
            bin_averages = analysis['bin_averages']

            # Without a baseline there is nothing to compare against.
            if "ORIGINAL" not in bin_averages:
                continue

            baseline_values = bin_averages["ORIGINAL"]

            for dataset_name, values in bin_averages.items():
                if dataset_name == "ORIGINAL":
                    continue

                changes = self._percent_changes(values, baseline_values)
                if changes:
                    best_improvements.append({
                        'genre': analysis['genre'],
                        'k': analysis['k'],
                        'dataset': dataset_name,
                        'avg_change': sum(changes) / len(changes)
                    })

        if best_improvements:
            best_improvements.sort(key=lambda x: x['avg_change'], reverse=True)

            f.write("🥇 TOP 5 BEST RESULTS:\n")
            for i, item in enumerate(best_improvements[:5], 1):
                emoji = "🌟" if item['avg_change'] > 2 else "✨" if item['avg_change'] > 0 else "⭐"
                f.write(f"{i}. {item['genre']} Top-{item['k']} - {item['dataset']}: {item['avg_change']:+.2f}% {emoji}\n")

            # The list is sorted best-first, so within this slice the very
            # worst result is printed last.
            f.write("\n🥉 WORST 5 RESULTS:\n")
            for i, item in enumerate(best_improvements[-5:], 1):
                emoji = "💔" if item['avg_change'] < -10 else "😢" if item['avg_change'] < -5 else "😐"
                f.write(f"{i}. {item['genre']} Top-{item['k']} - {item['dataset']}: {item['avg_change']:+.2f}% {emoji}\n")

        f.write("\n")
        f.write("💭 0922 EXPERIMENT CONCLUSIONS:\n")
        f.write("-" * 35 + "\n")
        f.write("1. This experiment tests updated bias injection strategies\n")
        f.write("2. User binning reveals segmented impact across user populations\n")
        f.write("3. Results compared against original baseline performance\n")
        f.write("4. Analysis covers multiple genres and recommendation depths\n\n")

        f.write("🌸" * 80 + "\n")
        f.write("END OF 0922 EXPERIMENT ANALYSIS REPORT\n")
        f.write("🌸" * 80 + "\n")

# ================== FOLDER PROCESSOR ==================
def _plot_binned_comparison(bin_averages, sizes, folder_type, genre, k, n_bins, out_png):
    """Draw one grouped bar chart of per-bin averages and save it to ``out_png``.

    The baseline ("ORIGINAL"), when present, is drawn first, followed by the
    synthetic datasets in ascending size order. The figure is always closed,
    even when drawing or saving fails.
    """
    datasets = []
    colors = []

    if "ORIGINAL" in bin_averages:
        datasets.append(("ORIGINAL", bin_averages["ORIGINAL"]))
        colors.append(GIRLY_COLORS['original'])

    # Colors are assigned by rank after sorting by size; ranks without a
    # palette entry fall back to light pink.
    fallback_color = '#FFB6C1'
    for rank, size in enumerate(sorted(sizes), start=1):
        key = f"{folder_type.title()}_{size}"
        if key in bin_averages:
            datasets.append((key, bin_averages[key]))
            colors.append(GIRLY_COLORS.get(f"{folder_type}_{rank}", fallback_color))

    bin_numbers = list(range(1, n_bins + 1))
    # Shrink the bars when many datasets share a bin so that one group never
    # spills into the neighbouring bin's slot.
    bar_width = min(0.15, 0.8 / max(len(datasets), 1))

    fig, ax = plt.subplots(figsize=(14, 8))
    try:
        for i, ((label, values), color) in enumerate(zip(datasets, colors)):
            x_positions = [x + i * bar_width for x in bin_numbers]
            ax.bar(x_positions, values, bar_width,
                   label=label, color=color, alpha=0.8,
                   edgecolor='white', linewidth=0.8)

        # Styling
        ax.set_xlabel(f'User Bins (1=Lowest IDs, {n_bins}=Highest IDs)', fontsize=12, fontweight='bold')
        ax.set_ylabel(f'Average #{genre} Recommendations per User', fontsize=12, fontweight='bold')
        # No emoji in the title: they are presumed to cause the warnings seen
        # at tight_layout()/savefig() — Matplotlib's default font has no
        # glyphs for them.
        ax.set_title(f'0922 {folder_type.title()}: {genre} Genre - Top-{k}\nUser Binning Comparison',
                     fontsize=14, fontweight='bold', color='#FF1493')
        ax.set_xticks([x + bar_width * (len(datasets) - 1) / 2 for x in bin_numbers])
        ax.set_xticklabels([f'Bin {i}' for i in bin_numbers])
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', frameon=True, fancybox=True, shadow=True)
        ax.grid(True, alpha=0.3, axis='y', color='pink', linestyle='--')
        ax.set_facecolor('#FFFAFD')

        fig.tight_layout()
        fig.savefig(out_png, dpi=300, bbox_inches='tight', facecolor='white')
    finally:
        # Release the figure even on failure; leaked figures accumulate
        # across the many genre/K combinations.
        plt.close(fig)


def process_folder(input_dir, folder_type, enhanced_dir=None):
    """Run the user-binning genre analysis for every CSV in ``input_dir``.

    For each genre and Top-K found in the file names, the per-user genre
    counts of the ORIGINAL baseline and of each synthetic dataset are
    averaged per user bin. A bar chart per genre/K, a summary CSV and one
    comprehensive text report are written into ``input_dir``.

    Args:
        input_dir: Folder containing the recommendation CSVs; it must exist.
        folder_type: "enhanced" or "primary"; selects which synthetic files
            define the genres and how outputs are named.
        enhanced_dir: Optional folder searched for ORIGINAL_* baseline files
            when ``folder_type`` is "primary".

    Returns:
        Always an empty dict; results are communicated through the written
        files and stdout.
    """
    print(f"🌸 Starting 0922 {folder_type.upper()} analysis...")
    print(f"📁 Directory: {input_dir}")

    # Check first instead of creating the folder: an input directory that
    # had to be created cannot contain anything to analyse.
    if not os.path.isdir(input_dir):
        print(f"❌ Directory does not exist: {input_dir}")
        return {}

    # Sorted for a deterministic processing and logging order.
    files = sorted(f for f in os.listdir(input_dir) if f.endswith(".csv"))
    if not files:
        print(f"⚠️ No CSV files found in {input_dir}")
        return {}

    print(f"📊 Found {len(files)} CSV files")

    meta = []
    for f in files:
        info = parse_file_meta(f)
        if info:
            info["filename"] = f
            meta.append(info)
        else:
            print(f"⚠️ File doesn't match pattern: {f}")

    if not meta:
        print("❌ No files matched expected patterns")
        return {}

    # Where to save
    out_plot_dir = os.path.join(input_dir, "0922_genre_avg_plots")
    os.makedirs(out_plot_dir, exist_ok=True)
    out_summary_csv = os.path.join(input_dir, f"0922_{folder_type}_genre_avg_summary.csv")

    # Initialize comprehensive text analysis
    text_analysis = ComprehensiveTextAnalysis(folder_type, input_dir)

    # Available genres: taken from the synthetic files of the matching source.
    # NOTE(review): genre names come from file names (the run log shows e.g.
    # 'Science_Fiction', 'Children_s') and are matched verbatim against the
    # GENRE_COL tokens — confirm the CSVs use the same underscore spelling.
    genre_source = "primary" if folder_type == "primary" else "enhanced"
    genres = sorted({m["genre"] for m in meta if m["source"] == genre_source and m["genre"] is not None})

    print(f"🎯 Found genres: {genres}")

    if not genres:
        print("❌ No genres found")
        return {}

    # Pre-index meta by type
    originals_by_k = {}
    synthetic_by_genre_k = defaultdict(list)

    for m in meta:
        if m["source"] == "original":
            originals_by_k[m["k"]] = m
        elif m["source"] in ["enhanced", "primary"]:
            synthetic_by_genre_k[(m["genre"], m["k"])].append(m)

    # For the primary folder, baselines found in the enhanced folder are
    # added too (overriding a local ORIGINAL file with the same k).
    if folder_type == "primary" and enhanced_dir and os.path.exists(enhanced_dir):
        for f in sorted(os.listdir(enhanced_dir)):
            if not f.endswith(".csv"):
                continue
            info = parse_file_meta(f)
            if info and info["source"] == "original":
                info["filename"] = f
                info["filepath"] = os.path.join(enhanced_dir, f)
                originals_by_k[info["k"]] = info

    # Get user binning from the first available original file
    n_bins = 10
    user_bins = None
    if originals_by_k:
        sample_original = next(iter(originals_by_k.values()))
        sample_path = sample_original.get("filepath", os.path.join(input_dir, sample_original["filename"]))

        try:
            sample_df = pd.read_csv(sample_path)
            if USER_COL in sample_df.columns:
                all_users = sorted(sample_df[USER_COL].unique())
                user_bins = create_user_bins(all_users, n_bins=n_bins)
                print(f"📊 Created user bins: {len(all_users):,} users in {n_bins} bins")
            else:
                print(f"⚠️ No {USER_COL} column in {sample_path}")
        except Exception as e:
            print(f"❌ Error reading original file: {str(e)}")

    if user_bins is None:
        print("❌ Could not create user bins")
        return {}

    summary_rows = []
    plot_count = 0

    # Process each genre
    for genre in genres:
        # Every K that has synthetic data for this genre or any baseline.
        Ks = sorted({k for (g, k) in synthetic_by_genre_k.keys() if g == genre} | set(originals_by_k.keys()))

        for k in Ks:
            print(f"🎯 Processing {genre} - Top-{k}")

            user_counts_dict = {}

            # Original baseline
            if k in originals_by_k:
                orig_info = originals_by_k[k]
                orig_path = orig_info.get("filepath", os.path.join(input_dir, orig_info["filename"]))
                user_counts = get_user_genre_counts_for_binning(orig_path, genre)
                if not user_counts.empty:
                    user_counts_dict["ORIGINAL"] = user_counts
                    summary_rows.append({
                        "folder": folder_type,
                        "genre": genre,
                        "k": k,
                        "dataset": orig_info["filename"].replace(".csv", ""),
                        "source": "original",
                        "size": None,
                        "avg_per_user": user_counts.mean()
                    })

            # Synthetic datasets, smallest first
            synthetic_files = sorted(synthetic_by_genre_k.get((genre, k), []), key=lambda x: x["size"])
            sizes = []

            for m in synthetic_files:
                f = m["filename"]
                file_path = os.path.join(input_dir, f)
                user_counts = get_user_genre_counts_for_binning(file_path, genre)
                if not user_counts.empty:
                    user_counts_dict[f"{folder_type.title()}_{m['size']}"] = user_counts
                    sizes.append(m['size'])

                    summary_rows.append({
                        "folder": folder_type,
                        "genre": genre,
                        "k": k,
                        "dataset": f.replace(".csv", ""),
                        "source": folder_type,
                        "size": m["size"],
                        "avg_per_user": user_counts.mean()
                    })

            # Analysis and plotting
            if not user_counts_dict:
                continue

            bin_averages = calculate_bin_averages(user_counts_dict, user_bins, genre)
            text_analysis.add_analysis(genre, k, bin_averages, user_bins)

            out_png = os.path.join(out_plot_dir, f"0922_{folder_type}_{genre}_top{k}_binned.png")
            try:
                _plot_binned_comparison(bin_averages, sizes, folder_type, genre, k,
                                        len(user_bins), out_png)
            except Exception as e:
                # Best effort: a failed plot must not stop the remaining work.
                print(f"❌ Plot error for {genre} Top-{k}: {str(e)}")
            else:
                plot_count += 1
                print(f"📊 Saved: {os.path.basename(out_png)}")

    # Save results
    try:
        text_path = text_analysis.save_comprehensive_report()
        print(f"📝 Saved comprehensive text: {os.path.basename(text_path)}")

        if summary_rows:
            summary_df = pd.DataFrame(summary_rows).sort_values(["genre", "k", "source", "size"], na_position="first")
            summary_df.to_csv(out_summary_csv, index=False)
            print(f"✅ Saved summary CSV: {os.path.basename(out_summary_csv)}")

    except Exception as e:
        print(f"❌ Error saving results: {str(e)}")

    print(f"🎉 {folder_type.title()} analysis complete: {plot_count} plots generated")
    return {}

# ================== MAIN EXECUTION ==================
def main():
    """Run the enhanced and then the primary folder analysis, with banners."""
    banner = "🌸" * 80

    print(banner)
    print("0922 EXPERIMENT - COMPREHENSIVE GENRE ANALYSIS")
    print(banner)

    # Process enhanced folder
    print("\n🔮 ENHANCED ANALYSIS:")
    process_folder(ENHANCED_DIR, "enhanced")

    print("\n" + "=" * 60)

    # Process primary folder; its ORIGINAL baselines may live in ENHANCED_DIR.
    print("\n🎯 PRIMARY ANALYSIS:")
    process_folder(PRIMARY_DIR, "primary", enhanced_dir=ENHANCED_DIR)

    # One blank line, then the banner. The previous "\n🌸" * 80 repeated the
    # newline as well and printed 80 single-flower lines.
    print("\n" + banner)
    print("0922 EXPERIMENT ANALYSIS COMPLETED!")
    print(banner)


if __name__ == "__main__":
    main()

🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸
0922 EXPERIMENT - COMPREHENSIVE GENRE ANALYSIS
🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸🌸

🔮 ENHANCED ANALYSIS:
🌸 Starting 0922 ENHANCED analysis...
📁 Directory: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0922/enhanced_analysis/
📊 Found 3 CSV files
⚠️ File doesn't match pattern: improved_Adult_100_25recommendation.csv
⚠️ File doesn't match pattern: improved_Adult_100_35recommendation.csv
⚠️ File doesn't match pattern: improved_Adult_100_15recommendation.csv
❌ No files matched expected patterns


🎯 PRIMARY ANALYSIS:
🌸 Starting 0922 PRIMARY analysis...
📁 Directory: /home/moshtasa/Research/phd-svd-recsys/SVD/Book/result/rec/top_re/0922/primary_analysis
📊 Found 159 CSV files
🎯 Found genres: ['Adult', 'Adventure', 'Children_s', 'Classics', 'Drama', 'Fantasy', 'Historical', 'Horror', 'Mystery', 'Nonfiction', 'Romance', 'Science_Fiction', 'Thriller']
📊 

  plt.tight_layout()
  plt.savefig(out_png, dpi=300, bbox_inches='tight', facecolor='white')


📊 Saved: 0922_primary_Adult_top15_binned.png
🎯 Processing Adult - Top-25


  plt.tight_layout()
  plt.savefig(out_png, dpi=300, bbox_inches='tight', facecolor='white')


📊 Saved: 0922_primary_Adult_top25_binned.png
🎯 Processing Adult - Top-35
