# Análisis Radiómico Avanzado para Detección de Cáncer

Este notebook implementa un análisis radiómico completo utilizando PyRadiomics para extraer características cuantitativas de imágenes médicas y realizar un análisis estadístico para la identificación de biomarcadores.

## Contenido:
- Extracción de características radiómicas
- Análisis estadístico de características
- Selección de características relevantes
- Análisis de correlaciones
- Clustering y segmentación de patrones
- Visualizaciones avanzadas

In [None]:
# Configuraci√≥n inicial
import sys
import os
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Make the project's src/ directory importable: resolve it relative to the
# current working directory and register it on sys.path only if it is not
# already listed there.
src_path = Path('../src').absolute()
src_dir = str(src_path)
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Imports principales
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.feature_selection import SelectKBest, f_classif

# Configure the plotting defaults used by the figures in this notebook.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*';
# older releases only ship the un-prefixed 'seaborn' name, so use whichever
# one the installed version actually provides instead of failing on import.
preferred_style = 'seaborn-v0_8'
if preferred_style not in plt.style.available:
    preferred_style = 'seaborn'
plt.style.use(preferred_style)
sns.set_palette("viridis")
plt.rcParams['figure.figsize'] = (12, 8)

print(f"An√°lisis Radi√≥mico iniciado: {datetime.now()}")

In [None]:
# Load the project-local analysis modules. A failed import is reported with
# an installation hint instead of being raised.
try:
    from analysis.radiomics_analysis import RadiomicsAnalyzer
    from utils.dicom_processor import DICOMProcessor
except ImportError as e:
    print(f"‚ùå Error importando m√≥dulos: {e}")
    print("Aseg√∫rese de tener PyRadiomics y SimpleITK instalados")
else:
    print("‚úì M√≥dulos de an√°lisis radi√≥mico importados")

## 1. Inicialización del Analizador Radiómico

In [None]:
# Create the radiomics analyzer and echo its configuration.
# Any failure (construction or attribute access) is reported and leaves
# radiomics_analyzer set to None, which later cells test for.
try:
    radiomics_analyzer = RadiomicsAnalyzer()
    print("‚úì Analizador radi√≥mico inicializado")

    # Printed label -> analyzer attribute, in display order
    print(f"\nüìã Configuraci√≥n radi√≥mica:")
    configuration_fields = (
        ("Clases de caracter√≠sticas", "feature_classes"),
        ("Ancho de bin", "bin_width"),
        ("Normalizaci√≥n", "normalize"),
        ("Resampleo", "resampling"),
    )
    for description, attribute in configuration_fields:
        print(f"  - {description}: {getattr(radiomics_analyzer, attribute)}")

except Exception as e:
    print(f"‚ùå Error inicializando analizador: {e}")
    radiomics_analyzer = None

## 2. Preparación de Datos de Ejemplo

Para este ejemplo simularemos datos radiómicos, ya que la extracción real requiere imágenes DICOM con máscaras de segmentación.

In [None]:
# Simulate radiomic data for demonstration purposes.
# In a real scenario this data would come from radiomics_analyzer.extract_features_batch()

np.random.seed(42)
n_benign = 60
n_malignant = 40
# Derive the total from the class sizes so the labels assigned below always
# agree with the counts printed in the summary.
n_images = n_benign + n_malignant

# Names of the simulated radiomic features
feature_names = [
    'original_firstorder_Energy',
    'original_firstorder_Entropy',
    'original_firstorder_Kurtosis',
    'original_firstorder_Mean',
    'original_firstorder_Median',
    'original_firstorder_Skewness',
    'original_firstorder_Variance',
    'original_glcm_Contrast',
    'original_glcm_Correlation',
    'original_glcm_Energy',
    'original_glcm_Homogeneity',
    'original_glrlm_GrayLevelVariance',
    'original_glrlm_RunLengthVariance',
    'original_glszm_GrayLevelVariance',
    'original_glszm_SizeZoneVariability',
    'original_shape_Elongation',
    'original_shape_Flatness',
    'original_shape_Sphericity',
    'original_shape_SurfaceArea',
    'original_shape_Volume',
]

# (low, high) bounds of the uniform multiplier per feature family. The first
# family whose key occurs in the feature name wins; any other feature uses
# the default range.
family_multiplier_ranges = {
    'firstorder': (0.8, 1.5),
    'glcm': (0.5, 2.0),
    'shape': (0.7, 1.8),
}
default_multiplier_range = (0.6, 1.4)

# (mean, std) of the normal base value, keyed by "is malignant"
base_distribution = {False: (1.0, 0.25), True: (1.2, 0.3)}

# Generate data with subtle differences between benign and malignant cases
radiomics_data = []

for i in range(n_images):
    # The first n_benign images are benign, the remaining ones malignant
    is_malignant = i >= n_benign
    base_mean, base_std = base_distribution[is_malignant]

    features = {}
    for feature_name in feature_names:
        # One normal draw followed by one uniform draw per feature: keeping
        # this order keeps the seeded dataset reproducible.
        base_value = np.random.normal(base_mean, base_std)
        low, high = next(
            (bounds for family, bounds in family_multiplier_ranges.items()
             if family in feature_name),
            default_multiplier_range,
        )
        features[feature_name] = base_value * np.random.uniform(low, high)

    features['image_id'] = f'image_{i:03d}'
    features['label'] = 1 if is_malignant else 0
    features['diagnosis'] = 'Maligno' if is_malignant else 'Benigno'

    radiomics_data.append(features)

# Build the DataFrame, indexed by image id
radiomics_df = pd.DataFrame(radiomics_data)
radiomics_df.set_index('image_id', inplace=True)

print(f"üìä Dataset radi√≥mico simulado:")
print(f"  - Total im√°genes: {len(radiomics_df)}")
print(f"  - Casos benignos: {n_benign}")
print(f"  - Casos malignos: {n_malignant}")
print(f"  - Caracter√≠sticas: {len(feature_names)}")

display(radiomics_df.head())

## 3. Análisis Estadístico Descriptivo

In [None]:
# Descriptive statistics per diagnosis group
numeric_features = radiomics_df.select_dtypes(include=[np.number]).drop(['label'], axis=1)

print("üìà ESTAD√çSTICAS DESCRIPTIVAS POR GRUPO")
print("=" * 50)

# Split the numeric features by diagnosis
benign_data = numeric_features[radiomics_df['diagnosis'] == 'Benigno']
malignant_data = numeric_features[radiomics_df['diagnosis'] == 'Maligno']

# Per-feature mean and standard deviation of each group
stats_summary = pd.DataFrame({
    'Benigno_Media': benign_data.mean(),
    'Benigno_Std': benign_data.std(),
    'Maligno_Media': malignant_data.mean(),
    'Maligno_Std': malignant_data.std()
})

# Absolute and relative (percentage of the benign mean) differences
stats_summary['Diferencia_Media'] = (stats_summary['Maligno_Media'] - stats_summary['Benigno_Media'])
stats_summary['Diferencia_Relativa'] = (stats_summary['Diferencia_Media'] / stats_summary['Benigno_Media']) * 100

# Show the summary. Only the ranking uses the magnitude of the relative
# difference; the displayed table keeps the signed values of every column.
print("\nTop 10 caracter√≠sticas con mayor diferencia relativa:")
top_index = stats_summary['Diferencia_Relativa'].abs().nlargest(10).index
top_diff = stats_summary.loc[top_index]
display(top_diff.round(3))

# Plot the group distributions of the six highest-ranked features
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()

top_features = top_diff.index[:6]

for ax, feature in zip(axes, top_features):
    # Overlaid histograms, one per diagnosis
    ax.hist(benign_data[feature], alpha=0.7, label='Benigno', bins=20, color='skyblue')
    ax.hist(malignant_data[feature], alpha=0.7, label='Maligno', bins=20, color='salmon')
    ax.set_title(f'{feature}')
    ax.set_xlabel('Valor')
    ax.set_ylabel('Frecuencia')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.suptitle('Distribuciones de Caracter√≠sticas Radi√≥micas M√°s Discriminativas', fontsize=16)
plt.tight_layout()
plt.show()

## 4. Análisis Estadístico Inferencial

In [None]:
# Inferential statistics for every feature, delegated to the analyzer.
# The label vector and the feature matrix are built before the availability
# check because later cells read features_only even when the analyzer is
# missing; defining them only inside the branch caused a NameError there.
labels = radiomics_df['label'].values
features_only = radiomics_df.drop(['label', 'diagnosis'], axis=1)

if radiomics_analyzer:
    try:
        statistical_results = radiomics_analyzer.perform_statistical_analysis(
            features_only, labels
        )

        if statistical_results and 'statistical_tests' in statistical_results:
            tests_df = pd.DataFrame(statistical_results['statistical_tests'])

            print(f"üß™ RESULTADOS DE PRUEBAS ESTAD√çSTICAS")
            print(f"=" * 50)
            print(f"Total de caracter√≠sticas analizadas: {len(tests_df)}")

            # Features flagged as significant after correction (p < 0.05)
            significant_features = tests_df[tests_df['significant_corrected'] == True]
            print(f"Caracter√≠sticas significativas (p < 0.05, corregido): {len(significant_features)}")

            if len(significant_features) > 0:
                print("\nüéØ Top 10 caracter√≠sticas m√°s significativas:")
                top_significant = significant_features.nsmallest(10, 'p_value_corrected')
                display(top_significant[['feature', 'test_type', 'p_value_corrected',
                                         'cohens_d', 'effect_size']].round(6))

                # Distributions of corrected p-values and effect sizes
                plt.figure(figsize=(12, 6))

                plt.subplot(1, 2, 1)
                plt.hist(tests_df['p_value_corrected'], bins=20, edgecolor='black')
                plt.axvline(x=0.05, color='red', linestyle='--', label='Œ± = 0.05')
                plt.xlabel('P-value (corregido)')
                plt.ylabel('Frecuencia')
                plt.title('Distribuci√≥n de P-values Corregidos')
                plt.legend()
                plt.grid(True, alpha=0.3)

                plt.subplot(1, 2, 2)
                plt.hist(tests_df['cohens_d'].abs(), bins=20, edgecolor='black')
                # Reference lines at |d| = 0.2, 0.5 and 0.8
                plt.axvline(x=0.2, color='green', linestyle='--', label='Peque√±o')
                plt.axvline(x=0.5, color='orange', linestyle='--', label='Mediano')
                plt.axvline(x=0.8, color='red', linestyle='--', label='Grande')
                plt.xlabel('|Cohen\'s d|')
                plt.ylabel('Frecuencia')
                plt.title('Distribuci√≥n de Tama√±os de Efecto')
                plt.legend()
                plt.grid(True, alpha=0.3)

                plt.tight_layout()
                plt.show()

        else:
            print("‚ùå No se pudieron realizar pruebas estad√≠sticas")

    except Exception as e:
        print(f"‚ùå Error en an√°lisis estad√≠stico: {e}")
        statistical_results = {}
else:
    print("‚ö†Ô∏è  Analizador radi√≥mico no disponible")
    statistical_results = {}

## 5. Análisis de Correlaciones

In [None]:
# Correlation analysis
def _plot_correlation_heatmap(corr_matrix, title, max_features=15):
    """Draw the lower triangle of a correlation matrix as an annotated heatmap.

    Args:
        corr_matrix: DataFrame of pairwise correlations whose row and column
            labels are the same feature names (required by the ``.loc``
            lookup below).
        title: Title placed above the figure.
        max_features: Only the first ``max_features`` columns are drawn.
    """
    plt.figure(figsize=(14, 12))

    features_subset = corr_matrix.columns[:max_features]
    corr_subset = corr_matrix.loc[features_subset, features_subset]

    # Mask the diagonal and the upper triangle
    mask = np.triu(np.ones_like(corr_subset, dtype=bool))
    sns.heatmap(corr_subset, mask=mask, annot=True, cmap='RdBu_r', center=0,
                square=True, linewidths=0.5, cbar_kws={"shrink": .5}, fmt='.2f')
    plt.title(title)
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.show()


# Build the feature matrix locally so both branches below work regardless of
# which branch earlier cells took.
features_only = radiomics_df.drop(['label', 'diagnosis'], axis=1)

if radiomics_analyzer:
    try:
        correlation_results = radiomics_analyzer.analyze_feature_correlations(features_only)

        if correlation_results and 'correlation_matrix' in correlation_results:
            corr_matrix = correlation_results['correlation_matrix']
            high_corr_pairs = correlation_results['high_correlation_pairs']

            print(f"üîó AN√ÅLISIS DE CORRELACIONES")
            print(f"=" * 50)
            print(f"Pares altamente correlacionados (|r| > 0.8): {len(high_corr_pairs)}")

            if len(high_corr_pairs) > 0:
                print("\nTop 10 correlaciones m√°s altas:")
                high_corr_df = pd.DataFrame(high_corr_pairs)
                # Order the pairs by absolute correlation, strongest first
                top_corr = high_corr_df.reindex(
                    high_corr_df['correlation'].abs().sort_values(ascending=False).index
                )
                display(top_corr.head(10).round(3))

            # Heatmap of the first 15 features
            _plot_correlation_heatmap(
                corr_matrix,
                'Matriz de Correlaci√≥n de Caracter√≠sticas Radi√≥micas (Subconjunto)',
            )

        else:
            print("‚ùå No se pudo realizar an√°lisis de correlaciones")

    except Exception as e:
        print(f"‚ùå Error en an√°lisis de correlaciones: {e}")
        correlation_results = {}
else:
    # Basic correlation analysis without the analyzer
    corr_matrix = features_only.corr()
    _plot_correlation_heatmap(
        corr_matrix,
        'Matriz de Correlaci√≥n de Caracter√≠sticas Radi√≥micas (B√°sica)',
    )

## 6. Análisis de Componentes Principales (PCA)

In [None]:
# Principal Component Analysis
# Build the feature matrix locally so this cell works regardless of which
# branch earlier cells took.
features_only = radiomics_df.drop(['label', 'diagnosis'], axis=1)

# Standardize the features before PCA
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features_only)

# Keep at most 10 components
pca = PCA(n_components=min(10, features_only.shape[1]), random_state=42)
pca_features = pca.fit_transform(features_scaled)

print(f"üîç AN√ÅLISIS DE COMPONENTES PRINCIPALES")
print(f"=" * 50)
print(f"Varianza explicada por los primeros 10 componentes:")
for i, var_exp in enumerate(pca.explained_variance_ratio_[:10]):
    print(f"  PC{i+1}: {var_exp:.3f} ({var_exp*100:.1f}%)")

print(f"\nVarianza acumulativa:")
cumvar = np.cumsum(pca.explained_variance_ratio_)
for i, cumvar_exp in enumerate(cumvar[:10]):
    print(f"  PC1-PC{i+1}: {cumvar_exp:.3f} ({cumvar_exp*100:.1f}%)")

# PCA figures
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Scree plot
axes[0,0].plot(range(1, len(pca.explained_variance_ratio_)+1),
               pca.explained_variance_ratio_, 'bo-')
axes[0,0].set_xlabel('Componente Principal')
axes[0,0].set_ylabel('Varianza Explicada')
axes[0,0].set_title('Scree Plot')
axes[0,0].grid(True, alpha=0.3)

# Cumulative variance with 80% / 95% reference lines
axes[0,1].plot(range(1, len(cumvar)+1), cumvar, 'ro-')
axes[0,1].axhline(y=0.8, color='g', linestyle='--', label='80%')
axes[0,1].axhline(y=0.95, color='orange', linestyle='--', label='95%')
axes[0,1].set_xlabel('Componente Principal')
axes[0,1].set_ylabel('Varianza Acumulativa')
axes[0,1].set_title('Varianza Acumulativa')
axes[0,1].legend()
axes[0,1].grid(True, alpha=0.3)

# PC1 vs PC2 projection, one scatter per diagnosis.
# Each colour is paired with its legend text explicitly: iterating over a
# set of colour names has no guaranteed order and could swap the labels.
is_benign = radiomics_df['label'].to_numpy() == 0
diagnosis_groups = (
    (is_benign, 'blue', 'Benigno'),
    (~is_benign, 'red', 'Maligno'),
)
for mask, color, label_text in diagnosis_groups:
    axes[1,0].scatter(pca_features[mask, 0], pca_features[mask, 1],
                      c=color, label=label_text, alpha=0.7)

axes[1,0].set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.1%} varianza)')
axes[1,0].set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.1%} varianza)')
axes[1,0].set_title('Proyecci√≥n PCA: PC1 vs PC2')
axes[1,0].legend()
axes[1,0].grid(True, alpha=0.3)

# Ten features with the largest absolute loading on PC1
pc1_contributions = pd.Series(pca.components_[0], index=features_only.columns)
top_pc1 = pc1_contributions.abs().nlargest(10)

top_pc1.plot(kind='barh', ax=axes[1,1])
axes[1,1].set_title('Top 10 Contribuciones a PC1')
axes[1,1].set_xlabel('Contribuci√≥n Absoluta')

plt.tight_layout()
plt.show()

## 7. Análisis de Clustering

In [None]:
# Clustering analysis
if radiomics_analyzer:
    try:
        clustering_results = radiomics_analyzer.cluster_analysis(features_only, n_clusters=3)

        if clustering_results and 'cluster_labels' in clustering_results:
            cluster_labels = clustering_results['cluster_labels']
            n_clusters = clustering_results['n_clusters']
            silhouette = clustering_results['silhouette_score']

            print(f"üéØ AN√ÅLISIS DE CLUSTERING")
            print(f"=" * 50)
            print(f"N√∫mero de clusters: {n_clusters}")
            print(f"Silhouette score: {silhouette:.3f}")

            # pd.crosstab aligns its inputs on their index, so the cluster
            # labels must carry the same image_id index as the diagnosis
            # column; a default RangeIndex would yield an empty table.
            cluster_series = pd.Series(np.asarray(cluster_labels),
                                       index=radiomics_df.index, name='Cluster')

            # Diagnosis counts per cluster
            cluster_composition = pd.crosstab(
                cluster_series,
                radiomics_df['diagnosis'],
                margins=True
            )
            print("\nüìä Composici√≥n de clusters:")
            display(cluster_composition)

            plt.figure(figsize=(15, 5))

            # Clusters in PCA space
            plt.subplot(1, 3, 1)
            scatter = plt.scatter(pca_features[:, 0], pca_features[:, 1],
                                  c=cluster_labels, cmap='viridis', alpha=0.7)
            plt.xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.1%})')
            plt.ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.1%})')
            plt.title('Clusters en Espacio PCA')
            plt.colorbar(scatter)
            plt.grid(True, alpha=0.3)

            # Actual diagnosis in the same space, for comparison
            plt.subplot(1, 3, 2)
            is_benign = radiomics_df['label'].to_numpy() == 0
            for mask, color, label_text in ((is_benign, 'blue', 'Benigno'),
                                            (~is_benign, 'red', 'Maligno')):
                plt.scatter(pca_features[mask, 0], pca_features[mask, 1],
                            c=color, label=label_text, alpha=0.7)
            plt.xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.1%})')
            plt.ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.1%})')
            plt.title('Diagn√≥stico Real')
            plt.legend()
            plt.grid(True, alpha=0.3)

            # Cluster vs diagnosis table, each diagnosis column summing to 1
            plt.subplot(1, 3, 3)
            confusion_matrix = pd.crosstab(
                cluster_series,
                radiomics_df['diagnosis'],
                normalize='columns'
            )
            sns.heatmap(confusion_matrix, annot=True, cmap='Blues', fmt='.2f')
            plt.title('Clusters vs Diagn√≥stico\n(Normalizado por columna)')

            plt.tight_layout()
            plt.show()

        else:
            print("‚ùå No se pudo realizar an√°lisis de clustering")

    except Exception as e:
        print(f"‚ùå Error en an√°lisis de clustering: {e}")
        clustering_results = {}
else:
    # Basic clustering using scikit-learn directly
    from sklearn.cluster import KMeans
    from sklearn.metrics import silhouette_score

    kmeans = KMeans(n_clusters=3, random_state=42)
    cluster_labels = kmeans.fit_predict(features_scaled)
    silhouette = silhouette_score(features_scaled, cluster_labels)

    print(f"üéØ AN√ÅLISIS DE CLUSTERING (B√ÅSICO)")
    print(f"=" * 50)
    print(f"N√∫mero de clusters: 3")
    print(f"Silhouette score: {silhouette:.3f}")

    # Clusters in PCA space
    plt.figure(figsize=(10, 4))

    plt.subplot(1, 2, 1)
    scatter = plt.scatter(pca_features[:, 0], pca_features[:, 1],
                          c=cluster_labels, cmap='viridis', alpha=0.7)
    plt.xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.1%})')
    plt.ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.1%})')
    plt.title('Clusters en Espacio PCA')
    plt.colorbar(scatter)
    plt.grid(True, alpha=0.3)

    plt.subplot(1, 2, 2)
    # Give the labels the image_id index so pd.crosstab can align them with
    # the diagnosis column (a RangeIndex would produce an empty table).
    cluster_composition = pd.crosstab(
        pd.Series(cluster_labels, index=radiomics_df.index, name='Cluster'),
        radiomics_df['diagnosis']
    )
    cluster_composition.plot(kind='bar', ax=plt.gca())
    plt.title('Composici√≥n de Clusters')
    plt.xlabel('Cluster')
    plt.ylabel('N√∫mero de Casos')
    plt.legend(title='Diagn√≥stico')
    plt.xticks(rotation=0)

    plt.tight_layout()
    plt.show()

## 8. Selección de Características

In [None]:
# Feature selection
if radiomics_analyzer:
    try:
        # Run every selection strategy and keep only the successful results
        methods = ['univariate', 'rfe', 'lasso']
        selection_results = {}

        for method in methods:
            print(f"\nüîç Selecci√≥n de caracter√≠sticas: {method.upper()}")

            result = radiomics_analyzer.perform_feature_selection(
                features_only, radiomics_df['label'].values,
                method=method, k=10
            )

            if not (result and 'selected_features' in result):
                print(f"  ‚ùå Error en selecci√≥n {method}")
                continue

            selection_results[method] = result
            selected_features = result['selected_features']

            print(f"  Caracter√≠sticas seleccionadas: {len(selected_features)}")
            print(f"  Top 5: {selected_features[:5]}")

        # Compare the strategies that produced a result
        if selection_results:
            print(f"\nüìä COMPARACI√ìN DE M√âTODOS DE SELECCI√ìN")
            print(f"=" * 50)

            # Selected feature set per method, plus the union of all of them
            method_features = {
                name: set(outcome['selected_features'])
                for name, outcome in selection_results.items()
            }
            all_selected = set().union(*method_features.values())

            # Features chosen by every method (needs at least two methods)
            if len(method_features) >= 2:
                common_features = set.intersection(*method_features.values())

                print(f"Caracter√≠sticas seleccionadas por TODOS los m√©todos ({len(common_features)}):")
                for feature in sorted(common_features):
                    print(f"  - {feature}")

            # 0/1 matrix: rows are features, columns are methods
            feature_index = sorted(all_selected)
            selection_matrix = pd.DataFrame(
                {
                    name: [int(feat in chosen) for feat in feature_index]
                    for name, chosen in method_features.items()
                },
                index=feature_index,
            )

            plt.figure(figsize=(12, 8))
            sns.heatmap(selection_matrix, annot=True, cmap='RdYlBu_r',
                        cbar_kws={'label': 'Seleccionada (1) / No seleccionada (0)'})
            plt.title('Caracter√≠sticas Seleccionadas por M√©todo')
            plt.xlabel('M√©todo de Selecci√≥n')
            plt.ylabel('Caracter√≠sticas')
            plt.xticks(rotation=45)
            plt.yticks(rotation=0)
            plt.tight_layout()
            plt.show()

    except Exception as e:
        print(f"‚ùå Error en selecci√≥n de caracter√≠sticas: {e}")
        selection_results = {}
else:
    # Basic selection with SelectKBest and the ANOVA F-score
    print(f"üîç SELECCI√ìN DE CARACTER√çSTICAS (B√ÅSICA)")
    print(f"=" * 50)

    selector = SelectKBest(score_func=f_classif, k=10)
    X_selected = selector.fit_transform(features_scaled, radiomics_df['label'])

    support_mask = selector.get_support()
    selected_features = features_only.columns[support_mask].tolist()
    feature_scores = dict(zip(features_only.columns, selector.scores_))

    print(f"Caracter√≠sticas seleccionadas: {len(selected_features)}")
    print(f"\nTop 10 caracter√≠sticas por F-score:")

    # Score table, highest F-score first
    scores_df = pd.DataFrame(
        {
            'Feature': list(feature_scores.keys()),
            'F_Score': list(feature_scores.values()),
        }
    ).sort_values('F_Score', ascending=False)
    display(scores_df.head(10))

    # Bar chart of the scores of the selected features
    bar_positions = range(len(selected_features))
    bar_heights = [feature_scores[feat] for feat in selected_features]

    plt.figure(figsize=(12, 6))
    plt.bar(bar_positions, bar_heights)
    plt.xlabel('Caracter√≠sticas Seleccionadas')
    plt.ylabel('F-Score')
    plt.title('F-Scores de Caracter√≠sticas Seleccionadas')
    plt.xticks(bar_positions, selected_features, rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

## 9. Resumen y Exportación de Resultados

In [None]:
# Generate the full report and export every artefact to ../results.
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output directory exists before anything is written to it.
results_dir = Path('../results')
results_dir.mkdir(parents=True, exist_ok=True)

if radiomics_analyzer:
    try:
        final_report = radiomics_analyzer.generate_feature_report(
            features_only, radiomics_df['label'].values
        )

        print(f"üìã REPORTE RADI√ìMICO COMPLETO")
        print(f"=" * 50)

        if 'summary' in final_report:
            summary = final_report['summary']
            print(f"\nüìä Resumen:")
            print(f"  - N√∫mero de im√°genes: {summary['n_images']}")
            print(f"  - N√∫mero de caracter√≠sticas: {summary['n_features']}")

            if 'feature_classes' in summary:
                print(f"  - Clases de caracter√≠sticas:")
                for class_name, features in summary['feature_classes'].items():
                    print(f"    * {class_name}: {len(features)} caracter√≠sticas")

        # Persist the full report
        radiomics_analyzer.save_results(final_report, '../results/radiomics_report.json')
        print(f"\nüíæ Reporte completo guardado en: results/radiomics_report.json")

    except Exception as e:
        print(f"‚ùå Error generando reporte: {e}")

# Save the processed feature table
radiomics_df.to_csv('../results/radiomics_features.csv')
print(f"üíæ Caracter√≠sticas radi√≥micas guardadas en: results/radiomics_features.csv")

# Save the descriptive statistics, if that cell was executed
if 'stats_summary' in locals():
    stats_summary.to_csv('../results/radiomics_statistics.csv')
    print(f"üíæ Estad√≠sticas guardadas en: results/radiomics_statistics.csv")

# Save the principal components together with the diagnosis columns
pca_df = pd.DataFrame(
    pca_features,
    columns=[f'PC{i+1}' for i in range(pca_features.shape[1])],
    index=radiomics_df.index
)
pca_df['diagnosis'] = radiomics_df['diagnosis']
pca_df['label'] = radiomics_df['label']
pca_df.to_csv('../results/pca_features.csv')
print(f"üíæ Componentes principales guardados en: results/pca_features.csv")

print(f"\n‚úÖ An√°lisis radi√≥mico completado: {datetime.now()}")
print(f"\nüéØ CONCLUSIONES PRINCIPALES:")

# Collect the conclusions first and number them afterwards, so the list has
# no gap when the statistical tests are unavailable.
conclusions = [
    f"Se analizaron {len(feature_names)} caracter√≠sticas radi√≥micas",
    f"{n_benign} casos benignos vs {n_malignant} casos malignos",
]

# statistical_results may be missing, empty or None (the analyzer's return
# value is only truth-tested where it is produced), so check truthiness
# before the membership test.
if ('statistical_results' in locals() and statistical_results
        and 'statistical_tests' in statistical_results):
    sig_count = sum(1 for test in statistical_results['statistical_tests']
                    if test.get('significant_corrected', False))
    conclusions.append(f"{sig_count} caracter√≠sticas estad√≠sticamente significativas")

# Report on the first three components, or fewer if PCA produced fewer
n_top_components = min(3, len(cumvar))
conclusions.append(
    f"Los primeros {n_top_components} componentes principales explican "
    f"{cumvar[n_top_components - 1]:.1%} de la varianza"
)
conclusions.append(f"El clustering revel√≥ {3} grupos distintos en los datos")

for number, conclusion in enumerate(conclusions, start=1):
    print(f"  {number}. {conclusion}")

print(f"\nüî¨ Los resultados sugieren patrones radi√≥micos distinguibles entre casos benignos y malignos")