# 3D CNN and Radiograph Feature Fusion - PCA and LASSO Classification
## Feature Fusion, Dimensionality Reduction, and Classification Analysis

This notebook:
- Loads CNN features from deep learning models
- Loads non-diagnostic radiograph features
- Fuses CNN and radiograph features into a unified feature set
- Performs PCA on the fused features for dimensionality reduction and visualizes the components by class
- Uses LASSO regression for feature selection
- Trains and evaluates a multi-class classification model
- Reports comprehensive ML metrics and visualizations

## 1. Imports and Configuration

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LassoCV, LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import (
    confusion_matrix, accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, classification_report, roc_curve, auc
)
from sklearn.preprocessing import label_binarize
import pickle
import json

# Silence only deprecation-style chatter. A blanket 'ignore' would also hide
# warnings that bear on the validity of the results produced further down
# (for example solver non-convergence reported during model fitting).
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Global plotting defaults used by every figure in this notebook.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

print("All imports successful!")

## 2. Configuration and Paths

In [None]:
# Paths
# NOTE: BASE_DIR is a hard-coded, machine-specific location; adjust as needed.
BASE_DIR = Path('C:/FeatureEx')
RADIOMICS_DIR = BASE_DIR / 'radiomics_3d'
CNN_FEATURES_DIR = BASE_DIR / 'cnn_features'  # Directory containing CNN features
RADIOGRAPH_FEATURES_DIR = BASE_DIR / 'radiograph_features'  # Directory containing radiograph features
METADATA_FILE = BASE_DIR / 'classification_metadata.xlsx'
OUTPUT_DIR = BASE_DIR / 'fusion_pca_results'
# parents=True so the cell also works when BASE_DIR itself does not exist yet.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Configuration
NUM_PCA_COMPONENTS = 50  # Number of PCA components to retain
NUM_CLASSES = 5  # Number of classification classes
# Derived from NUM_CLASSES so the two settings can never disagree.
CLASS_NAMES = [f'Class {i}' for i in range(NUM_CLASSES)]
LASSO_MAX_ITER = 5000
LASSO_ALPHA_MIN = 0.0001
LASSO_ALPHA_MAX = 1.0

print(f"Base directory: {BASE_DIR}")
print(f"CNN features directory: {CNN_FEATURES_DIR}")
print(f"Radiograph features directory: {RADIOGRAPH_FEATURES_DIR}")
print(f"Metadata file: {METADATA_FILE}")
print(f"Output directory: {OUTPUT_DIR}")
print("\nConfiguration:")
print(f"  PCA components: {NUM_PCA_COMPONENTS}")
print(f"  Number of classes: {NUM_CLASSES}")
print(f"  LASSO alpha range: [{LASSO_ALPHA_MIN}, {LASSO_ALPHA_MAX}]")

## 3. Load CNN Features

In [None]:
# Read the pickled CNN feature bundle and unpack its three entries.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
print("Loading CNN features...")
cnn_features_file = CNN_FEATURES_DIR / 'cnn_features.pkl'

with cnn_features_file.open('rb') as cnn_handle:
    cnn_data = pickle.load(cnn_handle)

# The bundle is indexed by these three keys, in this order.
cnn_features_df, cnn_feature_names, cnn_sample_ids = (
    cnn_data[key] for key in ('features_df', 'feature_names', 'sample_ids')
)

# Short report of what was loaded.
cnn_summary = [
    "Loaded CNN features:",
    f"  Samples: {len(cnn_features_df)}",
    f"  Features: {len(cnn_feature_names)}",
    f"  Feature names (first 5): {cnn_feature_names[:5]}",
    f"  DataFrame shape: {cnn_features_df.shape}",
    f"  DataFrame columns (first 10): {list(cnn_features_df.columns[:10])}",
]
print("\n".join(cnn_summary))

## 4. Load Radiograph Features

In [None]:
# Read the pickled radiograph (non-diagnostic) feature bundle.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
print("Loading radiograph features...")
radiograph_features_file = RADIOGRAPH_FEATURES_DIR / 'radiograph_features.pkl'

radiograph_data = pickle.loads(radiograph_features_file.read_bytes())

# Pull the feature table, the feature-name list and the sample IDs out of the bundle.
radiograph_features_df = radiograph_data['features_df']
radiograph_feature_names = radiograph_data['feature_names']
radiograph_sample_ids = radiograph_data['sample_ids']

# Short report of what was loaded.
n_radiograph_samples = len(radiograph_features_df)
n_radiograph_features = len(radiograph_feature_names)
first_radiograph_columns = list(radiograph_features_df.columns[:10])

print("Loaded radiograph features:")
print(f"  Samples: {n_radiograph_samples}")
print(f"  Features: {n_radiograph_features}")
print(f"  Feature names (first 5): {radiograph_feature_names[:5]}")
print(f"  DataFrame shape: {radiograph_features_df.shape}")
print(f"  DataFrame columns (first 10): {first_radiograph_columns}")

## 5. Fuse CNN and Radiograph Features

In [None]:
# Fuse CNN and radiograph features into one table keyed by sample_id.
from collections import Counter

print("Fusing CNN and radiograph features...")

# Ensure both dataframes have a sample_id column to join on.
# NOTE: when the column is missing it is added to the loaded frame in place.
if 'sample_id' not in cnn_features_df.columns:
    cnn_features_df['sample_id'] = cnn_sample_ids
if 'sample_id' not in radiograph_features_df.columns:
    radiograph_features_df['sample_id'] = radiograph_sample_ids

# A sample_id that occurs more than once on either side makes the join emit
# one row per pairing, which silently inflates the sample count. Report it.
for source_name, source_frame in (('CNN', cnn_features_df),
                                  ('radiograph', radiograph_features_df)):
    repeated_ids = int(source_frame['sample_id'].duplicated().sum())
    if repeated_ids:
        print(f"WARNING: {repeated_ids} repeated sample_id value(s) in the "
              f"{source_name} features; the merge will multiply those rows")

# Merge on sample_id to ensure alignment
# Use inner join to keep only samples present in both datasets
merged_df = cnn_features_df.merge(
    radiograph_features_df,
    on='sample_id',
    how='inner',
    suffixes=('_cnn', '_radiograph')
)

# Combine feature names (CNN first, then radiograph)
all_feature_names = list(cnn_feature_names) + list(radiograph_feature_names)

print(f"\nFeature fusion results:")
print(f"  CNN features: {len(cnn_feature_names)}")
print(f"  Radiograph features: {len(radiograph_feature_names)}")
print(f"  Total fused features: {len(all_feature_names)}")
print(f"  Samples with both feature types: {len(merged_df)}")
print(f"  Merged dataframe shape: {merged_df.shape}")

# Verify no duplicate feature names (single counting pass instead of
# calling list.count() once per name).
duplicates = {name for name, count in Counter(all_feature_names).items() if count > 1}
if duplicates:
    print("\nWARNING: Duplicate feature names detected!")
    print(f"Duplicates: {duplicates}")
else:
    print("\nAll feature names are unique!")

# Columns present in both inputs are renamed by the merge with the suffixes
# above, so the plain names would no longer exist in merged_df. Surface that
# here rather than as a KeyError when the feature matrix is built.
missing_columns = [name for name in all_feature_names if name not in merged_df.columns]
if missing_columns:
    print(f"\nWARNING: {len(missing_columns)} feature name(s) are not columns of the "
          f"merged dataframe (first 5: {missing_columns[:5]})")

## 6. Load Classification Metadata

In [None]:
# Read the per-sample classification metadata from the 'samples' sheet.
print("Loading classification metadata...")
metadata_df = pd.read_excel(METADATA_FILE, sheet_name='samples')

# Basic shape information and a preview of the first rows.
metadata_columns = list(metadata_df.columns)
print("Loaded metadata:")
print(f"  Samples: {len(metadata_df)}")
print(f"  Columns: {metadata_columns}")
print("\nMetadata preview:")
print(metadata_df.head())

# How many samples carry each label, ordered by label value.
print("\nClass distribution:")
label_counts = metadata_df['label'].value_counts().sort_index()
print(label_counts)

# How many samples fall into each value of the 'split' column.
print("\nData split:")
split_counts = metadata_df['split'].value_counts()
print(split_counts)

## 7. Merge Fused Features with Labels

In [None]:
# Attach class labels to the fused feature table and build X / y.
print("Merging fused features with classification labels...")

# Create mapping from sample_id to label
# (if a sample_id occurs more than once in the metadata, its last label wins)
label_map = dict(zip(metadata_df['sample_id'], metadata_df['label']))

# Add labels to merged dataframe; samples without a label become NaN
merged_df['class_label'] = merged_df['sample_id'].map(label_map)

# Check for unmatched samples
unmatched = merged_df['class_label'].isna().sum()
if unmatched > 0:
    print(f"WARNING: {unmatched} samples have no matching label")
    print(f"Removing unmatched samples...")
    # .copy() so the column assignment below writes to an independent frame
    # rather than to a possibly-derived slice.
    merged_df = merged_df.dropna(subset=['class_label']).copy()

# Cast unconditionally. Previously the cast happened only when samples had
# been dropped, so the label dtype depended on whether anything was unmatched.
merged_df['class_label'] = merged_df['class_label'].astype(int)

print(f"\nMerged data with labels:")
print(f"  Total samples with labels: {len(merged_df)}")
print(f"  Total fused features: {len(all_feature_names)}")
print(f"\nClass distribution in merged data:")
print(merged_df['class_label'].value_counts().sort_index())

# Prepare data for analysis
X = merged_df[all_feature_names].values  # Feature matrix
y = merged_df['class_label'].values       # Labels
sample_ids_final = merged_df['sample_id'].values

print(f"\nData shapes:")
print(f"  X (fused features): {X.shape}")
print(f"  y (labels): {y.shape}")

## 8. Data Preprocessing and Scaling

In [None]:
# Standardize the fused feature matrix and persist the fitted scaler.
# NOTE: the scaler is fitted on every row of X.
print("Standardizing fused features...")
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Sanity check on the first five columns.
column_means = X_scaled.mean(axis=0)
column_stds = X_scaled.std(axis=0)
print("Feature scaling completed:")
print(f"  Mean (should be ~0): {column_means[:5]}")
print(f"  Std (should be ~1): {column_stds[:5]}")

# Save scaler for later use
scaler_path = OUTPUT_DIR / 'feature_scaler.pkl'
with scaler_path.open('wb') as scaler_handle:
    pickle.dump(scaler, scaler_handle)
print(f"\nScaler saved to: {scaler_path}")

## 9. PCA Analysis of Fused Features

In [None]:
# Fit PCA on the standardized fused features, plot the variance profile and
# persist the fitted model.
print("Performing PCA on fused features...")

# PCA cannot extract more components than min(n_samples, n_features), so cap
# the requested number instead of failing on small datasets.
n_pca_components = min(NUM_PCA_COMPONENTS, X_scaled.shape[0], X_scaled.shape[1])
if n_pca_components < NUM_PCA_COMPONENTS:
    print(f"NOTE: {NUM_PCA_COMPONENTS} components requested but the data supports "
          f"at most {n_pca_components}; using {n_pca_components}")

pca = PCA(n_components=n_pca_components)
X_pca = pca.fit_transform(X_scaled)

cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
# 1-based component numbers shared by both plots.
component_numbers = np.arange(1, pca.n_components_ + 1)

print(f"PCA completed:")
print(f"  Components: {pca.n_components_}")
print(f"  Explained variance ratio: {pca.explained_variance_ratio_[:5]}")
print(f"  Cumulative variance: {cumulative_variance[-1]:.4f}")

# Plot explained variance
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Scree plot (uses the fitted component count, not the configured one)
axes[0].bar(component_numbers, pca.explained_variance_ratio_)
axes[0].set_xlabel('Principal Component')
axes[0].set_ylabel('Explained Variance Ratio')
axes[0].set_title('PCA Scree Plot (Fused Features)')
axes[0].grid(True, alpha=0.3)

# Cumulative explained variance; x starts at 1 so that the value at x=k is
# the variance explained by the first k components.
axes[1].plot(component_numbers, cumulative_variance)
axes[1].axhline(y=0.95, color='r', linestyle='--', label='95% variance')
axes[1].set_xlabel('Number of Components')
axes[1].set_ylabel('Cumulative Explained Variance')
axes[1].set_title('Cumulative Explained Variance')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'pca_variance_analysis.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"\nPCA plot saved to: {OUTPUT_DIR / 'pca_variance_analysis.png'}")

# Save PCA model
pca_path = OUTPUT_DIR / 'pca_model.pkl'
with open(pca_path, 'wb') as f:
    pickle.dump(pca, f)
print(f"PCA model saved to: {pca_path}")

## 10. PCA Visualization by Class

In [None]:
# Scatter the samples in PCA space, one colour per class:
# left panel PC1 vs PC2, right panel PC1 vs PC3.
print("Visualizing PCA results by class...")
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

class_values = np.unique(y)
pc1_share = pca.explained_variance_ratio_[0]

# Each panel pairs PC1 (x axis) with one further component (y axis);
# pc_column is the 0-based column of that component in X_pca.
for panel, pc_column in zip(axes, (1, 2)):
    pc_name = f'PC{pc_column + 1}'
    for class_label in class_values:
        mask = y == class_label
        panel.scatter(X_pca[mask, 0], X_pca[mask, pc_column],
                      label=f'Class {class_label}', alpha=0.7, s=50)
    panel.set_xlabel(f'PC1 ({pc1_share:.2%} var)')
    panel.set_ylabel(f'{pc_name} ({pca.explained_variance_ratio_[pc_column]:.2%} var)')
    panel.set_title(f'PCA: PC1 vs {pc_name} (Fused Features)')
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
scatter_path = OUTPUT_DIR / 'pca_scatter_plot.png'
plt.savefig(scatter_path, dpi=150, bbox_inches='tight')
plt.show()

print(f"PCA scatter plot saved to: {scatter_path}")

## 11. LASSO Feature Selection on Fused Features

In [None]:
# LASSO for feature selection: fit LassoCV on the standardized fused
# features and keep every feature with a non-zero coefficient.
# NOTE(review): LassoCV is a regressor, so the integer class codes in y are
# fitted as a numeric target here - confirm that this is intended.
print("Performing LASSO feature selection on fused features...")
print(f"Training LASSO on {len(all_feature_names)} fused features...")

# Use LassoCV to find optimal alpha on a log-spaced grid
lasso_cv = LassoCV(
    cv=5,
    max_iter=LASSO_MAX_ITER,
    alphas=np.logspace(np.log10(LASSO_ALPHA_MIN), np.log10(LASSO_ALPHA_MAX), 100),
    random_state=42
)

print("Training LassoCV (this may take a moment)...")
lasso_cv.fit(X_scaled, y)

# score() is evaluated on the same data the model was fitted on, so this is
# a training R^2 rather than a cross-validated estimate. Computed once and
# reused for both the printout and the JSON export.
train_r2 = float(lasso_cv.score(X_scaled, y))

print(f"\nLASSO training completed:")
print(f"  Optimal alpha: {lasso_cv.alpha_:.6f}")
print(f"  R^2 on training data: {train_r2:.4f}")

# Get selected features
selected_features_mask = lasso_cv.coef_ != 0
selected_features = np.array(all_feature_names)[selected_features_mask]
selected_indices = np.where(selected_features_mask)[0]

if not selected_features_mask.any():
    print("\nWARNING: LASSO selected no features; later steps that use the "
          "selected features will have nothing to work with")

print(f"\nFeature selection results:")
print(f"  Original fused features: {len(all_feature_names)}")
print(f"  Selected features: {len(selected_features)}")
print(f"  Selection ratio: {len(selected_features) / len(all_feature_names) * 100:.1f}%")

# Categorize selected features. Sets give constant-time membership tests
# instead of scanning the name lists once per feature.
cnn_name_set = set(cnn_feature_names)
radiograph_name_set = set(radiograph_feature_names)
cnn_selected = sum(1 for f in selected_features if f in cnn_name_set)
radiograph_selected = sum(1 for f in selected_features if f in radiograph_name_set)

print(f"\nFeature source breakdown:")
print(f"  CNN features selected: {cnn_selected}/{len(cnn_feature_names)}")
print(f"  Radiograph features selected: {radiograph_selected}/{len(radiograph_feature_names)}")

# Ten largest coefficients by magnitude among the selected features
print(f"\nTop selected features:")
top_coefs_idx = np.argsort(np.abs(lasso_cv.coef_[selected_features_mask]))[-10:][::-1]
for i, idx in enumerate(top_coefs_idx, 1):
    feat_idx = selected_indices[idx]
    feature_source = 'CNN' if all_feature_names[feat_idx] in cnn_name_set else 'Radiograph'
    print(f"  {i}. {all_feature_names[feat_idx]:50s} [{feature_source:10s}] (coef: {lasso_cv.coef_[feat_idx]:8.4f})")

# Save LASSO model and selected features
lasso_path = OUTPUT_DIR / 'lasso_model.pkl'
with open(lasso_path, 'wb') as f:
    pickle.dump(lasso_cv, f)

selected_features_info = {
    'feature_names': selected_features.tolist(),
    'feature_indices': selected_indices.tolist(),
    'feature_sources': ['CNN' if f in cnn_name_set else 'Radiograph' for f in selected_features],
    'coefficients': lasso_cv.coef_[selected_features_mask].tolist(),
    'alpha': float(lasso_cv.alpha_),
    # Key name kept for compatibility with existing consumers of this file;
    # the value is the training-set R^2 computed above.
    'cv_score': train_r2,
    'cnn_selected_count': int(cnn_selected),
    'radiograph_selected_count': int(radiograph_selected)
}

with open(OUTPUT_DIR / 'selected_features.json', 'w') as f:
    json.dump(selected_features_info, f, indent=2)

print(f"\nLASSO model saved to: {lasso_path}")
print(f"Selected features info saved to: {OUTPUT_DIR / 'selected_features.json'}")

## 12. LASSO Coefficients Visualization

In [None]:
# Two stacked panels: the cross-validation error along the alpha grid, and
# the largest LASSO coefficients by magnitude.
print("Visualizing LASSO coefficients...")

fig, axes = plt.subplots(2, 1, figsize=(14, 10))
path_ax, coef_ax = axes

# Alpha path: error averaged over the folds for every alpha on the grid
mean_fold_mse = lasso_cv.mse_path_.mean(axis=1)
best_alpha = lasso_cv.alpha_
path_ax.loglog(lasso_cv.alphas_, mean_fold_mse)
path_ax.axvline(best_alpha, color='r', linestyle='--', label=f'Optimal alpha: {best_alpha:.4f}')
path_ax.set_xlabel('Alpha (Regularization Strength)')
path_ax.set_ylabel('Mean Squared Error')
path_ax.set_title('LASSO: Alpha Selection')
path_ax.legend()
path_ax.grid(True, alpha=0.3)

# Top coefficients, largest magnitude first
top_n = 20
magnitude_order = np.argsort(np.abs(lasso_cv.coef_))
top_indices = magnitude_order[-top_n:][::-1]
top_features = [all_feature_names[i] for i in top_indices]
top_coefs = lasso_cv.coef_[top_indices]
top_sources = ['CNN' if name in cnn_feature_names else 'Radiograph' for name in top_features]

# Bar colour encodes the modality a feature came from.
source_palette = {'CNN': '#1f77b4', 'Radiograph': '#ff7f0e'}
colors = [source_palette[source] for source in top_sources]
bar_positions = range(len(top_coefs))
coef_ax.barh(bar_positions, top_coefs, color=colors)
coef_ax.set_yticks(bar_positions)
coef_ax.set_yticklabels(top_features, fontsize=9)
coef_ax.set_xlabel('Coefficient Value')
coef_ax.set_title(f'Top {top_n} LASSO Coefficients (Blue=CNN, Orange=Radiograph)')
coef_ax.axvline(0, color='black', linestyle='-', linewidth=0.5)
coef_ax.grid(True, alpha=0.3, axis='x')

plt.tight_layout()
coef_plot_path = OUTPUT_DIR / 'lasso_coefficients.png'
plt.savefig(coef_plot_path, dpi=150, bbox_inches='tight')
plt.show()

print(f"LASSO coefficients plot saved to: {coef_plot_path}")

## 13. Train Classification Model using Selected Features

In [None]:
# Fit a one-vs-rest logistic regression on the LASSO-selected columns.
print("Training classification model with selected fused features...")

# Keep only the columns whose LASSO coefficient is non-zero.
X_selected = X_scaled[:, selected_features_mask]

print(f"\nUsing {len(selected_features)} selected features for classification")
print(f"Feature matrix shape: {X_selected.shape}")

# One binary logistic regression per class
base_estimator = LogisticRegression(max_iter=1000, random_state=42)
classifier = OneVsRestClassifier(base_estimator)

print("Training classifier...")
classifier.fit(X_selected, y)
print("Training completed!")

# Predictions are made on the very data the classifier was fitted on.
y_pred = classifier.predict(X_selected)
y_pred_proba = classifier.predict_proba(X_selected)

print("\nPredictions generated:")
print(f"  Predicted labels shape: {y_pred.shape}")
print(f"  Prediction probabilities shape: {y_pred_proba.shape}")

# Persist the fitted classifier
classifier_path = OUTPUT_DIR / 'classifier_model.pkl'
with classifier_path.open('wb') as model_handle:
    pickle.dump(classifier, model_handle)
print(f"\nClassifier saved to: {classifier_path}")

## 14. Comprehensive Model Evaluation

In [None]:
# Overall and per-class classification metrics for y_pred against y.
print("\n" + "="*70)
print("CLASSIFICATION PERFORMANCE METRICS (FUSED FEATURES)")
print("="*70)

# Basic metrics (support-weighted averages over the classes)
accuracy = accuracy_score(y, y_pred)
precision_weighted = precision_score(y, y_pred, average='weighted', zero_division=0)
recall_weighted = recall_score(y, y_pred, average='weighted', zero_division=0)
f1_weighted = f1_score(y, y_pred, average='weighted', zero_division=0)

print(f"\nOverall Metrics (weighted average):")
print(f"  Accuracy:  {accuracy:.4f}")
print(f"  Precision: {precision_weighted:.4f}")
print(f"  Recall:    {recall_weighted:.4f}")
print(f"  F1-Score:  {f1_weighted:.4f}")

# Per-class metrics.
# Precision, recall and F1 must be computed over ALL samples: restricting the
# data to the rows where y == class_label removes every false positive for
# that class, so precision could only ever come out as 1.0 or 0.0.
class_values = np.unique(y)
per_class_precision = precision_score(y, y_pred, labels=class_values, average=None, zero_division=0)
per_class_recall = recall_score(y, y_pred, labels=class_values, average=None, zero_division=0)
per_class_f1 = f1_score(y, y_pred, labels=class_values, average=None, zero_division=0)

print(f"\nPer-Class Metrics:")
for class_label, class_prec, class_rec, class_f1 in zip(
        class_values, per_class_precision, per_class_recall, per_class_f1):
    mask = y == class_label
    # Fraction of this class's samples predicted correctly (this is the same
    # quantity as the class's recall).
    class_acc = accuracy_score(y[mask], y_pred[mask])

    print(f"\n  Class {class_label}: ({mask.sum()} samples)")
    print(f"    Accuracy:  {class_acc:.4f}")
    print(f"    Precision: {class_prec:.4f}")
    print(f"    Recall:    {class_rec:.4f}")
    print(f"    F1-Score:  {class_f1:.4f}")

print(f"\n" + "="*70)

## 15. Confusion Matrix

In [None]:
# Confusion matrix of true vs. predicted labels, printed, plotted and saved.
print("\nConfusion Matrix:")

# Sorted union of the labels that occur in y or y_pred. The same list drives
# both the matrix and the tick labels, so the axes cannot be mislabelled when
# a class is absent or the labels are not exactly 0..NUM_CLASSES-1.
cm_labels = np.unique(np.concatenate([y, y_pred]))
cm = confusion_matrix(y, y_pred, labels=cm_labels)
print(cm)
print(f"\nConfusion Matrix shape: {cm.shape}")

# Visualize confusion matrix
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=cm_labels.tolist(),
            yticklabels=cm_labels.tolist(),
            cbar_kws={'label': 'Count'},
            ax=ax)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Confusion Matrix (Fused Features)')
plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"\nConfusion matrix plot saved to: {OUTPUT_DIR / 'confusion_matrix.png'}")

# Save confusion matrix
np.save(OUTPUT_DIR / 'confusion_matrix.npy', cm)

## 16. Detailed Classification Report

In [None]:
# Print the per-class classification report and store it as JSON.
report_banner = "=" * 70
# Display names 'Class 0' .. 'Class <NUM_CLASSES - 1>', shared by both calls below.
report_target_names = [f'Class {i}' for i in range(NUM_CLASSES)]

print("\n" + report_banner)
print("DETAILED CLASSIFICATION REPORT")
print(report_banner)
print(classification_report(y, y_pred, target_names=report_target_names))

# Same report as a nested dict, which is what gets written to disk
report_dict = classification_report(
    y, y_pred, target_names=report_target_names, output_dict=True
)
report_path = OUTPUT_DIR / 'classification_report.json'
with report_path.open('w') as report_handle:
    json.dump(report_dict, report_handle, indent=2)

print(f"\nClassification report saved to: {report_path}")

## 17. ROC Curves (One-vs-Rest)

In [None]:
# One-vs-rest ROC curve and AUC for each class, one panel per class.
# NOTE(review): column i of y_pred_proba is paired with class label i, which
# assumes the labels are exactly 0..NUM_CLASSES-1 - confirm against the data.
# NOTE(review): for NUM_CLASSES == 2 label_binarize returns a single column,
# so this per-class indexing needs adapting for the binary case.
print("\nCalculating ROC curves...")

class_indices = list(range(NUM_CLASSES))
y_bin = label_binarize(y, classes=class_indices)

# Size the grid from NUM_CLASSES (three panels per row) instead of assuming
# exactly five classes; for five classes this is the same 2x3, 15x10 figure.
n_roc_cols = 3
n_roc_rows = -(-NUM_CLASSES // n_roc_cols)  # ceiling division
fig, axes = plt.subplots(n_roc_rows, n_roc_cols,
                         figsize=(15, 5 * n_roc_rows), squeeze=False)
axes = axes.flatten()

roc_auc_scores = {}

for i in class_indices:
    fpr, tpr, _ = roc_curve(y_bin[:, i], y_pred_proba[:, i])
    roc_auc = auc(fpr, tpr)
    roc_auc_scores[f'Class {i}'] = roc_auc

    axes[i].plot(fpr, tpr, color='darkorange', lw=2,
                 label=f'ROC curve (AUC = {roc_auc:.3f})')
    # Diagonal reference line (TPR == FPR)
    axes[i].plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    axes[i].set_xlim([0.0, 1.0])
    axes[i].set_ylim([0.0, 1.05])
    axes[i].set_xlabel('False Positive Rate')
    axes[i].set_ylabel('True Positive Rate')
    axes[i].set_title(f'ROC Curve - Class {i}')
    axes[i].legend(loc="lower right")
    axes[i].grid(True, alpha=0.3)

# Remove every panel that has no class assigned to it
for unused_ax in axes[NUM_CLASSES:]:
    fig.delaxes(unused_ax)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'roc_curves.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"\nROC-AUC Scores:")
for class_name, score in roc_auc_scores.items():
    print(f"  {class_name}: {score:.4f}")

print(f"\nROC curves plot saved to: {OUTPUT_DIR / 'roc_curves.png'}")

## 18. Metrics Summary and Export

In [None]:
# Collect every reported number into one nested dict and write it to JSON.
n_fused_features = len(all_feature_names)
n_selected_features = len(selected_features)

fusion_info = {
    'cnn_features_count': len(cnn_feature_names),
    'radiograph_features_count': len(radiograph_feature_names),
    'total_fused_features': n_fused_features
}

overall_metrics = {
    'accuracy': float(accuracy),
    'precision_weighted': float(precision_weighted),
    'recall_weighted': float(recall_weighted),
    'f1_score_weighted': float(f1_weighted)
}

model_info = {
    'fused_features_before_selection': n_fused_features,
    'selected_features': n_selected_features,
    'cnn_selected': int(cnn_selected),
    'radiograph_selected': int(radiograph_selected),
    'selection_ratio': float(n_selected_features / n_fused_features),
    'pca_components': NUM_PCA_COMPONENTS,
    'pca_explained_variance': float(np.cumsum(pca.explained_variance_ratio_)[-1]),
    'lasso_alpha': float(lasso_cv.alpha_),
    'total_samples': len(y),
    'num_classes': NUM_CLASSES
}

# Section order here is the key order in the JSON file.
metrics_summary = {
    'fusion_info': fusion_info,
    'overall_metrics': overall_metrics,
    'roc_auc_scores': roc_auc_scores,
    'confusion_matrix': cm.tolist(),
    'model_info': model_info,
    'classification_report': report_dict
}

# Save metrics
metrics_path = OUTPUT_DIR / 'metrics_summary.json'
with metrics_path.open('w') as metrics_handle:
    json.dump(metrics_summary, metrics_handle, indent=2)

export_banner = "=" * 70
print("\n" + export_banner)
print("METRICS SAVED")
print(export_banner)
print(f"\nAll metrics saved to: {metrics_path}")

# List the regular files now present in the output directory, sorted by path
print("\nGenerated files:")
output_files = [entry for entry in sorted(OUTPUT_DIR.glob('*')) if entry.is_file()]
for entry in output_files:
    print(f"  - {entry.name}")

## 19. Summary and Conclusions

In [None]:
# Final console recap of the run: fusion, PCA, LASSO, classification results
# and where the outputs were written.
summary_banner = "=" * 70
n_cnn = len(cnn_feature_names)
n_radiograph = len(radiograph_feature_names)
n_fused = len(all_feature_names)
n_selected = len(selected_features)

print("\n" + summary_banner)
print("FUSION ANALYSIS SUMMARY")
print(summary_banner)

print("\n1. FEATURE FUSION")
print(f"   - CNN features: {n_cnn}")
print(f"   - Radiograph features: {n_radiograph}")
print(f"   - Total fused features: {n_fused}")
print(f"   - Samples with both modalities: {len(merged_df)}")

print("\n2. PCA ANALYSIS")
print(f"   - Reduced {n_fused} fused features to {NUM_PCA_COMPONENTS} components")
print(f"   - Retained {np.cumsum(pca.explained_variance_ratio_)[-1]:.2%} of variance")

print("\n3. LASSO FEATURE SELECTION")
print(f"   - Selected {n_selected} features ({n_selected/n_fused*100:.1f}%)")
print(f"   - CNN features selected: {cnn_selected}/{n_cnn}")
print(f"   - Radiograph features selected: {radiograph_selected}/{n_radiograph}")
print(f"   - Optimal regularization alpha: {lasso_cv.alpha_:.6f}")

print("\n4. CLASSIFICATION PERFORMANCE")
print(f"   - Overall Accuracy: {accuracy:.4f}")
print(f"   - Weighted Precision: {precision_weighted:.4f}")
print(f"   - Weighted Recall: {recall_weighted:.4f}")
print(f"   - Weighted F1-Score: {f1_weighted:.4f}")

# Accuracy restricted to the samples whose true label is the given class
print("\n5. CLASS-SPECIFIC PERFORMANCE")
for class_label in np.unique(y):
    mask = y == class_label
    class_acc = accuracy_score(y[mask], y_pred[mask])
    n_in_class = mask.sum()
    print(f"   - Class {class_label}: {class_acc:.4f} accuracy ({n_in_class} samples)")

print("\n6. OUTPUT DIRECTORY")
print(f"   - Results saved to: {OUTPUT_DIR}")
print("   - Plots: PCA variance, scatter plots, LASSO coefficients, confusion matrix, ROC curves")
print("   - Models: PCA, LASSO, Classifier, Feature scaler")
print("   - Metrics: Comprehensive JSON summary with fusion details")

print("\n" + summary_banner)