In [1]:
!pip install pyngrok



In [2]:
!pip install streamlit pyngrok



In [7]:
streamlit_code = '''
# Libraries used throughout the dashboard script. They are imported at the very
# top of the generated app because nothing else in this script brings `st`,
# `np`, `pd`, `px` or `go` into scope.
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

# Page configuration. Kept ahead of every other Streamlit call, which is where
# Streamlit documents that set_page_config belongs.
st.set_page_config(
    page_title="Exoplanet Classification Model Comparison",
    page_icon="🪐",  # bare emoji, no trailing whitespace
    layout="wide",
    initial_sidebar_state="expanded",
)

# Colorblind-friendly base palette (Okabe-Ito), keyed by a short color name.
COLORBLIND_PALETTE = dict(
    blue='#0072B2',
    orange='#E69F00',
    green='#009E73',
    red='#D55E00',
    purple='#CC79A7',
    yellow='#F0E442',
    cyan='#56B4E9',
    grey='#999999',
)

# One fixed color per model type.
MODEL_COLORS = {
    model_label: COLORBLIND_PALETTE[shade]
    for model_label, shade in [
        ('kNN', 'blue'),
        ('SVM', 'orange'),
        ('Neural Network', 'purple'),
    ]
}

# One fixed color per target class, shared by every visualization.
CLASS_COLORS = {
    class_label: COLORBLIND_PALETTE[shade]
    for class_label, shade in [
        ('FALSE POSITIVE', 'red'),
        ('CANDIDATE', 'blue'),
        ('CONFIRMED', 'green'),
    ]
}

# Colors used when contrasting runs with and without PCA.
PCA_COLORS = {
    pca_label: COLORBLIND_PALETTE[shade]
    for pca_label, shade in [
        ('Without PCA', 'orange'),
        ('With PCA', 'purple'),
    ]
}

# Inject custom CSS for the tabs, the sidebar navigation and the metric cards.
# `unsafe_allow_html=True` is passed so the <style> element is emitted as HTML.
# Note: `:focus-visible` is a pseudo-class and therefore takes a single colon;
# written with two colons the selector is invalid and the rule never applies.
st.markdown("""
<style>
    .stTabs [data-baseweb="tab-list"] {
        gap: 2px;
    }
    .stTabs [data-baseweb="tab"] {
        height: 50px;
        white-space: pre-wrap;
        background-color: #f0f2f6;
        border-radius: 4px 4px 0px 0px;
        gap: 1px;
        padding-top: 10px;
        padding-bottom: 10px;
    }
    .stTabs [aria-selected="true"] {
        background-color: #e6f0ff;
    }
    div[data-testid="stSidebarNav"] li div a {
        margin-left: 1rem;
        padding: 1rem;
        width: 300px;
    }
    div[data-testid="stSidebarNav"] li div:focus-visible {
        background-color: rgba(151, 166, 195, 0.15);
    }
    .metric-box {
        background-color: #f0f2f6;
        border-radius: 5px;
        padding: 10px;
        margin: 10px 0px;
        text-align: center;
    }
    .metric-value {
        font-size: 24px;
        font-weight: bold;
    }
    .metric-label {
        font-size: 14px;
        color: #555;
    }
    .highlight {
        background-color: #e6f0ff;
        padding: 1px 4px;
        border-radius: 3px;
    }
</style>
""", unsafe_allow_html=True)

# Create sidebar
st.sidebar.title("🪐   Exoplanet Classification")

# Add performance metrics for each model
@st.cache_data
def generate_model_metrics():
    """Build the overall performance table for every model, with and without PCA.

    Every number is a hard-coded constant; nothing is sampled, so the function
    is deterministic and does not touch NumPy's global random state.

    Returns:
        pandas.DataFrame: six rows (the three models without PCA, followed by
        the same three models with PCA) and the columns ``model``, ``pca``,
        ``training_time``, ``memory_usage``, ``accuracy``, ``precision``,
        ``recall`` and ``f1``. Each "With PCA" metric is the model's baseline
        value plus its entry in ``pca_effects``.
    """
    model_names = ['kNN', 'SVM', 'Neural Network']
    metrics = ['accuracy', 'precision', 'recall', 'f1']

    # Baseline (no PCA) scores per model.
    base_values = {
        'kNN': {'accuracy': 0.8009, 'precision': 0.7915, 'recall': 0.8009, 'f1': 0.7895},
        'SVM': {'accuracy': 0.8516, 'precision': 0.8463, 'recall': 0.8516, 'f1': 0.8394},
        'Neural Network': {'accuracy': 0.82, 'precision': 0.81, 'recall': 0.80, 'f1': 0.80}
    }

    # Additive change applied to each baseline score when PCA is used.
    pca_effects = {
        'kNN': {'accuracy': -0.009, 'precision': -0.0122, 'recall': -0.009, 'f1': -0.0112},
        'SVM': {'accuracy': -0.0019, 'precision': 0.0002, 'recall': -0.0019, 'f1': -0.0065},
        'Neural Network': {'accuracy': -0.02, 'precision': -0.03, 'recall': -0.02, 'f1': -0.03}
    }

    # Training times (seconds)
    training_times = {
        'kNN': {'without_pca': 15.3, 'with_pca': 5.4},  # ~65% reduction
        'SVM': {'without_pca': 785.5, 'with_pca': 817.2},  # 4.03% increase
        'Neural Network': {'without_pca': 210.7, 'with_pca': 85.4}
    }

    # Memory usage (MB)
    memory_usage = {
        'kNN': {'without_pca': 55.2, 'with_pca': 2.1},  # ~96% reduction
        'SVM': {'without_pca': 65.8, 'with_pca': 12.65},  # 80.78% reduction
        'Neural Network': {'without_pca': 120.5, 'with_pca': 85.7}
    }

    # (value of the `pca` column, key into the time/memory tables, add PCA deltas?)
    variants = [
        ('Without PCA', 'without_pca', False),
        ('With PCA', 'with_pca', True),
    ]

    # Emit all "Without PCA" rows first, then all "With PCA" rows.
    data = []
    for pca_label, resource_key, apply_effect in variants:
        for model in model_names:
            model_data = {
                'model': model,
                'pca': pca_label,
                'training_time': training_times[model][resource_key],
                'memory_usage': memory_usage[model][resource_key],
            }

            for metric in metrics:
                value = base_values[model][metric]
                if apply_effect:
                    value = value + pca_effects[model][metric]
                model_data[metric] = value

            data.append(model_data)

    return pd.DataFrame(data)

@st.cache_data
def generate_class_metrics():
    """Assemble per-class precision, recall and F1 for every model and PCA setting.

    Returns:
        pandas.DataFrame: one row per (model, class, PCA option) combination,
        ordered model -> class -> PCA option, with the columns ``model``,
        ``class``, ``pca``, ``precision``, ``recall`` and ``f1``. "With PCA"
        rows hold the baseline score plus the matching additive PCA delta;
        "Without PCA" rows hold the baseline score unchanged.
    """
    # Baseline scores, nested as model -> class -> metric.
    baseline = {
        'kNN': {
            'FALSE POSITIVE': {'precision': 0.94, 'recall': 0.97, 'f1': 0.955},
            'CANDIDATE': {'precision': 0.49, 'recall': 0.31, 'f1': 0.380},
            'CONFIRMED': {'precision': 0.70, 'recall': 0.86, 'f1': 0.775}
        },
        'SVM': {
            'FALSE POSITIVE': {'precision': 0.995, 'recall': 0.997, 'f1': 0.996},
            'CANDIDATE': {'precision': 0.58, 'recall': 0.42, 'f1': 0.488},
            'CONFIRMED': {'precision': 0.78, 'recall': 0.84, 'f1': 0.807}
        },
        'Neural Network': {
            'FALSE POSITIVE': {'precision': 0.93, 'recall': 0.95, 'f1': 0.940},
            'CANDIDATE': {'precision': 0.57, 'recall': 0.48, 'f1': 0.522},
            'CONFIRMED': {'precision': 0.80, 'recall': 0.85, 'f1': 0.824}
        }
    }

    # Additive PCA deltas with the same nesting as `baseline`.
    pca_delta = {
        'kNN': {
            'FALSE POSITIVE': {'precision': -0.01, 'recall': 0.00, 'f1': -0.002},
            'CANDIDATE': {'precision': -0.05, 'recall': -0.02, 'f1': -0.031},
            'CONFIRMED': {'precision': -0.02, 'recall': -0.01, 'f1': -0.014}
        },
        'SVM': {
            'FALSE POSITIVE': {'precision': 0.003, 'recall': 0.0, 'f1': 0.001},
            'CANDIDATE': {'precision': -0.05, 'recall': -0.04, 'f1': -0.04},
            'CONFIRMED': {'precision': 0.005, 'recall': 0.0, 'f1': 0.001}
        },
        'Neural Network': {
            'FALSE POSITIVE': {'precision': -0.02, 'recall': -0.01, 'f1': -0.015},
            'CANDIDATE': {'precision': -0.04, 'recall': -0.03, 'f1': -0.035},
            'CONFIRMED': {'precision': -0.02, 'recall': -0.01, 'f1': -0.015}
        }
    }

    records = []
    # Dict insertion order drives the row order: models, then classes, then metrics.
    for model_name, per_class in baseline.items():
        for class_label, scores in per_class.items():
            for pca_setting in ('Without PCA', 'With PCA'):
                # An empty delta table makes every shift fall back to 0.
                if pca_setting == 'With PCA':
                    shifts = pca_delta[model_name][class_label]
                else:
                    shifts = {}

                record = {'model': model_name, 'class': class_label, 'pca': pca_setting}
                for metric_name, score in scores.items():
                    record[metric_name] = score + shifts.get(metric_name, 0)
                records.append(record)

    return pd.DataFrame(records)

@st.cache_data
def generate_confusion_matrices():
    """Return the hard-coded 3x3 confusion matrices for every model.

    Returns:
        dict: ``{model: {pca_option: numpy.ndarray}}`` where ``model`` is one of
        ``'kNN'``, ``'SVM'`` or ``'Neural Network'`` and ``pca_option`` is
        ``'Without PCA'`` or ``'With PCA'``. Each array has shape (3, 3) with
        rows listed in the order FALSE POSITIVE, CANDIDATE, CONFIRMED.

    NOTE(review): the kNN and SVM row sums are identical (749 / 285 / 523) for
    both PCA settings, which suggests rows are the true classes and columns the
    predicted classes - confirm against the code that plots these matrices.
    NOTE(review): the Neural Network rows sum to 800 / 400 / 600, i.e. a
    different sample count than kNN and SVM - confirm these figures.
    """
    return {
        'kNN': {
            'Without PCA': np.array([
                [703, 30, 16],   # FALSE POSITIVE row
                [16, 89, 180],   # CANDIDATE row
                [4, 64, 455]     # CONFIRMED row
            ]),
            'With PCA': np.array([
                [705, 24, 20],   # FALSE POSITIVE row
                [19, 80, 186],   # CANDIDATE row
                [6, 69, 448]     # CONFIRMED row
            ])
        },
        'SVM': {
            'Without PCA': np.array([
                [746, 3, 0],     # FALSE POSITIVE row
                [3, 110, 172],   # CANDIDATE row
                [0, 53, 470]     # CONFIRMED row
            ]),
            'With PCA': np.array([
                [746, 3, 0],     # FALSE POSITIVE row
                [2, 95, 188],    # CANDIDATE row
                [0, 41, 482]     # CONFIRMED row
            ])
        },
        'Neural Network': {
            'Without PCA': np.array([
                [730, 40, 30],   # FALSE POSITIVE row
                [70, 240, 90],   # CANDIDATE row
                [40, 70, 490]    # CONFIRMED row
            ]),
            'With PCA': np.array([
                [720, 50, 30],   # FALSE POSITIVE row
                [80, 220, 100],  # CANDIDATE row
                [50, 70, 480]    # CONFIRMED row
            ])
        }
    }

# Generate neural network training history
@st.cache_data
def generate_nn_training_history():
    """Return the neural network's loss curves and final accuracies.

    The numbers are hard-coded; the original author's note attributes them to
    the DeepLearning notebook.

    Returns:
        dict: ``epochs`` (the integers 1 through 30), ``train_losses`` and
        ``val_losses`` (one float per epoch), plus the scalar
        ``final_train_acc``, ``final_val_acc`` and ``final_test_acc``.
    """
    # Per-epoch training loss, epoch 1 first.
    training_curve = [
        7008.9152, 618.2007, 101.6274, 1.9714, 1.2824, 1.2649, 1.2892, 1.0943, 1.0720, 1.0396,
        0.9747, 1.0473, 1.0142, 0.9812, 0.9838, 0.9794, 0.9897, 0.9680, 1.0010, 0.9792,
        0.9779, 0.9778, 0.9860, 0.9854, 0.9764, 0.9814, 0.9959, 0.9684, 0.9766, 0.9857
    ]
    # Per-epoch validation loss, epoch 1 first.
    validation_curve = [
        46.1613, 6.7669, 1.1405, 0.9602, 0.9762, 0.9784, 0.9800, 0.9822, 0.9838, 0.9807,
        0.9721, 0.9723, 0.9805, 0.9859, 0.9678, 0.9685, 0.9925, 0.9635, 0.9780, 0.9799,
        0.9950, 0.9807, 0.9968, 0.9915, 1.0039, 0.9992, 0.9973, 0.9862, 1.0105, 1.0027
    ]

    history = dict(
        epochs=[epoch for epoch in range(1, 31)],
        train_losses=training_curve,
        val_losses=validation_curve,
        final_train_acc=0.82,
        final_val_acc=0.78,
        final_test_acc=0.50,
    )
    return history

# Simulate PCA visualization data (generated in code, not loaded from a file)
@st.cache_data
def generate_pca_data():
    """Simulate a three-component PCA projection for the three KOI classes.

    Each class is drawn from independent normal distributions, one per
    principal component, using a fixed seed so the result is reproducible.

    Returns:
        tuple: ``(pca_df, explained_variance)``. ``pca_df`` is a
        pandas.DataFrame with 1000 rows (500 FALSE POSITIVE, then 200
        CANDIDATE, then 300 CONFIRMED) and the columns ``PC1``, ``PC2``,
        ``PC3`` and ``Class``. ``explained_variance`` is the fixed list
        ``[0.45, 0.28, 0.12]`` for PC1, PC2 and PC3.
    """
    # A private generator seeded with 42 produces the same stream that
    # np.random.seed(42) would, without resetting NumPy's global random state.
    rng = np.random.RandomState(42)

    # (class label, sample count, (mean, std) for PC1, PC2 and PC3)
    class_specs = [
        ('FALSE POSITIVE', 500, ((1, 1.5), (-1, 1), (0.5, 1))),
        ('CANDIDATE', 200, ((-1, 1), (1, 1.2), (-0.5, 1))),
        ('CONFIRMED', 300, ((0, 1), (0, 1), (1.5, 1))),
    ]

    frames = []
    for label, count, component_params in class_specs:
        # The draw order (PC1, PC2, PC3 within a class, classes in list order)
        # determines the sampled values, so it must stay as is.
        columns = {}
        for pc_number, (mean, std) in enumerate(component_params, start=1):
            columns[f'PC{pc_number}'] = rng.normal(mean, std, count)
        columns['Class'] = [label] * count
        frames.append(pd.DataFrame(columns))

    # Combine all classes into one frame with a fresh 0..999 index.
    pca_df = pd.concat(frames, ignore_index=True)

    # 45%, 28%, 12% for PC1, PC2, PC3
    explained_variance = [0.45, 0.28, 0.12]

    return pca_df, explained_variance

# Build every dataset the dashboard needs (each generator is cached by Streamlit)
model_metrics_df = generate_model_metrics()
class_metrics_df = generate_class_metrics()
confusion_matrices = generate_confusion_matrices()
nn_history = generate_nn_training_history()
pca_df, explained_variance = generate_pca_data()

# Sidebar: which models to include (all of them by default)
st.sidebar.header("Filter Options")
available_models = ['kNN', 'SVM', 'Neural Network']
selected_models = st.sidebar.multiselect(
    "Select Models to Compare",
    options=available_models,
    default=list(available_models)
)

# Sidebar: which PCA variants to include
pca_option = st.sidebar.radio(
    "Dimensionality Reduction",
    options=['Both', 'Without PCA', 'With PCA']
)

# 'Both' expands to the two concrete labels stored in the `pca` column
pca_filter = ['Without PCA', 'With PCA'] if pca_option == 'Both' else [pca_option]

# Keep only the rows matching both sidebar selections
overall_mask = (
    model_metrics_df['model'].isin(selected_models)
    & model_metrics_df['pca'].isin(pca_filter)
)
filtered_metrics = model_metrics_df[overall_mask]

class_mask = (
    class_metrics_df['model'].isin(selected_models)
    & class_metrics_df['pca'].isin(pca_filter)
)
filtered_class_metrics = class_metrics_df[class_mask]

# Page title
st.title("🪐   Exoplanet Classification Model Comparison")

# One tab per dashboard view
tab_labels = [
    "🔍   PCA Analysis",
    "📊   Performance Overview",
    "⏱️   Computational Efficiency",
    "🎯   Class-Specific Performance",
    "📉   Confusion Matrices"
]
tab1, tab2, tab3, tab4, tab5 = st.tabs(tab_labels)

# Tab 1: PCA Analysis
# Renders a 3D scatter of the PCA projection, a bar chart of the explained
# variance, and one 2D scatter per pair of principal components.
with tab1:
    st.header("Principal Component Analysis (PCA)")

    st.markdown("""
    This section shows the distribution of Kepler Objects of Interest (KOIs) in the Principal Component space.
    PCA transforms the original high-dimensional feature space into a lower-dimensional representation that
    captures the most variance in the data.
    """)

    # Axis captions such as "PC1 (45%)", built once and reused by every plot.
    axis_labels = {
        f'PC{number}': f'PC{number} ({ratio:.0%})'
        for number, ratio in enumerate(explained_variance, start=1)
    }
    # Legend styling shared by the 3D plot and the 2D plots.
    legend_style = dict(
        title="Exoplanet Class",
        font=dict(size=14)
    )

    col1, col2 = st.columns([3, 1])

    with col1:
        # Create 3D PCA plot
        fig_3d = px.scatter_3d(
            pca_df, x='PC1', y='PC2', z='PC3',
            color='Class',
            color_discrete_map=CLASS_COLORS,
            title='3D PCA Visualization',
            labels=axis_labels,
            opacity=0.7,
            height=700
        )

        # Update layout
        fig_3d.update_layout(
            scene=dict(
                xaxis_title=axis_labels['PC1'],
                yaxis_title=axis_labels['PC2'],
                zaxis_title=axis_labels['PC3']
            ),
            legend=legend_style,
            margin=dict(l=0, r=0, b=0, t=50)
        )

        st.plotly_chart(fig_3d, use_container_width=True)

    with col2:
        st.subheader("Explained Variance")

        # Create a bar chart for explained variance
        total_variance = sum(explained_variance[:3])

        fig_var = px.bar(
            x=['PC1', 'PC2', 'PC3'],
            y=explained_variance,
            title=f'Explained Variance Ratio (Total: {total_variance:.0%})',
            labels={'x': 'Principal Component', 'y': 'Explained Variance Ratio'},
            color_discrete_sequence=[COLORBLIND_PALETTE['purple']]
        )

        fig_var.update_layout(
            xaxis_title="Principal Component",
            yaxis_title="Explained Variance Ratio",
            yaxis_tickformat='.0%',
            showlegend=False
        )

        st.plotly_chart(fig_var, use_container_width=True)

        # Add text explanation. The percentage is derived from
        # explained_variance so the prose cannot drift from the chart title.
        st.markdown(f"""
        The first 3 principal components capture approximately {total_variance:.0%} of the variance in the dataset:

        - **PC1** captures features related to transit depth and stellar parameters
        - **PC2** relates to orbital characteristics
        - **PC3** correlates with signal quality measurements

        PCA helps visualize the natural clustering of exoplanet classes and reduces computational requirements for models.
        """)

    # Add 2D PCA Plot
    st.subheader("2D PCA Visualization")

    # One sub-tab and one scatter plot per pair of components.
    component_pairs = [('PC1', 'PC2'), ('PC2', 'PC3'), ('PC1', 'PC3')]
    pca_2d_tabs = st.tabs([f"{x_pc} vs {y_pc}" for x_pc, y_pc in component_pairs])

    for pair_tab, (x_pc, y_pc) in zip(pca_2d_tabs, component_pairs):
        with pair_tab:
            fig_2d = px.scatter(
                pca_df, x=x_pc, y=y_pc,
                color='Class',
                color_discrete_map=CLASS_COLORS,
                title=f'{x_pc} vs {y_pc}',
                labels={x_pc: axis_labels[x_pc], y_pc: axis_labels[y_pc]},
                opacity=0.7,
                height=500
            )

            fig_2d.update_layout(
                xaxis_title=axis_labels[x_pc],
                yaxis_title=axis_labels[y_pc],
                legend=legend_style
            )

            st.plotly_chart(fig_2d, use_container_width=True)

    # Add explanation
    st.markdown("""
    ### PCA Analysis Insights:

    - **Class Separation**: We can observe some natural separation between classes in the PCA space, especially between False Positives and Confirmed exoplanets

    - **Candidate Overlap**: The Candidate class (blue) overlaps with both False Positives and Confirmed exoplanets, reflecting their uncertain status

    """)

# Tab 2: Performance Overview
# Renders the grouped bar chart of overall metrics, the best performer per
# metric, and a static feature-importance chart. Each section is rendered
# exactly once.
with tab2:
    st.header("Model Performance Comparison")

    # Add information about the visualization
    st.markdown("""
    This dashboard visualizes the performance of different machine learning models on the exoplanet classification task.
    The models are evaluated on accuracy, precision, recall, and F1 score metrics.

    Use the **sidebar** to filter models and PCA options. You can:
    - Select specific models to compare
    - Choose to view performances with or without PCA dimensionality reduction
    """)

    metrics = ['accuracy', 'precision', 'recall', 'f1']

    if filtered_metrics.empty:
        # Deselecting every model in the sidebar leaves nothing to rank; the
        # `.iloc[0]` and `.idxmax()` lookups below would raise on an empty frame.
        st.warning("No models selected. Pick at least one model in the sidebar to compare performance.")
    else:
        col1, col2 = st.columns([3, 1])

        with col1:
            # Reshape to long format: one row per (model variant, metric) pair
            plot_df = pd.DataFrame([
                {
                    'Model': f"{row['model']} ({row['pca']})",
                    'Metric': metric.capitalize(),
                    'Value': row[metric],
                    'model': row['model'],
                    'pca': row['pca']
                }
                for _, row in filtered_metrics.iterrows()
                for metric in metrics
            ])

            # Create performance comparison chart
            fig = px.bar(
                plot_df,
                x='Model',
                y='Value',
                color='Metric',
                barmode='group',
                color_discrete_sequence=[
                    COLORBLIND_PALETTE['blue'],
                    COLORBLIND_PALETTE['green'],
                    COLORBLIND_PALETTE['orange'],
                    COLORBLIND_PALETTE['purple']
                ],
                labels={'Value': 'Score', 'Model': ''},
                hover_data=['model', 'pca', 'Value'],
                title='Performance Metrics by Model',
                text=plot_df['Value'].round(2)  # Add text showing the values
            )

            fig.update_layout(
                plot_bgcolor='rgba(0,0,0,0)',
                height=500,
                hovermode='closest',
                legend=dict(
                    orientation='h',
                    yanchor='top',
                    y=1.15,
                    xanchor='right',
                    x=1
                ),
                margin=dict(b=150),  # Increased bottom margin for x-axis labels
                xaxis=dict(
                    rangeslider=dict(visible=True),  # Range slider for zooming
                    type='category',
                    tickangle=45  # Angled labels to prevent overlap
                ),
                yaxis=dict(
                    range=[0.5, 1.0],  # Set a fixed range for the y-axis
                    title='Score'
                )
            )

            # Improve hover information; customdata comes from hover_data above
            fig.update_traces(
                hovertemplate='<b>%{customdata[0]} (%{customdata[1]})</b><br>%{x}<br>%{y:.4f}<extra>%{fullData.name}</extra>',
                textposition='outside'  # Place text values outside the bars
            )

            st.plotly_chart(fig, use_container_width=True)

            # Add best model annotation (highest accuracy among the filtered rows)
            best_model_info = filtered_metrics.sort_values('accuracy', ascending=False).iloc[0]
            st.info(
                f"💫   **Best Overall Model**: {best_model_info['model']} ({best_model_info['pca']}) with "
                f"accuracy of {best_model_info['accuracy']:.4f} and F1 score of {best_model_info['f1']:.4f}"
            )

        with col2:
            # Top model per metric
            st.subheader("Top Performer by Metric")

            for metric in metrics:
                top_model = filtered_metrics.loc[filtered_metrics[metric].idxmax()]

                # Create a styled metric display
                st.markdown(f"""
                <div class="metric-box">
                    <div class="metric-value">{top_model[metric]:.4f}</div>
                    <div class="metric-label">{metric.capitalize()}</div>
                    <div>{top_model['model']} ({top_model['pca']})</div>
                </div>
                """, unsafe_allow_html=True)

    # Feature importance - for this demo a static bar chart is used; the values
    # are hard-coded here rather than computed from the models.
    st.subheader("Feature Importance")

    # Add explanation about the Importance Score
    st.markdown("""
    The **Importance Score** represents the relative influence of each feature on the model's predictions.
    These scores are calculated using **Permutation Importance**, which measures how much the model's performance
    decreases when a feature's values are randomly shuffled. Higher scores indicate features that have a greater
    impact on the model's decision-making process.

    The importance scores shown here are averaged across all models (kNN, SVM, and Neural Network) to provide
    a comprehensive view of which features are most influential in exoplanet classification.
    """)

    feature_importance = pd.DataFrame({
        'Feature': ['koi_fpflag_nt', 'koi_fpflag_ss', 'koi_period', 'koi_time0bk', 'koi_impact',
                   'koi_duration', 'koi_depth', 'koi_prad', 'koi_teq', 'koi_insol'],
        'Importance': [0.15, 0.12, 0.09, 0.08, 0.07, 0.11, 0.13, 0.09, 0.08, 0.08]
    })

    # Create a description dictionary for each feature's meaning
    feature_descriptions = {
        'koi_fpflag_nt': 'Not Transit-Like Flag',
        'koi_fpflag_ss': 'Stellar Eclipse Flag',
        'koi_period': 'Orbital Period (days)',
        'koi_time0bk': 'Transit Epoch (BJD-2454833)',
        'koi_impact': 'Impact Parameter',
        'koi_duration': 'Transit Duration (hours)',
        'koi_depth': 'Transit Depth (ppm)',
        'koi_prad': 'Planet Radius (Earth radii)',
        'koi_teq': 'Equilibrium Temperature (K)',
        'koi_insol': 'Insolation Flux (Earth flux)'
    }

    # Add descriptions to the feature_importance DataFrame
    feature_importance['Description'] = feature_importance['Feature'].map(feature_descriptions)

    # Sort once and take the bar labels from the same sorted frame, so every
    # label sits on the bar it belongs to.
    feature_importance_sorted = feature_importance.sort_values('Importance', ascending=False)

    feature_fig = px.bar(
        feature_importance_sorted,
        x='Importance',
        y='Feature',
        orientation='h',
        color='Importance',
        color_continuous_scale='Viridis',
        title='Top 10 Most Important Features',
        hover_data=['Description'],  # Add descriptions to hover information
        text=feature_importance_sorted['Importance'].round(2)  # Add text labels
    )

    feature_fig.update_layout(
        plot_bgcolor='rgba(0,0,0,0)',
        height=500,
        yaxis={'categoryorder': 'total ascending'}
    )

    # Improve hover information; customdata[0] is the Description column
    feature_fig.update_traces(
        hovertemplate='<b>%{y}</b><br>Description: %{customdata[0]}<br>Importance: %{x:.2f}<extra></extra>',
        textposition='outside'  # Place text values outside the bars
    )

    st.plotly_chart(feature_fig, use_container_width=True)

    # Add detailed feature explanations
    st.markdown("""
    ### Feature Descriptions

    - **koi_fpflag_nt** (Not Transit-Like Flag): Indicates whether the signal resembles a non-planetary transit
    - **koi_fpflag_ss** (Stellar Eclipse Flag): Indicates if the signal resembles a stellar eclipse
    - **koi_depth** (Transit Depth): The fractional decrease in stellar brightness during a transit
    - **koi_duration** (Transit Duration): Time duration of the planetary transit
    - **koi_period** (Orbital Period): Time taken for the planet to orbit its star
    - **koi_prad** (Planet Radius): Estimated radius of the planet in Earth radii
    - **koi_teq** (Equilibrium Temperature): Estimated temperature of the planet
    - **koi_impact** (Impact Parameter): Projected distance between planet and star centers during transit

    These features provide critical information about the transit signal characteristics, orbital parameters, and
    physical properties of the potential exoplanet, all of which help distinguish genuine planets from false positives.
    """)

# Tab 3: Computational Efficiency
# Plots the neural network's training and validation loss per epoch on a
# logarithmic y-axis, with two annotations and a dashed early-stopping marker.
with tab3:
    st.header("Computational Efficiency")
    st.markdown("""
    This section shows the Neural Network model training progression from the DeepLearning notebook.
    The plot displays the training and validation loss over 30 epochs, enabling analysis of the model's
    learning curve and potential overfitting.
    """)

    # Neural Network Training & Validation Losses
    st.subheader("Neural Network Training Progress")

    # Create interactive training/validation loss plot for Neural Network
    nn_fig = go.Figure()

    # One trace per loss curve: (key in nn_history, legend name, line color)
    loss_traces = [
        ('train_losses', 'Training Loss', MODEL_COLORS['Neural Network']),
        ('val_losses', 'Validation Loss', COLORBLIND_PALETTE['cyan']),
    ]
    for history_key, trace_name, line_color in loss_traces:
        nn_fig.add_trace(go.Scatter(
            x=nn_history['epochs'],
            y=nn_history[history_key],
            mode='lines+markers',
            name=trace_name,
            line=dict(color=line_color),
            hovertemplate='Epoch %{x}<br>Loss: %{y:.4f}'
        ))

    # The y-axis is logarithmic (see update_layout below). Plotly positions an
    # annotation on a log axis by log10 of the value, so the loss is converted
    # explicitly; a raw value v would be drawn at 10**v instead.
    annotated_epoch = 28
    annotated_loss = nn_history['val_losses'][nn_history['epochs'].index(annotated_epoch)]
    nn_fig.add_annotation(
        x=annotated_epoch,
        y=float(np.log10(annotated_loss)),
        text=f"Final Test Accuracy: {nn_history['final_test_acc']:.2%}",
        showarrow=True,
        arrowhead=1,
        bgcolor=COLORBLIND_PALETTE['yellow'],
        opacity=0.8
    )

    # Dashed vertical line marking a potential early stopping point. It spans
    # the plot height in paper coordinates because a data-space endpoint of 0
    # has no position on a logarithmic axis.
    early_stop_epoch = 10
    nn_fig.add_shape(
        type="line",
        x0=early_stop_epoch, x1=early_stop_epoch,
        yref="paper", y0=0, y1=1,
        line=dict(color="red", width=2, dash="dash"),
    )

    # Label for the marker, also placed in paper coordinates so it stays
    # inside the plot area regardless of the loss range.
    nn_fig.add_annotation(
        x=early_stop_epoch,
        yref="paper",
        y=0.85,
        text="Potential Early Stopping Point",
        showarrow=True,
        arrowhead=1,
        ax=0,
        ay=-40
    )

    # Update layout
    nn_fig.update_layout(
        title='Neural Network Training & Validation Loss',
        xaxis_title='Epoch',
        yaxis_title='Loss (log scale)',
        yaxis_type='log',
        hovermode='x unified',
        legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1),
        plot_bgcolor='rgba(0,0,0,0)',
        height=600
    )

    # Show the plot
    st.plotly_chart(nn_fig, use_container_width=True)

    # Add explanatory text about the training curve
    st.markdown("""
    ### Observations:

    - **Initial High Loss**: The training loss starts very high (7008.92) and drops dramatically in the first few epochs

    - **Convergence**: Around epoch 10, both training and validation losses stabilize, suggesting that training beyond this point provides diminishing returns

    - **Gap Between Curves**: After epoch 10, the training loss continues to decrease slightly while validation loss remains flat or increases slightly, indicating potential overfitting

    - **Final Performance**: The model achieves a final test accuracy of 50%, which indicates the Neural Network is struggling with generalization to unseen data

    ### Insight:

    - Consider implementing early stopping at around epoch 10
    - Use PCA for dimensionality reduction to improve generalization
    """)

# Tab 4: Class-Specific Performance
with tab4:
    st.header("Class-Specific Performance Comparison")
    st.markdown("""
    This section breaks down model performance for each exoplanet class.
    Analyze how each model performs on different classes and understand where they excel or struggle.
    """)

    # Class selector: a single class, or all three at once.
    selected_class = st.selectbox(
        "Select Class to Analyze",
        options=['All Classes', 'FALSE POSITIVE', 'CANDIDATE', 'CONFIRMED']
    )

    if selected_class == 'All Classes':
        class_filter = ['FALSE POSITIVE', 'CANDIDATE', 'CONFIRMED']
    else:
        class_filter = [selected_class]

    # Keep only the per-class metric rows for the chosen class(es).
    # class_data is reused by the tables and heatmaps further down this tab.
    class_data = filtered_class_metrics[filtered_class_metrics['class'].isin(class_filter)]

    # Metric selector
    selected_metric = st.radio(
        "Select Performance Metric",
        options=['F1 Score', 'Precision', 'Recall'],
        horizontal=True
    )

    # Map the display label to the column name used in the metrics frame.
    metric_map = {'F1 Score': 'f1', 'Precision': 'precision', 'Recall': 'recall'}
    selected_metric_key = metric_map[selected_metric]

    # Grouped bar chart: one bar per (model, class); split into one facet per
    # PCA option only when more than one option is selected.
    class_fig = px.bar(
        class_data,
        x='model',
        y=selected_metric_key,
        color='class',
        facet_col='pca' if len(pca_filter) > 1 else None,
        barmode='group',
        color_discrete_map=CLASS_COLORS,
        labels={
            selected_metric_key: selected_metric,
            'model': 'Model',
            'class': 'Exoplanet Class',
            'pca': 'PCA Option'
        },
        title=f'{selected_metric} by Class and Model',
        height=500
    )

    class_fig.update_layout(
        plot_bgcolor='rgba(0,0,0,0)',
        hovermode='closest',
        legend=dict(
            orientation='h',
            yanchor='bottom',
            y=1.02,
            xanchor='right',
            x=1
        ),
        # Fixed y-axis range for better comparison
        yaxis_range=[0.0, 1.0]
    )

    # The footnote is appended with add_annotation() instead of being passed as
    # update_layout(annotations=[...]): Plotly Express stores the facet titles
    # as layout annotations, and updating the annotations list in place would
    # overwrite those titles with the footnote whenever the chart is faceted.
    class_fig.add_annotation(
        x=0.5,
        y=-0.15,
        xref='paper',
        yref='paper',
        text='Higher values indicate better performance on that class',
        showarrow=False,
        font=dict(size=12)
    )

    # Improve hover information
    class_fig.update_traces(
        hovertemplate='<b>%{x}</b><br>Class: %{data.name}<br>%{y:.4f}<extra></extra>'
    )

    st.plotly_chart(class_fig, use_container_width=True)

    # Class distribution: pie chart on the left, commentary on the right.
    st.subheader("Class Distribution in Dataset")
    distribution_df = pd.DataFrame(
        [
            ('FALSE POSITIVE', 3744, 48.09),
            ('CANDIDATE', 1425, 18.30),
            ('CONFIRMED', 2616, 33.60),
        ],
        columns=['Class', 'Count', 'Percentage']
    )
    # Build the pie and apply trace/layout tweaks in one chain; each update_*
    # call returns the same figure object.
    pie_fig = (
        px.pie(
            distribution_df,
            values='Count',
            names='Class',
            color='Class',
            color_discrete_map=CLASS_COLORS,
            title='Class Distribution in Dataset',
            hover_data=['Percentage'],
            labels={'Percentage': 'Percentage (%)'}
        )
        .update_traces(
            textposition='inside',
            textinfo='percent+label',
            hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{customdata[0]:.2f}%<extra></extra>'
        )
        .update_layout(
            plot_bgcolor='rgba(0,0,0,0)',
            height=400,
            legend=dict(
                orientation='h',
                yanchor='bottom',
                y=-0.1,
                xanchor='center',
                x=0.5
            )
        )
    )
    chart_col, notes_col = st.columns([3, 2])
    with chart_col:
        st.plotly_chart(pie_fig, use_container_width=True)
    with notes_col:
        st.markdown("""
        ### Class Imbalance Impact
        The dataset has significant class imbalance:
        - **FALSE POSITIVE**: 48.09% (majority class)
        - **CONFIRMED**: 33.60%
        - **CANDIDATE**: 18.30% (minority class)

        This imbalance may explain why models typically perform better on FALSE POSITIVE and CONFIRMED classes,
        and struggle more with the CANDIDATE class.
        When evaluating models, consider using F1 score as it balances precision and recall,
        which is important in imbalanced datasets.
        """)
    # Per-class best model analysis
    st.subheader("Best Model per Class")
    # For every class keep the row whose selected metric is the highest.
    best_by_class = class_data.loc[class_data.groupby('class')[selected_metric_key].idxmax()]
    # rename() builds the display table as a new frame with readable headers.
    best_table = best_by_class[['class', 'model', 'pca', selected_metric_key]].rename(
        columns={
            'class': 'Class',
            'model': 'Best Model',
            'pca': 'PCA Option',
            selected_metric_key: selected_metric
        }
    )
    # Convert to styled DataFrame for display
    styled_table = best_table.style.background_gradient(subset=[selected_metric], cmap='Blues')
    st.dataframe(styled_table, use_container_width=True)
    # Class performance heatmap
    st.subheader("Class Performance Heatmap")
    st.markdown("""
    This heatmap shows how each model performs across different classes,
    highlighting strengths and weaknesses for each class-model combination.
    """)
    if len(pca_filter) == 1:
        # Single PCA option selected: one class-by-model heatmap.
        pivot_data = class_data.pivot_table(
            values=selected_metric_key,
            index='class',
            columns='model'
        )
        heatmap_fig = px.imshow(
            pivot_data,
            color_continuous_scale='Viridis',
            labels=dict(x='Model', y='Class', color=selected_metric),
            title=f'{selected_metric} Heatmap by Class and Model ({pca_filter[0]})',
            text_auto='.4f',
            aspect='auto',
            height=400
        )
    else:
        # Both PCA options selected: one heatmap panel per model or per PCA option.
        facet_col = st.radio("Group heatmap by:", options=['Model', 'PCA Option'], horizontal=True)
        if facet_col == 'Model':
            # One panel per model, PCA options along the x-axis.
            panel_column, axis_column, panels, axis_label = 'model', 'pca', selected_models, 'PCA'
        else:
            # One panel per PCA option, models along the x-axis.
            panel_column, axis_column, panels, axis_label = 'pca', 'model', pca_filter, 'Model'

        heatmap_fig = make_subplots(
            rows=1,
            cols=len(panels),
            subplot_titles=[f"{panel}" for panel in panels]
        )
        for i, panel in enumerate(panels):
            panel_data = class_data[class_data[panel_column] == panel].pivot_table(
                values=selected_metric_key,
                index='class',
                columns=axis_column
            )
            heatmap_fig.add_trace(
                go.Heatmap(
                    z=panel_data.values,
                    x=panel_data.columns,
                    y=panel_data.index,
                    colorscale='Viridis',
                    # Only the first colorbar is drawn, so every panel must use
                    # the same colour scale for that colorbar to be valid. The
                    # 0-1 range matches the fixed y-axis of the bar chart above.
                    zmin=0.0,
                    zmax=1.0,
                    showscale=i == 0,
                    text=panel_data.values,
                    texttemplate='%{text:.4f}',
                    hovertemplate=f'<b>%{{y}}</b><br>{axis_label}: %{{x}}<br>Value: %{{z:.4f}}<extra></extra>'
                ),
                row=1, col=i+1
            )
        # Title and height apply to both grouping modes. Previously this call
        # sat inside the "PCA Option" branch only, leaving the "Model" view
        # untitled and at the default height.
        heatmap_fig.update_layout(
            height=400,
            title_text=f'{selected_metric} Heatmap Comparison'
        )
    heatmap_fig.update_layout(plot_bgcolor='rgba(0,0,0,0)')
    st.plotly_chart(heatmap_fig, use_container_width=True)

# Tab 5: Confusion Matrices
# Lets the user pick one model / PCA option and renders its confusion matrix
# as an annotated heatmap. The names defined here (cm_model, cm_pca,
# confusion_matrix, cm_fig) are reused by the sections that follow in this tab.
with tab5:
    st.header("Confusion Matrices Analysis")
    st.markdown("""
    Confusion matrices show how well each model classifies each class.
    - The diagonal elements represent **correct** classifications.
    - Off-diagonal elements represent **misclassifications**.
    Analyze how different models misclassify each class and understand error patterns.
    """)
    # Model selector for confusion matrix (choices come from selected_models)
    cm_model = st.selectbox(
        "Select Model",
        options=selected_models
    )
    # PCA option for confusion matrix (choices come from pca_filter)
    cm_pca = st.radio(
        "Select PCA Option",
        options=pca_filter,
        horizontal=True
    )
    # Look up the matrix for the selected model and PCA option.
    # NOTE(review): later code calls .sum() and indexes [i, i] on this value, so it
    # is presumably a 3x3 NumPy array of counts -- confirm where confusion_matrices is built.
    confusion_matrix = confusion_matrices[cm_model][cm_pca]
    # Wrap the matrix in a DataFrame so rows (true class) and columns
    # (predicted class) carry readable labels in the heatmap.
    cm_df = pd.DataFrame(
        confusion_matrix,
        index=['TRUE: FALSE POSITIVE', 'TRUE: CANDIDATE', 'TRUE: CONFIRMED'],
        columns=['PRED: FALSE POSITIVE', 'PRED: CANDIDATE', 'PRED: CONFIRMED']
    )
    # Create a heatmap for the confusion matrix; text_auto prints each cell value.
    cm_fig = px.imshow(
        cm_df,
        color_continuous_scale='Blues',
        labels=dict(x='Predicted Class', y='True Class', color='Count'),
        title=f'Confusion Matrix - {cm_model} ({cm_pca})',
        text_auto=True,
        aspect='auto',
        height=600  # Increased height for better spacing
    )
    # Put predicted-class labels on top, keep tick labels horizontal, and add a
    # footnote below the plot area (paper coordinates).
    cm_fig.update_layout(
        plot_bgcolor='rgba(0,0,0,0)',
        xaxis=dict(
            side='top',
            tickangle=0  # Horizontal labels
        ),
        yaxis=dict(
            tickangle=0  # Horizontal labels
        ),
        margin=dict(l=150, r=100, t=150, b=100),  # Increased margins around the matrix
        annotations=[
            dict(
                x=0.5,
                y=-0.25,  # Moved annotation lower to avoid overlap
                xref='paper',
                yref='paper',
                text='Higher values on the diagonal indicate better performance',
                showarrow=False,
                font=dict(size=12)
            )
        ]
    )
    # Improve hover information: true class, predicted class, then the count.
    cm_fig.update_traces(
        hovertemplate='<b>%{y}</b><br>%{x}<br>Count: %{z}<extra></extra>'
    )
    chart_col, stats_col = st.columns([3, 2])
    with chart_col:
        st.plotly_chart(cm_fig, use_container_width=True)
    with stats_col:
        class_names = ['FALSE POSITIVE', 'CANDIDATE', 'CONFIRMED']

        def _ratio_or_zero(numerator, denominator):
            """Return numerator / denominator, or 0 when the denominator is not positive."""
            return numerator / denominator if denominator > 0 else 0

        def _class_scores(idx):
            """Derive precision, recall and F1 for class number idx from the confusion matrix."""
            true_pos = confusion_matrix[idx, idx]
            # Column total minus the diagonal: predicted as this class but wrong.
            false_pos = confusion_matrix[:, idx].sum() - true_pos
            # Row total minus the diagonal: this class but predicted as another.
            false_neg = confusion_matrix[idx, :].sum() - true_pos
            precision = _ratio_or_zero(true_pos, true_pos + false_pos)
            recall = _ratio_or_zero(true_pos, true_pos + false_neg)
            f1 = _ratio_or_zero(2 * (precision * recall), precision + recall)
            return {
                'Class': class_names[idx],
                'Precision': precision,
                'Recall': recall,
                'F1 Score': f1
            }

        # Overall accuracy = correctly classified samples / all samples.
        accuracy = np.diag(confusion_matrix).sum() / confusion_matrix.sum()
        metrics_df = pd.DataFrame([_class_scores(idx) for idx in range(len(class_names))])

        # The delta is shown only when accuracy exceeds the 0.7 reference level.
        if accuracy > 0.7:
            accuracy_delta = f"{accuracy - 0.7:.4f}"
        else:
            accuracy_delta = None
        st.metric(
            label="Overall Accuracy",
            value=f"{accuracy:.4f}",
            delta=accuracy_delta
        )

        st.markdown("### Per-Class Metrics")
        st.dataframe(
            metrics_df.style.background_gradient(subset=['F1 Score'], cmap='Blues'),
            use_container_width=True
        )

        st.markdown("### Error Analysis")
        # Zero the diagonal on a copy so the largest remaining cell is the
        # most frequent misclassification.
        off_diag = confusion_matrix.copy()
        np.fill_diagonal(off_diag, 0)
        true_idx, pred_idx = np.unravel_index(off_diag.argmax(), off_diag.shape)
        error_from = class_names[true_idx]
        error_to = class_names[pred_idx]
        error_count = off_diag[true_idx, pred_idx]
        st.markdown(f"""
        **Most common error**: {error_count} instances of **{error_from}**
        misclassified as **{error_to}**.
        This suggests the model may have difficulty distinguishing between
        these two classes. Consider:
        - Feature engineering to better separate these classes
        - Adding more training examples for these classes
        - Using class weights to balance the training
        """)
    # Add a normalized confusion matrix
    st.subheader("Normalized Confusion Matrix")
    st.markdown("""
    Normalized confusion matrices show the percentage of samples in each true class
    that are classified as each predicted class. This helps to understand the performance
    on each class regardless of its size in the dataset.
    """)
    # Normalize each row (true class) so that it sums to 1.
    # A true class with no samples has a row sum of 0; dividing by it would
    # produce NaN cells and a RuntimeWarning, so such rows are left at 0.
    row_sums = confusion_matrix.sum(axis=1, keepdims=True)
    norm_cm = np.divide(
        confusion_matrix,
        row_sums,
        out=np.zeros(confusion_matrix.shape, dtype=float),
        where=row_sums > 0
    )
    # Create a DataFrame for the normalized confusion matrix
    norm_cm_df = pd.DataFrame(
        norm_cm,
        index=['TRUE: FALSE POSITIVE', 'TRUE: CANDIDATE', 'TRUE: CONFIRMED'],
        columns=['PRED: FALSE POSITIVE', 'PRED: CANDIDATE', 'PRED: CONFIRMED']
    )
    # Create a heatmap for the normalized confusion matrix; cells print as percentages.
    norm_cm_fig = px.imshow(
        norm_cm_df,
        color_continuous_scale='Blues',
        labels=dict(x='Predicted Class', y='True Class', color='Proportion'),
        title=f'Normalized Confusion Matrix - {cm_model} ({cm_pca})',
        text_auto='.2%',
        aspect='auto',
        height=600  # Increased height for better spacing
    )
    norm_cm_fig.update_layout(
        plot_bgcolor='rgba(0,0,0,0)',
        xaxis=dict(
            side='top',
            tickangle=0  # Horizontal labels
        ),
        yaxis=dict(
            tickangle=0  # Horizontal labels
        ),
        margin=dict(l=150, r=100, t=150, b=100),  # Increased margins around the matrix
        annotations=[
            dict(
                x=0.5,
                y=-0.25,  # Moved annotation lower to avoid overlap
                xref='paper',
                yref='paper',
                text='Values represent the proportion of each true class classified as each predicted class',
                showarrow=False,
                font=dict(size=12)
            )
        ]
    )
    # Improve hover information
    norm_cm_fig.update_traces(
        hovertemplate='<b>%{y}</b><br>%{x}<br>Proportion: %{z:.2%}<extra></extra>'
    )
    st.plotly_chart(norm_cm_fig, use_container_width=True)
    # Confusion matrix comparison
    # Shown whenever at least two models are selected. With a single PCA option
    # the models are compared under that option; with several, the user picks
    # whether to vary the model or the PCA option.
    if len(selected_models) > 1 and len(pca_filter) > 0:
        st.subheader("Confusion Matrix Comparison")
        st.markdown("""
        Compare confusion matrices across different models and PCA options to identify
        which model performs best for specific classification tasks.
        """)
        # Choose what to compare. Comparing PCA options needs at least two of
        # them; otherwise only the model comparison is meaningful.
        if len(pca_filter) > 1:
            compare_option = st.radio(
                "Compare across:",
                options=['Models (same PCA)', 'PCA Options (same model)'],
                horizontal=True
            )
        else:
            compare_option = 'Models (same PCA)'

        if compare_option == 'Models (same PCA)':
            # Compare different models with the same PCA option
            if len(pca_filter) > 1:
                compare_pca = st.selectbox(
                    "Select PCA Option for Comparison",
                    options=pca_filter
                )
            else:
                # Nothing to choose from: use the only selected PCA option.
                compare_pca = pca_filter[0]
            comparison_panels = [(model, compare_pca) for model in selected_models]
        else:
            # Compare different PCA options for the same model
            compare_model = st.selectbox(
                "Select Model for Comparison",
                options=selected_models
            )
            comparison_panels = [(compare_model, pca) for pca in pca_filter]

        comparison_classes = ['FALSE POSITIVE', 'CANDIDATE', 'CONFIRMED']
        # Only the first colorbar is drawn, so all panels share one colour
        # scale: 0 up to the largest count among the compared matrices.
        shared_zmax = max(
            float(np.max(confusion_matrices[model][pca]))
            for model, pca in comparison_panels
        )

        # One subplot per (model, PCA option) pair being compared.
        cm_comparison_fig = make_subplots(
            rows=1,
            cols=len(comparison_panels),
            subplot_titles=[f"{model} ({pca})" for model, pca in comparison_panels],
            horizontal_spacing=0.1  # Increased spacing between subplots
        )
        for i, (model, pca) in enumerate(comparison_panels):
            cm = confusion_matrices[model][pca]
            cm_comparison_fig.add_trace(
                go.Heatmap(
                    z=cm,
                    x=comparison_classes,
                    y=comparison_classes,
                    colorscale='Blues',
                    zmin=0,
                    zmax=shared_zmax,
                    showscale=i == 0,  # Only show colorbar for first heatmap
                    text=cm,
                    texttemplate='%{text}',
                    hovertemplate='<b>True: %{y}</b><br>Predicted: %{x}<br>Count: %{z}<extra></extra>'
                ),
                row=1, col=i+1
            )

        cm_comparison_fig.update_layout(
            height=500,
            title_text='Confusion Matrix Comparison',
            margin=dict(l=150, r=100, t=150, b=100)  # Increased margins
        )
        # Without row/col these calls apply to every subplot axis.
        cm_comparison_fig.update_xaxes(
            title_text="Predicted Class",
            tickangle=0  # Horizontal labels
        )
        # go.Heatmap draws the first y category at the bottom; reversing the
        # axis puts the first class on the top row, matching the px.imshow
        # confusion matrix shown earlier in this tab.
        cm_comparison_fig.update_yaxes(
            title_text="True Class",
            tickangle=0,  # Horizontal labels
            autorange='reversed'
        )
        st.plotly_chart(cm_comparison_fig, use_container_width=True)

# Closing section: headline findings (left column) next to recommendations (right column)
st.markdown("---")
st.header("Summary and Recommendations")
col1, col2 = st.columns(2)
with col1:
  st.subheader("Key Findings")

  def _first_row_by(column, ascending=True):
    """Return the first row of filtered_metrics after sorting on the given column."""
    return filtered_metrics.sort_values(column, ascending=ascending).iloc[0]

  # Highest accuracy
  best_model, best_pca, best_accuracy = _first_row_by(
    'accuracy', ascending=False
  )[['model', 'pca', 'accuracy']]
  # Shortest training time
  fastest_model, fastest_pca, fastest_time = _first_row_by(
    'training_time'
  )[['model', 'pca', 'training_time']]
  # Lowest memory usage
  mem_eff_model, mem_eff_pca, mem_eff_usage = _first_row_by(
    'memory_usage'
  )[['model', 'pca', 'memory_usage']]

  st.markdown(f"""
  Based on the analysis, the key findings are:
  1. **Best Overall Model**: {best_model} ({best_pca}) with accuracy of {best_accuracy:.4f}
  2. **Fastest Model**: {fastest_model} ({fastest_pca}) with training time of {fastest_time:.2f} seconds
  3. **Most Memory-Efficient**: {mem_eff_model} ({mem_eff_pca}) using {mem_eff_usage:.2f} MB
  4. **Class Performance**:
      - All models perform best on the FALSE POSITIVE class
      - The CANDIDATE class is the most challenging to classify correctly
  5. **PCA Impact**:
      - PCA generally reduces training time and memory usage (except for SVM where it increased training time by 4.03%)
      - PCA slightly reduces model performance in most cases
  6. **Neural Network Performance**:
      - The Neural Network model without PCA shows clear signs of overfitting, with training loss decreasing while validation loss plateaus
      - With PCA, the Neural Network shows better generalization with closer training and validation losses
  """)

with col2:
    st.subheader("Recommendations")

    def _min_max_score(values, higher_is_better):
        """Rescale a metric column to [0, 1], where 1 is always the best value.

        When every value is identical there is nothing to rank, so each row
        receives the full score of 1.0.
        """
        low, high = values.min(), values.max()
        if high > low:
            if higher_is_better:
                return (values - low) / (high - low)
            return (high - values) / (high - low)
        return 1.0

    # Score every model/PCA combination on accuracy (higher is better) and on
    # training time and memory usage (lower is better).
    performance_metrics = filtered_metrics.copy()
    for score_column, source_column, higher_is_better in (
        ('norm_acc', 'accuracy', True),
        ('norm_time', 'training_time', False),
        ('norm_mem', 'memory_usage', False),
    ):
        performance_metrics[score_column] = _min_max_score(
            performance_metrics[source_column], higher_is_better
        )
    # Combine into a single score: accuracy carries half of the weight,
    # training time and memory usage a quarter each.
    performance_metrics['balance_score'] = (
        performance_metrics['norm_acc'] * 0.5 +
        performance_metrics['norm_time'] * 0.25 +
        performance_metrics['norm_mem'] * 0.25
    )
    # Find the best balanced model
    balanced_model_info = performance_metrics.sort_values('balance_score', ascending=False).iloc[0]
    balanced_model = balanced_model_info['model']
    balanced_pca = balanced_model_info['pca']
    balanced_acc = balanced_model_info['accuracy']
    balanced_time = balanced_model_info['training_time']
    balanced_mem = balanced_model_info['memory_usage']
    # best_*, fastest_* and mem_eff_* are computed in the "Key Findings" column.
    st.markdown(f"""
    Based on the analysis, the recommendations are:
    1. **For Maximum Accuracy**: Use {best_model} ({best_pca})
       - Best for critical applications where performance is paramount
       - Consider the additional computational cost
    2. **For Fast Development**: Use {fastest_model} ({fastest_pca})
       - Best for rapid prototyping and iteration
       - Good choice for real-time applications
    3. **For Resource-Constrained Environments**: Use {mem_eff_model} ({mem_eff_pca})
       - Best for deployment on limited hardware
       - Good for batch processing multiple datasets
    4. **Best Balance (Recommended)**: Use {balanced_model} ({balanced_pca})
       - Accuracy: {balanced_acc:.4f}
       - Training Time: {balanced_time:.2f} seconds
       - Memory Usage: {balanced_mem:.2f} MB
       - Provides the best trade-off between performance and resource usage
    5. **For the Neural Network Model**:
       - Use PCA to reduce overfitting and improve generalization
       - Consider early stopping around epoch 10 to prevent overfitting
       - Implement regularization techniques for better performance on the CANDIDATE class
    """)
'''

# Write the dashboard script to disk, serve it with Streamlit and expose it
# through an ngrok tunnel.
import os
import subprocess
import time
from getpass import getpass

from pyngrok import ngrok

DASHBOARD_PATH = 'kepler_dashboard.py'
STREAMLIT_LOG_PATH = 'streamlit.log'
STREAMLIT_PORT = 8501
STARTUP_WAIT_SECONDS = 15  # Give Streamlit time to fully start

# Modules the dashboard code refers to by alias (st, pd, np, px, go, make_subplots).
DASHBOARD_IMPORTS = (
    "import numpy as np\n"
    "import pandas as pd\n"
    "import plotly.express as px\n"
    "import plotly.graph_objects as go\n"
    "import streamlit as st\n"
    "from plotly.subplots import make_subplots\n"
)

# The script text starts by calling st.set_page_config before any import
# statement, so prepend the imports when it does not bring its own.
dashboard_source = streamlit_code
if 'import streamlit' not in dashboard_source:
    dashboard_source = DASHBOARD_IMPORTS + dashboard_source

# Actually save the code: the file that is announced, written and launched is
# now one and the same, so the cell no longer depends on a file left on disk
# by an earlier session.
with open(DASHBOARD_PATH, 'w', encoding='utf-8') as dashboard_file:
    dashboard_file.write(dashboard_source)
print(f"Dashboard code has been saved to '{DASHBOARD_PATH}'")

# Never commit the ngrok authtoken: read it from the environment, or ask for
# it interactively. Any token that was previously stored in this notebook
# should be considered leaked and revoked in the ngrok dashboard.
ngrok_token = os.environ.get('NGROK_AUTHTOKEN') or getpass('ngrok authtoken: ')
ngrok.set_auth_token(ngrok_token)

# Start Streamlit in the background. Its output goes to a log file rather
# than to pipes nobody reads, which could fill up and stall the server.
print(f"Starting Streamlit ({DASHBOARD_PATH})...")
with open(STREAMLIT_LOG_PATH, 'w', encoding='utf-8') as streamlit_log:
    process = subprocess.Popen(
        ['streamlit', 'run', DASHBOARD_PATH, '--server.port', str(STREAMLIT_PORT)],
        stdout=streamlit_log,
        stderr=subprocess.STDOUT,
    )

# Wait for Streamlit to start, then make sure it is still running before
# exposing the port.
time.sleep(STARTUP_WAIT_SECONDS)
if process.poll() is not None:
    raise RuntimeError(
        f"Streamlit exited with code {process.returncode}; see {STREAMLIT_LOG_PATH}"
    )

# Set up a tunnel to the Streamlit app
public_url = ngrok.connect(STREAMLIT_PORT)
print(f"Streamlit app URL: {public_url}")
print("Click the URL above to view dashboard")

Dashboard code has been saved to 'kepler_dashboard.py'
Starting Streamlit with fixed file...
Streamlit app URL: NgrokTunnel: "https://4cba-34-125-71-103.ngrok-free.app" -> "http://localhost:8501"
Click the URL above to view your interactive Kepler exoplanet dashboard!
