In [4]:
# Save script to a Python file
script_content = """
# Importing required libraries
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE, RandomOverSampler
import plotly.graph_objects as go

# Preset dataset path
# NOTE(review): absolute, machine-specific path. The loader further down reports
# an error when no file exists at this location, so adjust it per environment.
PRESET_DATASET_PATH = "C:/Users/HP/Downloads/Combined_XAIC_Dimensions.csv"

# Function to add missing columns based on existing dataset attributes dynamically
def map_dimensions(dataset):
    # Derive whichever of the four dimension columns the dataset does not yet have.
    #
    # dataset: pandas DataFrame. It is modified in place and also returned.
    #     'Data Type' must exist when 'Ability to Abstract' or
    #     'Data Type Understanding' has to be derived, and 'Algorithms' must exist
    #     when 'Algorithm Function' has to be derived (KeyError otherwise).
    # Returns: the same DataFrame, now guaranteed to contain all four columns.
    #
    # Columns that already exist are left untouched.

    def _abstraction_level(data_type):
        if data_type in ['Image', 'Audio']:
            return 'High'
        if data_type in ['Text', 'Alphanumeric']:
            return 'Medium'
        return 'Low'

    def _understanding_level(data_type):
        if data_type in ['Image', 'Video']:
            return 'High'
        if data_type == 'Audio':
            return 'Medium'
        return 'Low'

    def _algorithm_function(algorithms):
        # Missing cells arrive as NaN/None, and a substring test on those raises
        # TypeError. Treat any non-text cell as text without keywords so it
        # falls through to the default category instead of crashing.
        text = algorithms if isinstance(algorithms, str) else ''
        if 'Regression' in text or 'Neural Network' in text:
            return 'Predictive'
        if 'Clustering' in text:
            return 'Descriptive'
        return 'Prescriptive'

    def _interaction_type(function_kind):
        if function_kind == 'Prescriptive':
            return 'Direct'
        if function_kind == 'Predictive':
            return 'Indirect'
        return 'Automatic'

    if 'Ability to Abstract' not in dataset.columns:
        dataset['Ability to Abstract'] = dataset['Data Type'].apply(_abstraction_level)
    if 'Data Type Understanding' not in dataset.columns:
        dataset['Data Type Understanding'] = dataset['Data Type'].apply(_understanding_level)
    if 'Algorithm Function' not in dataset.columns:
        dataset['Algorithm Function'] = dataset['Algorithms'].apply(_algorithm_function)
    # Must come last: it reads 'Algorithm Function', which may have just been derived.
    if 'Human Interaction Type' not in dataset.columns:
        dataset['Human Interaction Type'] = dataset['Algorithm Function'].apply(_interaction_type)
    return dataset

# Function to recommend the most suitable Explainable AI (XAI) tool
def recommend_xai_tool(ability_to_abstract, data_type_understanding, algorithm_function, human_interaction, best_model):
    # Pick an XAI tool from the four dimension values and the chosen model name.
    #
    # Writes a section heading to the Streamlit page, then returns a
    # (tool_name, explanation) tuple of strings.
    #
    # Rules are evaluated from most specific to least specific. Previously the
    # broad LIME rule ran first; since it matches every Descriptive/Predictive
    # algorithm, the "Activation Atlases" rule could never fire and the other
    # targeted rules were shadowed for those algorithms. LIME is now the last
    # rule before the generic SHAP fallback, so inputs that match a targeted
    # rule get that tool instead of LIME.
    st.write("### 🔬 Recommended XAI Tool for Explainable AI")

    high_abstraction = ability_to_abstract == 'High'
    high_understanding = data_type_understanding == 'High'
    uses_random_forest = best_model == "Random Forest"
    descriptive_or_predictive = algorithm_function in ['Descriptive', 'Predictive']

    if high_abstraction and high_understanding and uses_random_forest:
        return "DeepLIFT", "DeepLIFT is recommended for models with complex data like images or audio, offering insights into neuron activations in neural networks."

    if high_abstraction and descriptive_or_predictive and uses_random_forest:
        return "Activation Atlases", "Activation Atlases are ideal for vision-based tasks, providing a visualization of feature activations."

    if high_abstraction and high_understanding:
        return "SHAP (Shapley Values)", "SHAP is recommended as it provides detailed feature importance and model explanation, ideal for high abstraction and complex models."

    if human_interaction == 'Direct' and algorithm_function == 'Prescriptive':
        return "RuleX", "RuleX is recommended for prescriptive models and direct human interactions, providing rule-based explanations for decision-making."

    # Broad catch-all: anything with medium data understanding, or any
    # descriptive/predictive algorithm not claimed by a rule above.
    if data_type_understanding == 'Medium' or descriptive_or_predictive:
        return "LIME", "LIME is recommended as it works effectively with descriptive or predictive models, providing localized explanations for the data."

    return "SHAP (Shapley Values)", "SHAP is versatile and can be used for various data types and algorithms, making it a reliable choice."

# Function to display a preview of the selected dimensions
def preview_selected_dimensions(ability_to_abstract, data_type_understanding, algorithm_function, human_interaction):
    # Show a heading followed by a two-column table that pairs each dimension
    # name with the value passed in for it. Returns None.
    st.write("### 🛠️ Selected Dimensions Summary")

    dimension_names = [
        'Ability to Abstract',
        'Data Type Understanding',
        'Algorithm Function',
        'Human Interaction Type',
    ]
    dimension_values = [
        ability_to_abstract,
        data_type_understanding,
        algorithm_function,
        human_interaction,
    ]
    summary_table = pd.DataFrame({'Dimension': dimension_names, 'Value': dimension_values})
    st.dataframe(summary_table)

# Visualization to compare XAI tools based on their features
def visualize_xai_comparison(best_tool):
    # Draw a radar chart of example scores for five XAI tools, emphasising best_tool.
    #
    # best_tool: name of the recommended tool. The trace whose name equals it is
    #     drawn filled, solid and fully opaque; every other trace is unfilled,
    #     dashed and half-transparent. The name is also used in the chart title.
    # Returns None; the figure is rendered with st.plotly_chart.
    st.write("### 🛠️ Why this XAI tool is recommended?")

    # Criteria and scores for various XAI tools (example scores)
    criteria = ["Interpretability", "Scalability", "Usability", "Compatibility"]
    tools_scores = {
        "LIME": [4, 3, 5, 4],
        "SHAP (Shapley Values)": [5, 4, 4, 5],
        "DeepLIFT": [4, 3, 3, 5],
        "Activation Atlases": [3, 2, 4, 5],
        "RuleX": [5, 4, 3, 4],
    }

    # One radar trace per tool
    fig = go.Figure()
    for tool, scores in tools_scores.items():
        is_best = tool == best_tool
        fig.add_trace(go.Scatterpolar(
            r=scores,
            theta=criteria,
            fill='toself' if is_best else 'none',
            name=tool,
            line=dict(dash='solid' if is_best else 'dash'),
            opacity=1.0 if is_best else 0.5
        ))

    # Tick labels are matched to tick values by position, so both lists must
    # have the same length. The previous six values (0-5) against five labels
    # put every label one step too low and left 5 without a label; scores run
    # from 1 to 5, so label exactly those.
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 5],
                tickvals=[1, 2, 3, 4, 5],
                ticktext=["Poor", "Below Avg", "Average", "Good", "Excellent"]
            )
        ),
        title=f"Comparison of {best_tool} with Other XAI Tools",
        legend_title="XAI Tools",
        height=600,
        width=800
    )
    st.plotly_chart(fig)
# Function to create visualization for the 4 dimensions mapped to knowledge and explanation
def visualize_dimensions_mapped_to_knowledge_and_explanation(ability_to_abstract, data_type_understanding, algorithm_function, human_interaction):
    # Render a grouped bar chart with a "Knowledge" and an "Explanation" bar for
    # each of the four dimensions.
    #
    # Each dimension value is mapped to a level of 5, 3 or 1:
    #   ability_to_abstract / data_type_understanding: 'High' -> 5, 'Medium' -> 3, else 1
    #   algorithm_function: 'Predictive' or 'Prescriptive' -> 5, 'Descriptive' -> 3, else 1
    #   human_interaction: 'Direct' -> 5, 'Indirect' -> 3, else 1
    # Both series use the same mapping, so the two bars of a dimension are equal.
    # Returns None; the figure is rendered with st.plotly_chart.
    st.write("### 📊 Visualization of Dimensions: Knowledge and Explanation Levels")

    def _level(value, top_values, middle_value):
        if value in top_values:
            return 5
        return 3 if value == middle_value else 1

    # Computed once and shared by both series (they were duplicated expressions).
    levels = [
        _level(ability_to_abstract, ['High'], 'Medium'),
        _level(data_type_understanding, ['High'], 'Medium'),
        _level(algorithm_function, ['Predictive', 'Prescriptive'], 'Descriptive'),
        _level(human_interaction, ['Direct'], 'Indirect'),
    ]

    # Constructing a DataFrame for visualization
    data = pd.DataFrame({
        'Dimension': ['Ability to Abstract', 'Data Type Understanding', 'Algorithm Function', 'Human Interaction Type'],
        'Knowledge': levels,
        'Explanation': list(levels)
    })

    # Create grouped bar chart with Plotly
    fig = go.Figure()

    # Add bars for Knowledge
    fig.add_trace(go.Bar(
        x=data['Dimension'],
        y=data['Knowledge'],
        name='Knowledge Level',
        marker_color='blue',
        hovertemplate='<b>Dimension:</b> %{x}<br>' +
                      '<b>Knowledge Level:</b> %{y}<br>' +
                      '<extra></extra>'
    ))

    # Add bars for Explanation
    fig.add_trace(go.Bar(
        x=data['Dimension'],
        y=data['Explanation'],
        name='Explanation Level',
        marker_color='orange',
        hovertemplate='<b>Dimension:</b> %{x}<br>' +
                      '<b>Explanation Level:</b> %{y}<br>' +
                      '<extra></extra>'
    ))

    # Tick labels are matched to tick values by position. With ticks at 1-5 the
    # levels 1, 3 and 5 read "Low", "Medium" and "High"; the previous six tick
    # values (0-5) shifted every label one step down and left 5 unlabelled.
    fig.update_layout(
        title="Knowledge and Explanation Levels for Selected Dimensions",
        xaxis=dict(title="Dimensions"),
        yaxis=dict(
            title="Scaled Level (0 to 5)",
            tickvals=[1, 2, 3, 4, 5],
            ticktext=["Low", "Below Avg", "Medium", "Good", "High"]
        ),
        barmode='group',
        legend=dict(title="Levels"),
        height=500,
        width=800
    )
    st.plotly_chart(fig)

    # Function to visualize why Random Forest is a strong candidate for Explainable AI
def visualize_rf_xai_strength():
    # Draw a radar chart of fixed example scores for five model families on
    # four explainability criteria, with the "Random Forest" trace emphasised
    # (filled, solid, fully opaque) and the others dashed and half-transparent.
    # Takes no arguments and returns None; the figure is rendered with
    # st.plotly_chart.
    st.write("### 🌟 Random Forest a strong choice for Explainable AI")

    # Criteria for model comparison
    criteria = ["Feature Interpretability", "Model Complexity", "Performance", "Global vs. Local Explanations"]

    # Scores for Random Forest and other common models
    models_scores = {
        "Random Forest": [5, 4, 5, 5],
        "Neural Networks": [2, 1, 5, 3],
        "SVM": [3, 3, 4, 3],
        "Linear Regression": [5, 5, 3, 2],
        "KNN": [3, 4, 4, 2]
    }

    # One radar trace per model
    fig = go.Figure()
    for model, scores in models_scores.items():
        highlighted = model == "Random Forest"
        fig.add_trace(go.Scatterpolar(
            r=scores,
            theta=criteria,
            fill='toself' if highlighted else 'none',
            name=model,
            line=dict(dash='solid' if highlighted else 'dash'),
            opacity=1.0 if highlighted else 0.5
        ))

    # Tick labels are matched to tick values by position, so both lists must
    # have the same length. Six values (0-5) against five labels put every
    # label one step too low and left 5 without a label; scores run from 1 to 5.
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 5],
                tickvals=[1, 2, 3, 4, 5],
                ticktext=["Poor", "Below Avg", "Avg", "Good", "Excellent"]
            )
        ),
        title="Comparison of Random Forest with Other Models for Explainable AI",
        legend_title="Models",
        height=600,
        width=800
    )

    st.plotly_chart(fig)


# Streamlit app begins here
#
# Flow: load the preset CSV -> derive missing dimension columns -> let the user
# filter by occupation / industry / data type -> summarise and chart the most
# frequent dimension values of the filtered rows -> train a Random Forest on the
# full dataset for a user-chosen target column -> recommend and chart an XAI tool.
st.title("🔍 Explainable AI")
st.write("📂 Using the preset dataset to begin.")

# Load the preset dataset. Only the read itself is inside the try body so that
# problems while rendering the preview are not reported as load failures.
try:
    dataset = pd.read_csv(PRESET_DATASET_PATH)
except FileNotFoundError:
    st.error("Preset dataset not found. Please ensure the file exists at the specified path.")
    dataset = None
except (pd.errors.EmptyDataError, pd.errors.ParserError) as load_error:
    # A file that exists but is empty or malformed used to crash the app.
    st.error(f"Preset dataset could not be parsed as CSV: {load_error}")
    dataset = None
else:
    st.write("Preset dataset loaded successfully! 🎉")
    st.write("📋 Preview of the preset dataset:")
    st.dataframe(dataset.head())

if dataset is not None:
    dataset = map_dimensions(dataset)

    # Selection inputs for filtering the dataset
    occupations = dataset['Occupation'].unique()
    selected_occupation = st.selectbox("👔 Choose an occupation", occupations)
    industries = dataset['Industry'].unique()
    selected_industry = st.selectbox("🏢 Choose an industry", industries)
    data_types = dataset['Data Type'].unique()
    selected_data_type = st.selectbox("🗂️ Choose a data type", data_types)

    if selected_occupation and selected_industry and selected_data_type:
        filtered_data = dataset[(dataset['Occupation'] == selected_occupation) &
                                (dataset['Industry'] == selected_industry) &
                                (dataset['Data Type'] == selected_data_type)]

        if not filtered_data.empty:
            st.write("Filtered dataset preview:")
            st.dataframe(filtered_data.head())

            # Most frequent value of each dimension among the filtered rows
            ability_to_abstract = filtered_data['Ability to Abstract'].mode()[0]
            data_type_understanding = filtered_data['Data Type Understanding'].mode()[0]
            algorithm_function = filtered_data['Algorithm Function'].mode()[0]
            human_interaction = filtered_data['Human Interaction Type'].mode()[0]

            # Preview selected dimensions
            st.write("### Selected Dimensions Summary")
            preview_data = pd.DataFrame({
                'Dimension': ['Ability to Abstract', 'Data Type Understanding', 'Algorithm Function', 'Human Interaction Type'],
                'Value': [ability_to_abstract, data_type_understanding, algorithm_function, human_interaction]
            })
            st.dataframe(preview_data)

            # Visualize dimensions mapped to knowledge and explanation
            visualize_dimensions_mapped_to_knowledge_and_explanation(
                ability_to_abstract, data_type_understanding, algorithm_function, human_interaction
            )

            # Target column selection for ML modeling (the model is trained on
            # the full dataset, not on the filtered rows)
            target_column = st.selectbox("🎯 Select the Target Column for Prediction", dataset.columns)
            if target_column:
                features = dataset.drop(columns=[target_column])
                target = dataset[target_column]

                # Label-encode text columns. Casting to str first lets missing
                # values become their own category instead of making the
                # encoder fail on a mix of strings and NaN.
                features = features.apply(
                    lambda col: LabelEncoder().fit_transform(col.astype(str)) if col.dtype == 'object' else col
                )
                target = LabelEncoder().fit_transform(
                    target.astype(str) if target.dtype == 'object' else target
                )

                # Split BEFORE oversampling. Resampling the whole dataset first
                # puts duplicated or synthetic copies of validation rows into
                # the training set, which inflates the reported accuracy.
                X_train, X_temp, y_train, y_temp = train_test_split(features, target, test_size=0.3, random_state=42)
                # The test half is held out and not evaluated in this script.
                X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

                # Handle imbalanced training data using SMOTE or RandomOverSampler
                class_counts = pd.Series(y_train).value_counts()
                minority_class_size = int(class_counts.min())

                if len(class_counts) < 2:
                    # Oversamplers need at least two classes to balance.
                    st.warning("Training split contains a single class. Skipping oversampling.")
                elif minority_class_size < 5:
                    st.warning("Minority class too small for SMOTE. Using RandomOverSampler instead.")
                    oversampler = RandomOverSampler(random_state=42)
                    X_train, y_train = oversampler.fit_resample(X_train, y_train)
                else:
                    # k_neighbors must stay below the minority class size.
                    smote = SMOTE(random_state=42, k_neighbors=min(5, minority_class_size - 1))
                    X_train, y_train = smote.fit_resample(X_train, y_train)

                # Random Forest model training and validation
                rf_model = RandomForestClassifier(
                    random_state=42, n_estimators=200, max_depth=10,
                    min_samples_split=10, min_samples_leaf=5
                )
                rf_model.fit(X_train, y_train)
                rf_val_pred = rf_model.predict(X_val)
                rf_accuracy = accuracy_score(y_val, rf_val_pred)

                st.write(f"Random Forest Validation Accuracy: **{rf_accuracy:.2f}**")

                # Explain why Random Forest is the best for XAI
                visualize_rf_xai_strength()

                # Recommend the best XAI tool
                best_xai_tool, xai_tool_explanation = recommend_xai_tool(
                    ability_to_abstract,
                    data_type_understanding,
                    algorithm_function,
                    human_interaction,
                    "Random Forest"
                )
                st.write(f"**{best_xai_tool}**")
                st.write(f"Why? {xai_tool_explanation}")

                # Generate the radar chart for XAI comparison
                visualize_xai_comparison(best_xai_tool)
        else:
            st.warning("No matching data found for the selected combination.")





"""

import os


def _write_utf8(path, text):
    """Write *text* to *path* as UTF-8, replacing any existing file."""
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(text)


# Write the generated app source to disk.
file_name = "explainable_AI_app.py"
_write_utf8(file_name, script_content)
print(f"Script saved as {file_name}")

# The theme file lives in a .streamlit directory; create it when absent.
os.makedirs(".streamlit", exist_ok=True)

# Theme settings written verbatim to .streamlit/config.toml.
theme_config = """

[theme]
backgroundColor="#eff0f3"
secondaryBackgroundColor="#f38f20"
textColor="#0c0c0c"

"""

_write_utf8(".streamlit/config.toml", theme_config)
print("Theme configuration saved to .streamlit/config.toml")

Script saved as explainable_AI_app.py
Theme configuration saved to .streamlit/config.toml


In [None]:
!streamlit run explainable_AI_app.py