# In [3]:
import os

from diagrams import Diagram, Cluster
from diagrams.custom import Custom
from diagrams.onprem.client import Users
from diagrams.onprem.storage import Ceph  # Storage for dataset and uploaded audio
from diagrams.programming.language import Python

# Paths to custom logos.
# All icons are read from one folder. Set the DIAGRAM_ICON_DIR environment
# variable to use a different folder; when it is unset, the original location
# is used, so existing setups keep working unchanged.
ICON_DIR = os.environ.get("DIAGRAM_ICON_DIR", "C:/Users/USER/Downloads")

# Forward slashes are used on purpose so the default paths stay identical to
# the previously hard-coded strings.
streamlit_icon = f"{ICON_DIR}/streamlit-logo.png"
tensorflow_icon = f"{ICON_DIR}/Tensorflow_logo.png"
ml_flow_icon = f"{ICON_DIR}/MLflow.png"
feature = f"{ICON_DIR}/Feature_engineering.png"
trained_model = f"{ICON_DIR}/ai-model.png"
speechtext = f"{ICON_DIR}/voice-recognition.png"
prediction = f"{ICON_DIR}/data-classification.png"
preprocess = f"{ICON_DIR}/preprocessing.png"
microphone = f"{ICON_DIR}/microphone.png"

# Build the system architecture diagram. Nodes and edges are declared in a
# fixed order on purpose: declaration order feeds into the Graphviz layout.
with Diagram(
    "Emotion Recognition System Architecture 3",
    show=False,
    direction="LR",
):

    # Entry point: the user reaches the system through the Streamlit UI.
    user = Users("User")
    frontend = Custom("Streamlit UI", streamlit_icon)
    user >> frontend

    # Audio input and preprocessing: two audio sources, one preprocessing
    # step, two feature extractors.
    with Cluster("Audio Input & Preprocessing"):
        live_audio = Custom("Real-Time Audio", microphone)
        file_upload = Ceph("Uploaded Audio (MP3/WAV)")
        preprocessing = Custom("Preprocessing", preprocess)
        mfcc_extraction = Python("MFCC Extraction")
        vggish_extraction = Python("VGGish Embeddings")

        # The UI hands audio to both sources; both converge on preprocessing,
        # which then fans out to the two feature extractors.
        frontend >> [live_audio, file_upload]
        [live_audio, file_upload] >> preprocessing
        preprocessing >> [mfcc_extraction, vggish_extraction]

    # Model inference: emotion prediction plus speech-to-text.
    with Cluster("Model Inference"):
        transformer_model = Custom("Transformer Model", trained_model)
        prediction_result = Custom("Emotion Prediction", prediction)
        transcription = Custom("Speech-to-Text", speechtext)

        # Both feature sets enter the model, which produces the prediction.
        [mfcc_extraction, vggish_extraction] >> transformer_model >> prediction_result
        # Speech-to-text is fed directly from preprocessing, not from the model.
        preprocessing >> transcription

    # Results are drawn as reverse edges back to the UI.
    frontend << [prediction_result, transcription]

    # Training pipeline: dataset -> features -> training -> saved model.
    with Cluster("Training Pipeline"):
        dataset = Ceph("RAVDESS + Augmented Data")
        feature_engineering = Custom("Feature Engineering", feature)
        training = Custom("Model Training", ml_flow_icon)
        saved_model = Custom("Trained Model", trained_model)

        # The last hop feeds the trained model into the inference model.
        dataset >> feature_engineering >> training >> saved_model >> transformer_model

    # The saved model is also linked back to the UI with a reverse edge.
    frontend << saved_model
