In [1]:
import streamlit as st

st.title("Hello, Streamlit!")
st.write("This is a simple Streamlit app running from Jupyter.")
import streamlit as st
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration, SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
import os
import torch
import soundfile as sf
from datasets import load_dataset
import matplotlib.pyplot as plt
import numpy as np
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# Model Description
model_description = """
This application utilizes image captioning and text-to-speech models to generate a caption for an uploaded image 
and convert the caption into speech.

The image captioning model is based on [Salesforce's BLIP architecture](https://huggingface.co/Salesforce/blip-image-captioning-base), which can generate descriptive captions for images.

The text-to-speech model, based on [Microsoft's SpeechT5](https://huggingface.co/microsoft/speecht5_tts), converts the generated caption into speech with the help of a 
HiFiGAN vocoder.
"""


@st.cache_resource
def initialize_image_captioning():
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    return processor, model

@st.cache_resource
def initialize_speech_synthesis():
    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
    return processor, model, vocoder, speaker_embeddings

def generate_caption(processor, model, image):
    inputs = processor(image, return_tensors="pt")
    out = model.generate(**inputs)
    output_caption = processor.decode(out[0], skip_special_tokens=True)
    return output_caption

def generate_speech(processor, model, vocoder, speaker_embeddings, caption):
    inputs = processor(text=caption, return_tensors="pt")
    speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
    sf.write("speech.wav", speech.numpy(), samplerate=16000)

def play_sound():
    audio_file = open("speech.wav", 'rb')
    audio_bytes = audio_file.read()
    st.audio(audio_bytes, format='audio/wav')

def visualize_speech():
    data, samplerate = sf.read("speech.wav")
    duration = len(data) / samplerate

    # Create time axis
    time = np.linspace(0., duration, len(data))

     # Plot the speech waveform
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(time, data)
    ax.set(xlabel="Time (s)", ylabel="Amplitude", title="Speech Waveform")

    # Display the plot using st.pyplot()
    st.pyplot(fig)

def main():
    st.set_page_config(
    page_title="Image-to-Speech",
    page_icon="📸",
    initial_sidebar_state="collapsed",
    menu_items={
        'Get Help': 'https://www.extremelycoolapp.com/help',
        'Report a bug': "https://www.extremelycoolapp.com/bug",
        'About': "# This is a header. This is an *extremely* cool app!"
    }
)

    st.sidebar.markdown("---")
    st.sidebar.markdown("Developed by Alim Tleuliyev")
    st.sidebar.markdown("Contact: [alim.tleuliyev@nu.edu.kz](mailto:alim.tleuliyev@nu.edu.kz)")
    st.sidebar.markdown("GitHub: [Repo](https://github.com/AlimTleuliyev/image-to-audio)")

    st.markdown(
        """
        <style>
        .container {
            max-width: 800px;
        }
        .title {
            text-align: center;
            font-size: 32px;
            font-weight: bold;
            margin-bottom: 20px;
        }
        .description {
            margin-bottom: 30px;
        }
        .instructions {
            margin-bottom: 20px;
            padding: 10px;
            background-color: #f5f5f5;
            border-radius: 5px;
        }
        </style>
        """,
        unsafe_allow_html=True
    )

    # Title
    st.markdown("<div class='title'>Image Captioning and Text-to-Speech</div>", unsafe_allow_html=True)
    col1, col2, col3 = st.columns([1,2,1])

    with col1:
        st.write("")

    with col2:
        st.image("images/logo.png", use_column_width=True, caption="Generated by DALL-E")

    with col3:
        st.write("")

    # Model Description
    st.markdown("<div class='description'>" + model_description + "</div>", unsafe_allow_html=True)

    # Instructions
    with st.expander("Instructions"):
        st.markdown("1. Upload an image or provide the URL of an image.")
        st.markdown("2. Click the 'Generate Caption and Speech' button.")
        st.markdown("3. The generated caption will be displayed, and the speech will start playing.")


    # Choose image source
    image_source = st.radio("Select Image Source:", ("Upload Image", "Open from URL"))

    image = None

    if image_source == "Upload Image":
        # File uploader for image
        uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
        if uploaded_file is not None:
            image = Image.open(uploaded_file)
        else:
            image = None

    else:
        # Input box for image URL
        url = st.text_input("Enter the image URL:")
        if url:
            try:
                response = requests.get(url, stream=True)
                if response.status_code == 200:
                    image = Image.open(response.raw)
                else:
                    st.error("Error loading image from URL.")
                    image = None
            except requests.exceptions.RequestException as e:
                st.error(f"Error loading image from URL: {e}")
                image = None

    # Generate caption and play sound button
    if image is not None:
        # Display the uploaded image
        st.image(image, caption='Uploaded Image', use_column_width=True)

        # Initialize image captioning models
        caption_processor, caption_model = initialize_image_captioning()

        # Initialize speech synthesis models
        speech_processor, speech_model, speech_vocoder, speaker_embeddings = initialize_speech_synthesis()

        # Generate caption
        with st.spinner("Generating Caption..."):
            output_caption = generate_caption(caption_processor, caption_model, image)

        # Display the caption
        st.subheader("Caption:")
        st.write(output_caption)
        
        # Generate speech from the caption
        with st.spinner("Generating Speech..."):
            generate_speech(speech_processor, speech_model, speech_vocoder, speaker_embeddings, output_caption)

        
        st.subheader("Audio:")
        # Play the generated sound
        play_sound()

        # Visualize the speech waveform
        with st.expander("See visualization"):
            visualize_speech()


if __name__ == "__main__":
    main()

2025-04-22 12:17:23.183 
  command:

    streamlit run C:\Users\khush\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]


RuntimeError: Runtime hasn't been created!

In [3]:
import streamlit as st
import os

def main():
    st.title("My Streamlit App")

    image_path = "images/logo.png"

    if os.path.exists(image_path):
        st.image(image_path, use_column_width=True, caption="Generated by DALL·E")
    else:
        st.warning(f"Image not found: {image_path}")

if __name__ == "__main__":
    main()
'''

# Save the code to a Python file
with open("my_app.py", "w") as f:
    f.write(code)

print("✅ File 'my_app.py' created.")
print("👉 Now open your terminal and run:\n   streamlit run my_app.py")

SyntaxError: incomplete input (2674410274.py, line 16)

In [5]:
# This will create and save a Streamlit app file
code = '''import streamlit as st
import os

def main():
    st.title("My Streamlit App")

    image_path = "images/logo.png"

    if os.path.exists(image_path):
        st.image(image_path, use_column_width=True, caption="Generated by DALL·E")
    else:
        st.warning(f"Image not found: {image_path}")

if __name__ == "__main__":
    main()
'''

# Save the code to a Python file
with open("my_app.py", "w") as f:
    f.write(code)

print("✅ File 'my_app.py' created.")
print("👉 Now open your terminal and run:\n   streamlit run my_app.py")


✅ File 'my_app.py' created.
👉 Now open your terminal and run:
   streamlit run my_app.py


In [7]:
import os
os.getcwd()


'C:\\Users\\khush\\Downloads\\image-to-audio-master (1)\\image-to-audio-master\\Untitled Folder'

In [9]:
!pip install streamlit




DEPRECATION: Loading egg at c:\users\khush\anaconda3\lib\site-packages\huggingface_hub-0.29.2-py3.8.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330

[notice] A new release of pip is available: 23.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
!python -m pip install --upgrade pip


Collecting pip
  Downloading pip-25.0.1-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-25.0.1-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
    --------------------------------------- 0.0/

DEPRECATION: Loading egg at c:\users\khush\anaconda3\lib\site-packages\huggingface_hub-0.29.2-py3.8.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330
ERROR: Exception:
Traceback (most recent call last):
  File "C:\Users\khush\AppData\Roaming\Python\Python312\site-packages\pip\_internal\cli\base_command.py", line 180, in exc_logging_wrapper
    status = run_func(*args)
             ^^^^^^^^^^^^^^^
  File "C:\Users\khush\AppData\Roaming\Python\Python312\site-packages\pip\_internal\cli\req_command.py", line 245, in wrapper
    return func(self, options, args)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\khush\AppData\Roaming\Python\Python312\site-packages\pip\_internal\commands\install.py", line 452, in run
    installed = install_given_reqs(
                ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\khush\AppData\Roaming\Python\Python312\sit

In [13]:
!python -m pip install --upgrade pip


Collecting pip
  Using cached pip-25.0.1-py3-none-any.whl.metadata (3.7 kB)
Using cached pip-25.0.1-py3-none-any.whl (1.8 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.3.1
    Uninstalling pip-23.3.1:
      Successfully uninstalled pip-23.3.1
  Rolling back uninstall of pip
  Moving to c:\users\khush\appdata\roaming\python\python312\scripts\pip.exe
   from C:\Users\khush\AppData\Local\Temp\pip-uninstall-go2ck4iw\pip.exe
  Moving to c:\users\khush\appdata\roaming\python\python312\scripts\pip3.12.exe
   from C:\Users\khush\AppData\Local\Temp\pip-uninstall-go2ck4iw\pip3.12.exe
  Moving to c:\users\khush\appdata\roaming\python\python312\scripts\pip3.exe
   from C:\Users\khush\AppData\Local\Temp\pip-uninstall-go2ck4iw\pip3.exe
  Moving to c:\users\khush\appdata\roaming\python\python312\site-packages\pip-23.3.1.dist-info\
   from C:\Users\khush\AppData\Roaming\Python\Python312\site-packages\~ip-23.3.1.dist-info
  Moving to c:\users

DEPRECATION: Loading egg at c:\users\khush\anaconda3\lib\site-packages\huggingface_hub-0.29.2-py3.8.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330
ERROR: Exception:
Traceback (most recent call last):
  File "C:\Users\khush\AppData\Roaming\Python\Python312\site-packages\pip\_internal\cli\base_command.py", line 180, in exc_logging_wrapper
    status = run_func(*args)
             ^^^^^^^^^^^^^^^
  File "C:\Users\khush\AppData\Roaming\Python\Python312\site-packages\pip\_internal\cli\req_command.py", line 245, in wrapper
    return func(self, options, args)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\khush\AppData\Roaming\Python\Python312\site-packages\pip\_internal\commands\install.py", line 452, in run
    installed = install_given_reqs(
                ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\khush\AppData\Roaming\Python\Python312\sit

In [15]:
!pip install --upgrade huggingface_hub


Collecting huggingface_hub
  Downloading huggingface_hub-0.30.2-py3-none-any.whl.metadata (13 kB)
Downloading huggingface_hub-0.30.2-py3-none-any.whl (481 kB)
   ---------------------------------------- 0.0/481.4 kB ? eta -:--:--
    --------------------------------------- 10.2/481.4 kB ? eta -:--:--
   --- ----------------------------------- 41.0/481.4 kB 487.6 kB/s eta 0:00:01
   ---------- ----------------------------- 122.9/481.4 kB 1.2 MB/s eta 0:00:01
   ---------------------------------- ----- 419.8/481.4 kB 2.9 MB/s eta 0:00:01
   ---------------------------------------- 481.4/481.4 kB 2.5 MB/s eta 0:00:00
Installing collected packages: huggingface_hub
  Attempting uninstall: huggingface_hub
    Found existing installation: huggingface_hub 0.29.2
    Uninstalling huggingface_hub-0.29.2:


DEPRECATION: Loading egg at c:\users\khush\anaconda3\lib\site-packages\huggingface_hub-0.29.2-py3.8.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330
ERROR: Could not install packages due to an OSError: [WinError 32] The process cannot access the file because it is being used by another process: 'c:\\users\\khush\\anaconda3\\lib\\site-packages\\huggingface_hub-0.29.2-py3.8.egg'
Consider using the `--user` option or check the permissions.


[notice] A new release of pip is available: 23.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [17]:
# Uninstall pip
!python -m pip uninstall -y pip

# Reinstall pip using ensurepip
!python -m ensurepip --upgrade


Found existing installation: pip 23.3.1
Uninstalling pip-23.3.1:
  Successfully uninstalled pip-23.3.1
Looking in links: c:\Users\khush\AppData\Local\Temp\tmpe209h1rc


DEPRECATION: Loading egg at c:\users\khush\anaconda3\lib\site-packages\huggingface_hub-0.29.2-py3.8.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330


In [19]:
!pip uninstall -y huggingface_hub


Found existing installation: huggingface_hub 0.29.2
Uninstalling huggingface_hub-0.29.2:


DEPRECATION: Loading egg at c:\users\khush\anaconda3\lib\site-packages\huggingface_hub-0.29.2-py3.8.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330
ERROR: Exception:
Traceback (most recent call last):
  File "C:\Users\khush\anaconda3\Lib\shutil.py", line 847, in move
    os.rename(src, real_dst)
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'c:\\users\\khush\\anaconda3\\lib\\site-packages\\huggingface_hub-0.29.2-py3.8.egg' -> 'C:\\Users\\khush\\AppData\\Local\\Temp\\pip-uninstall-288v2fzp\\huggingface_hub-0.29.2-py3.8.egg'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\khush\anaconda3\Lib\site-packages\pip\_internal\cli\base_command.py", line 106, in _run_wrapper
    status = _inner_run()
             ^^

In [21]:
!pip install huggingface_hub




DEPRECATION: Loading egg at c:\users\khush\anaconda3\lib\site-packages\huggingface_hub-0.29.2-py3.8.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330


In [23]:
import os
import shutil

egg_path = r"c:\users\khush\anaconda3\lib\site-packages\huggingface_hub-0.29.2-py3.8.egg"

if os.path.exists(egg_path):
    try:
        shutil.rmtree(egg_path)
        print("Old egg file removed successfully.")
    except Exception as e:
        print(f"Failed to remove: {e}")
else:
    print("Egg file not found.")


Failed to remove: [WinError 267] The directory name is invalid: 'c:\\users\\khush\\anaconda3\\lib\\site-packages\\huggingface_hub-0.29.2-py3.8.egg'


In [25]:
import os
import shutil

egg_path = r"c:\users\khush\anaconda3\lib\site-packages\huggingface_hub-0.29.2-py3.8.egg"

if os.path.exists(egg_path):
    try:
        shutil.rmtree(egg_path)
        print("Old egg file removed successfully.")
    except Exception as e:
        print(f"Failed to remove: {e}")
else:
    print("Egg file not found.")


Failed to remove: [WinError 267] The directory name is invalid: 'c:\\users\\khush\\anaconda3\\lib\\site-packages\\huggingface_hub-0.29.2-py3.8.egg'


In [27]:
import os

egg_path = r"c:\users\khush\anaconda3\lib\site-packages\huggingface_hub-0.29.2-py3.8.egg"

if os.path.exists(egg_path):
    try:
        os.remove(egg_path)
        print("Old egg file removed successfully.")
    except Exception as e:
        print(f"Failed to remove: {e}")
else:
    print("Egg file not found.")


Failed to remove: [WinError 32] The process cannot access the file because it is being used by another process: 'c:\\users\\khush\\anaconda3\\lib\\site-packages\\huggingface_hub-0.29.2-py3.8.egg'


In [29]:
!pip install huggingface_hub --no-cache-dir




DEPRECATION: Loading egg at c:\users\khush\anaconda3\lib\site-packages\huggingface_hub-0.29.2-py3.8.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330


In [31]:
import os

egg_path = r"c:\users\khush\anaconda3\lib\site-packages\huggingface_hub-0.29.2-py3.8.egg"

try:
    os.remove(egg_path)
    print("✅ .egg file deleted successfully.")
except Exception as e:
    print(f"❌ Could not delete file: {e}")


❌ Could not delete file: [WinError 32] The process cannot access the file because it is being used by another process: 'c:\\users\\khush\\anaconda3\\lib\\site-packages\\huggingface_hub-0.29.2-py3.8.egg'


In [33]:
❌ Could not delete file: [WinError 32] The process cannot access the file because it is being used by another process: 'c:\\users\\khush\\anaconda3\\lib\\site-packages\\huggingface_hub-0.29.2-py3.8.egg'

SyntaxError: invalid character '❌' (U+274C) (1193035753.py, line 1)