In [1]:
!pip install gradio
!pip install transformers
!pip install torch
!pip install google-cloud-storage

Collecting gradio
  Downloading gradio-4.41.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.112.1-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m837.3 kB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gra

In [2]:
import gradio as gr
from transformers import pipeline
import numpy as np
import spacy
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img

In [3]:
# Build the automatic-speech-recognition pipeline from the
# "openai/whisper-base.en" checkpoint. It is created once at module level so
# process_speech() can reuse it for every request.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.94k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/290M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/805 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.41M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

normalizer.json:   0%|          | 0.00/52.7k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/34.6k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.83k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/185k [00:00<?, ?B/s]

In [4]:
# Load the spaCy 'en_core_web_sm' pipeline once; extract_name() runs it on the
# transcribed sentence and reads the PERSON entities to find the patient's name.
nlp = spacy.load('en_core_web_sm')

In [5]:
def extract_name(text):
    """
    Extract the first person name mentioned in a sentence.

    Args:
      text: Spoken text string (the transcription of the user's request)
    Returns:
      The text of the first entity spaCy labels as PERSON, or the sentinel
      string "No name found" when the text is empty or contains no such entity.
    """
    # Nothing to analyse: report the sentinel instead of handing None/"" to spaCy
    if not text:
        return "No name found"

    doc = nlp(text)
    for ent in doc.ents:
        if ent.label_ == 'PERSON':
            return ent.text

    # The sentinel must be *returned*; callers compare against this exact string
    return "No name found"

In [6]:
# # Function to translate speech to text & return the spoken text (containing the patient's name)
# def transcribe(audio):
#     sr, y = audio
#     y = y.astype(np.float32)
#     y /= np.max(np.abs(y))

#     text = transcriber({"sampling_rate": sr, "raw": y})["text"]
#     return text


In [7]:
from PIL import Image
import requests
from io import BytesIO

# Get Image using patient's name from GCS
def image_from_gcs(name):
  """
  Download a patient's image from the public 'gh_patient_images' GCS bucket.

  Args:
    name: Patient's name; the image is fetched from '<bucket URL>/<name>.jpg'
  Returns:
    The decoded PIL image
  Raises:
    ValueError: if name is empty or not a string
    Exception: any error raised by the HTTP request (including a non-2xx
      status) or by decoding the image is propagated, so the caller's
      try/except can report the failure instead of receiving a non-image value
  """
  from urllib.parse import quote

  if not name or not isinstance(name, str):
    raise ValueError("A patient name is required to look up an image.")

  # URL of the image; the name comes from transcribed speech, so escape it
  # (including '/') to keep the request inside the intended bucket path
  gcs_file_url = 'https://storage.googleapis.com/gh_patient_images/' + quote(name.strip(), safe='') + '.jpg'

  # Fetch the image from GCS; the timeout stops a stalled request from hanging the app
  response = requests.get(gcs_file_url, timeout=10)
  # A missing object returns an error page, not image bytes — fail here, clearly
  response.raise_for_status()

  image = Image.open(BytesIO(response.content))
  # Image.open is lazy; force decoding now so corrupt data fails in this function
  image.load()
  return image

In [8]:
def prepare_image(image):
    """
    Turn a patient's image into a single-item batch for the model.

    Args:
      image: The patient's image, in a form accepted by tf.image.resize

    Steps:
    - Resize the image to 300x300
    - Convert it to a float32 numpy array and divide by 255
    - Prepend a batch axis

    Returns: The normalized array with a leading batch dimension of 1
    """
    target_size = (300, 300)

    # Bring the image to the target height and width
    resized = tf.image.resize(image, target_size)

    # Materialize as float32 before scaling
    pixels = np.array(resized).astype(np.float32)

    # Scale, then wrap in a batch of one (new leading axis)
    batch = np.expand_dims(pixels / 255, axis=0)
    return batch

In [9]:
# Get model's prediction
# Models already loaded in this session, keyed by path, so the (remote) model
# file is read once instead of on every request.
_MODEL_CACHE = {}


def _load_model(model_path):
    """Load the Keras model at model_path, reusing an already-loaded instance."""
    if model_path not in _MODEL_CACHE:
        _MODEL_CACHE[model_path] = tf.keras.models.load_model(model_path)
    return _MODEL_CACHE[model_path]


def prediction(name, image_normalized):
    """
    Run the melanoma CNN on a prepared image and describe the result.

    Args:
      name: Patient's name, used only in the returned message
      image_normalized: Batch produced by prepare_image(); only the first
        item of the batch is reported
    Returns:
      A human-readable string: the prediction with its confidence, or an
      "Error ..." message if the model could not be loaded or run. Every
      path returns a plain string so it can be shown in a text output.
    """
    # Path to model in GCS
    gcs_model_path = 'gs://gh_gcs_bucket/melanoma_cnn_model.keras'

    # Load the model (cached after the first successful load)
    try:
        model = _load_model(gcs_model_path)
    except Exception as e:
        return f"Error reading model: {e}"

    # Model Prediction
    try:
        prediction_probs = model.predict(image_normalized)
    except Exception as e:
        return f"Error during prediction: {e}"

    # NOTE(review): assumes the model emits one probability per class with
    # index 1 = malignant — confirm; with a single sigmoid output unit the
    # argmax below would always be 0.
    predicted_class = int(np.argmax(prediction_probs, axis=1)[0])
    confidence = float(np.max(prediction_probs))

    # Interpret the prediction
    label = "malignant" if predicted_class == 1 else "benign"
    return f"Our model's prediction for {name} is {label} with {confidence*100:.2f}% confidence."

In [10]:
# Use audio to get model's prediction
def process_speech(audio):
  """
  Turn a spoken request into a melanoma prediction for the named patient.

  Args:
    audio: (sample_rate, samples) pair as delivered by the audio input,
      or None when nothing was recorded
  Returns:
    A (message, image) pair matching the app's two outputs. On success the
    message is the prediction text and image is the patient's image; on any
    handled failure the message explains the problem and image is None.
  """
  if audio is None:
    return "No audio received. Please record a request and try again.", None

  # 1. Speech to text conversion and get patient name
  sr, y = audio
  y = np.asarray(y).astype(np.float32)
  # Collapse multi-channel recordings to mono before transcription
  if y.ndim > 1:
    y = y.mean(axis=1)

  # Peak-normalize; an empty or all-zero recording has no peak to divide by
  peak = np.max(np.abs(y)) if y.size else 0.0
  if peak == 0:
    return "No speech detected. Please record a request and try again.", None
  y /= peak

  text = transcriber({"sampling_rate": sr, "raw": y})["text"]

  # Get name from text
  name = extract_name(text)
  # Treat a missing (None/empty) name the same as the explicit sentinel
  if not name or name == "No name found":
    return "No name found", None

  # 2. Get Image from GCS file using Patient's name
  try:
    image = image_from_gcs(name)
  except Exception:
    return "No image content found.", None
  # Defensive: a tuple is not an image, so treat it as a failed lookup too
  if isinstance(image, tuple):
    return "No image content found.", None

  # 3. Prepare image for model prediction
  image_normalized = prepare_image(image)

  # 4. Get CNN Model's prediction
  results = prediction(name, image_normalized)
  # The text output needs a string; flatten a tuple-shaped message if one comes back
  if isinstance(results, tuple):
    results = " ".join(str(part) for part in results)
  return results, image

In [11]:
# Gradio UI: a microphone recording goes in; the prediction text and the
# analysed patient image come out (in that order, matching process_speech).
onco_assistant_app = gr.Interface(
    title="Physician's Assistant for Melanoma Detection",
    description="Please ask for results of one the these patient's sample images - Marie, Benjamin, Blake, Brooke, Maya & Mario",
    fn=process_speech,
    inputs=gr.Audio(sources="microphone"),
    outputs=[
        gr.Textbox(label="Prediction Results", lines=2),
        gr.Image(type="pil", label="Analyzed Image", visible=True),
    ],
)

In [12]:
# Launch the app. share=True additionally requests a temporary public URL
# for the interface (the share link is time-limited).
onco_assistant_app.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://a8777179ea129e3418.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


