~~~
Copyright 2024 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
~~~
<table><tbody><tr>
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/google-health/cxr-foundation/blob/master/notebooks/quick_start_with_hugging_face.ipynb">
      <img alt="Google Colab logo" src="https://www.tensorflow.org/images/colab_logo_32px.png" width="32px"><br> Run in Google Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/google-health/cxr-foundation/blob/master/notebooks/quick_start_with_hugging_face.ipynb">
      <img alt="GitHub logo" src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" width="32px"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://huggingface.co/google/cxr-foundation">
      <img alt="HuggingFace logo" src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" width="32px"><br> View on HuggingFace
    </a>
  </td>
</tr></tbody></table>

# Quick start with Hugging Face
This Colab notebook provides a basic demo of using Chest X-ray (CXR) Foundation. CXR Foundation is an embedding model that generates machine learning representations, known as embeddings, from chest X-ray images and/or chest X-ray-related text. These embeddings can be used to develop custom models for CXR use cases with less data and compute than traditional model development methods require. Learn more about embeddings and their benefits on this [page](https://developers.google.com/health-ai-developer-foundations/cxr-foundation).

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Never store the access token in the notebook itself: a saved or shared
# notebook would leak it. Read it from the HF_TOKEN environment variable when
# it is set, otherwise prompt for it without echoing the input.
token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=token)

In [None]:
# @title Helper Functions to prepare inputs: text & image TF Example
!pip install tensorflow-text==2.17 pypng 2>&1 1>/dev/null
import functools
import io

import numpy as np
import png
import tensorflow as tf
import tensorflow_hub as tf_hub
import tensorflow_text as tf_text

# Helper functions for tokenizing text input
_BERT_PREPROCESS_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"


@functools.lru_cache(maxsize=None)
def _load_bert_preprocessor():
    """Builds the TF Hub BERT preprocessing layer once and caches it.

    Constructing the KerasLayer is independent of the text being tokenized,
    so it is created on first use and reused by every later call instead of
    being rebuilt for each input string.
    """
    return tf_hub.KerasLayer(_BERT_PREPROCESS_URL)


def bert_tokenize(text):
    """Tokenizes input text and returns token IDs and padding masks.

    Args:
        text: A single string to tokenize. It is lower-cased before being
            passed to the preprocessing layer.

    Returns:
        A tuple `(ids, paddings)`:
          * ids: int32 NumPy array of shape (1, 1, 128) holding the token IDs,
            with the end token (ID 102) replaced by 0.
          * paddings: float32 NumPy array of shape (1, 1, 128) that is 1.0 at
            padded positions and at the former end-token position, and 0.0
            elsewhere.

    Raises:
        AssertionError: If either output does not have shape (1, 1, 128).
    """
    preprocessor = _load_bert_preprocessor()
    out = preprocessor(tf.constant([text.lower()]))
    # astype() returns fresh arrays, so the in-place edits below are safe.
    ids = out['input_word_ids'].numpy().astype(np.int32)
    masks = out['input_mask'].numpy().astype(np.float32)
    # The preprocessor's mask marks real tokens; the output wants the inverse.
    paddings = 1.0 - masks
    # Blank out the end token and treat its position as padding.
    end_token_idx = ids == 102
    ids[end_token_idx] = 0
    paddings[end_token_idx] = 1.0
    # Insert a singleton axis: (1, 128) -> (1, 1, 128).
    ids = np.expand_dims(ids, axis=1)
    paddings = np.expand_dims(paddings, axis=1)
    assert ids.shape == (1, 1, 128)
    assert paddings.shape == (1, 1, 128)
    return ids, paddings

# Helper function for processing image data
def png_to_tfexample(image_array: np.ndarray) -> tf.train.Example:
    """Encodes a grayscale array as PNG and wraps it in a tf.train.Example.

    The input is shifted so that its minimum becomes zero. uint8 inputs are
    written as 8-bit PNGs without further scaling; every other dtype is
    stretched to the full 16-bit range (when it is not constant) and written
    as a 16-bit PNG.

    Args:
        image_array: The image pixels; must be 2-D.

    Returns:
        A tf.train.Example with the PNG bytes under 'image/encoded' and
        b'png' under 'image/format'.

    Raises:
        ValueError: If the array is not 2-D.
    """
    is_uint8 = image_array.dtype == np.uint8

    # Work in float32 with the darkest pixel moved to zero.
    shifted = image_array.astype(np.float32)
    shifted -= shifted.min()

    if not is_uint8:
        # Stretch to the full 16-bit range; a constant image stays all zeros.
        peak = shifted.max()
        if peak > 0:
            shifted *= 65535 / peak

    bitdepth = 8 if is_uint8 else 16
    pixel_array = shifted.astype(np.uint8 if is_uint8 else np.uint16)

    # Only single-channel (2-D) images can be written as greyscale PNG here.
    if pixel_array.ndim != 2:
        raise ValueError(f'Array must be 2-D. Actual dimensions: {pixel_array.ndim}')

    # Serialize the pixels to PNG in memory.
    n_rows, n_cols = pixel_array.shape
    writer = png.Writer(
        width=n_cols,
        height=n_rows,
        greyscale=True,
        bitdepth=bitdepth,
    )
    with io.BytesIO() as buffer:
        writer.write(buffer, pixel_array.tolist())
        png_bytes = buffer.getvalue()

    # Package the encoded image and its format tag.
    example = tf.train.Example()
    feature_map = example.features.feature
    feature_map['image/encoded'].bytes_list.value.append(png_bytes)
    feature_map['image/format'].bytes_list.value.append(b'png')
    return example

# Compute Embeddings

In [None]:
# Print the current working directory of the runtime.
!pwd

/content


In [None]:
# Mount Google Drive at /content/drive; later cells read from and write to
# paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import json

# Location of the reports file on the mounted Google Drive.
REPORTS_JSON_PATH = '/content/drive/MyDrive/FinalProject/mock_data/all_reports.json'

# Open and read the JSON file. JSON text is UTF-8, so decode it explicitly
# instead of relying on the runtime's default text encoding.
with open(REPORTS_JSON_PATH, 'r', encoding='utf-8') as file:
    data_json = json.load(file)

In [None]:
# `data_json` has already been parsed once by json.load. Decode a second time
# only when that first pass produced a JSON string (the file stores its payload
# as a JSON-encoded string); if it is already a parsed container, use it as is
# instead of failing with a TypeError.
if isinstance(data_json, (str, bytes, bytearray)):
    reports = json.loads(data_json)
else:
    reports = data_json

In [None]:
# Start from an empty list of per-report text embeddings; the embedding loop
# below appends to `embeddings`.
embeddings = []

In [None]:
from tqdm import tqdm

In [None]:
# @title Invoke Model with Text
import numpy as np
from huggingface_hub import snapshot_download

# Half-open range [REPORT_START, REPORT_END) of report indices to embed.
# REPORT_END is clamped to the number of available reports below.
# NOTE(review): the recorded output of this cell shows 4000 iterations and the
# save cell's filename mentions 6000_9999, so an earlier run presumably used a
# narrower range - adjust these two values to reproduce it.
REPORT_START = 0
REPORT_END = 10000
HF_LOCAL_DIR = '/content/hf'

# Download the model repository files. snapshot_download returns the local
# folder it populated, so the model is loaded from exactly what was fetched.
hf_model_dir = snapshot_download(
    repo_id="google/cxr-foundation",
    local_dir=HF_LOCAL_DIR,
    allow_patterns=['elixr-c-v2-pooled/*', 'pax-elixr-b-text/*'],
)

# Load the QFormer once, before the loop. The guard skips the reload when the
# cell is re-run in the same session.
if 'qformer_model' not in globals():
  qformer_model = tf.saved_model.load(f"{hf_model_dir}/pax-elixr-b-text")
qformer_signature = qformer_model.signatures['serving_default']

# Run QFormer with text only: the image input is all zeros and identical for
# every report, so build it a single time.
zero_image_feature = np.zeros([1, 8, 8, 1376], dtype=np.float32).tolist()

# Reset the accumulator so that re-running this cell does not duplicate rows.
embeddings = []
report_indices = range(REPORT_START, min(REPORT_END, len(reports)))
for i in tqdm(report_indices):
  tokens, paddings = bert_tokenize(reports[i])
  qformer_output = qformer_signature(
      image_feature=zero_image_feature,
      ids=tokens.tolist(),
      paddings=paddings.tolist(),
  )
  # Store plain NumPy arrays rather than eager tensors.
  embeddings.append(qformer_output['contrastive_txt_emb'].numpy())

if not embeddings:
  raise ValueError(
      f"No reports to embed in range [{REPORT_START}, {REPORT_END}) "
      f"with {len(reports)} reports available."
  )

text_embeddings = np.array(embeddings)
print("Text Embedding shape: ", text_embeddings.shape)
# Each entry is a (1, dim) row; show only the first five values of the first one.
print("First 5 embedding values: ", text_embeddings[0, 0, :5])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

(…)x-elixr-b-text/variables/variables.index:   0%|          | 0.00/5.91k [00:00<?, ?B/s]

fingerprint.pb:   0%|          | 0.00/46.0 [00:00<?, ?B/s]

(…)xr-c-v2-pooled/variables/variables.index:   0%|          | 0.00/28.8k [00:00<?, ?B/s]

saved_model.pb:   0%|          | 0.00/6.89M [00:00<?, ?B/s]

variables.data-00000-of-00001:   0%|          | 0.00/2.02G [00:00<?, ?B/s]

saved_model.pb:   0%|          | 0.00/9.17M [00:00<?, ?B/s]

fingerprint.pb:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

variables.data-00000-of-00001:   0%|          | 0.00/724M [00:00<?, ?B/s]

100%|██████████| 4000/4000 [1:49:23<00:00,  1.64s/it]


Text Embedding shape:  (4000, 1, 128)
First 5 tokens:  [[-0.1410696   0.06975039 -0.20902047  0.09288634  0.03252077 -0.09582689
  -0.00763591 -0.12482438  0.04656262 -0.12433634 -0.0541867  -0.04918359
  -0.0372279  -0.14575931 -0.12352236  0.06114453 -0.18072326  0.19746536
  -0.00941372  0.0031709   0.04723487 -0.09771044 -0.01552262  0.22538945
  -0.084242   -0.05744239  0.0251517  -0.07628777  0.07861981  0.04077673
   0.07518265  0.04284273 -0.01128395  0.07034989 -0.01030409  0.0613001
  -0.12440585  0.03919867  0.03047935  0.02775761  0.02067903 -0.0694648
   0.02549432 -0.12221021 -0.05438835 -0.11790224  0.08338971  0.0190734
  -0.09110063 -0.00417817  0.11337389 -0.11186189  0.12079409 -0.03509314
   0.10374022 -0.04979909 -0.04386095  0.0080727  -0.0328982  -0.00745914
   0.18754315 -0.09491649  0.06371722 -0.00359903  0.06850179 -0.11491389
  -0.1015761  -0.08863342 -0.008537   -0.09555308  0.1806654  -0.05765858
  -0.12239487 -0.00341187 -0.03344354 -0.06258988 -0.0481790

In [None]:
# Drop the singleton middle axis: (num_reports, 1, dim) -> (num_reports, dim).
# The ndim guard keeps this cell safe to re-run; squeezing an already 2-D
# array along axis 1 would raise.
if text_embeddings.ndim == 3:
    text_embeddings = np.squeeze(text_embeddings, axis=1)
text_embeddings.shape

(4000, 128)

In [None]:
# Persist the text embeddings to Google Drive as a .npy file.
# NOTE(review): the filename encodes the index range 6000-9999, while the
# embedding loop in this notebook iterates over range(10000) - confirm that the
# saved array actually matches the range in the name before reusing the file.
np.save('/content/drive/MyDrive/FinalProject/mock_data/train_reports_cxr_foundation_embeddings_6000_9999.npy', text_embeddings)

# Next steps

Explore the other [notebooks](https://github.com/google-health/cxr-foundation/blob/master/notebooks) to learn what else you can do with the model.