diff --git a/.bazelrc b/.bazelrc
index 5788b7d3b..a2ee67b8c 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -43,7 +43,6 @@ build:android_x86_64 --fat_apk_cpu=x86_64
# iOS configs
build:ios --apple_platform_type=ios
-build:ios --apple_bitcode=embedded --copt=-fembed-bitcode
build:ios --copt=-Wno-c++11-narrowing
build:ios --cxxopt=-fobjc-arc
diff --git a/.github/workflows/android-build-test.yml b/.github/workflows/android-build-test.yml
index 1d5fd62d1..31df40b35 100644
--- a/.github/workflows/android-build-test.yml
+++ b/.github/workflows/android-build-test.yml
@@ -100,15 +100,22 @@ jobs:
rm /tmp/${SAMSUNG_LIB}.zip && \
mkdir -p mobile_back_samsung/samsung/lib/internal && \
mv /tmp/${SAMSUNG_LIB}/* mobile_back_samsung/samsung/lib/internal/
+ - name: Download QTI SDK
+ env:
+ QTI_SDK: qairt-2.25.0.240728-linux
+ run: |
+ gsutil cp gs://mobile-app-build-290400_github-actions/lib/v4.1/${QTI_SDK}.zip /tmp/ && \
+ unzip /tmp/${QTI_SDK}.zip -d /tmp/${QTI_SDK} && \
+ rm /tmp/${QTI_SDK}.zip && \
+ mv /tmp/${QTI_SDK}/* mobile_back_qti/
- name: Download QTI libraries
env:
- QTI_LIB: qaisw-2.20.0.240223_linux
+ QTI_LIB: StableDiffusionShared
run: |
- gsutil cp gs://mobile-app-build-290400_github-actions/lib/v4.0/${QTI_LIB}.zip /tmp/ && \
+ gsutil cp gs://mobile-app-build-290400_github-actions/lib/v4.1/${QTI_LIB}.zip /tmp/ && \
unzip /tmp/${QTI_LIB}.zip -d /tmp/${QTI_LIB} && \
rm /tmp/${QTI_LIB}.zip && \
- mkdir -p mobile_back_qti/${QTI_LIB} && \
- mv /tmp/${QTI_LIB}/* mobile_back_qti/${QTI_LIB}/
+ mv /tmp/${QTI_LIB}/* mobile_back_qti/cpp/backend_qti/StableDiffusionShared/
- name: Cache bazel
uses: actions/cache@v4
with:
@@ -143,6 +150,7 @@ jobs:
PERF_TEST: true
WITH_TFLITE: 0
WITH_QTI: 1
+ WITH_STABLEDIFFUSION: 1
run: |
make flutter/android/release flutter/android/apk flutter/android/test-apk
gsutil mv output/android-apks/test-main.apk $GCLOUD_BUCKET_PATH/test-main-qti.apk
@@ -191,6 +199,7 @@ jobs:
WITH_QTI: 1
WITH_SAMSUNG: 1
WITH_APPLE: 0
+ WITH_STABLEDIFFUSION: 1
run: |
make flutter/android/release
gsutil cp flutter/build/app/outputs/bundle/release/app-release.aab $GCLOUD_BUCKET_PATH/app-release.aab
diff --git a/.gitignore b/.gitignore
index 17e98e56c..a7be10a01 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,13 +4,12 @@
.ijwb
.idea
.vscode
+.fvm
/bazel-*
/output
/output_logs
/datasets/output
/datasets/downloads
-/mobile_back_qti/snpe-*
-/mobile_back_qti/qaisw-*
*.so
*.apk
*.tflite
@@ -22,3 +21,7 @@ __pycache__/
*.log
*.iml
*.env
+
+*.g.dart
+*.gen.dart
+*.gen.h
diff --git a/WORKSPACE b/WORKSPACE
index 9c0bb08ea..dbcbc5c2f 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -1,7 +1,7 @@
workspace(name = "mlperf_app")
-load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
http_archive(
name = "bazel_skylib",
@@ -49,11 +49,11 @@ http_archive(
],
)
-load("@rules_python//python:repositories.bzl", "python_register_toolchains")
load(
"@org_tensorflow//tensorflow/tools/toolchains/python:python_repo.bzl",
"python_repository",
)
+load("@rules_python//python:repositories.bzl", "python_register_toolchains")
python_repository(name = "python_version_repo")
@@ -107,3 +107,10 @@ snpe_version_loader(
name = "snpe_version_loader",
workspace_dir = __workspace_dir__,
)
+
+load("//mobile_back_qti/cpp/backend_qti/StableDiffusionShared:variables.bzl", "stable_diffusion_external_deps_shared")
+
+stable_diffusion_external_deps_shared(
+ name = "stable_diffusion_external_deps_shared",
+ workspace_dir = __workspace_dir__,
+)
diff --git a/flutter/android/android-docker.mk b/flutter/android/android-docker.mk
index 21b03d649..3e38a8386 100644
--- a/flutter/android/android-docker.mk
+++ b/flutter/android/android-docker.mk
@@ -1,4 +1,4 @@
-# Copyright 2020-2022 The MLPerf Authors. All Rights Reserved.
+# Copyright 2020-2024 The MLPerf Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -42,6 +42,7 @@ flutter_common_docker_flags= \
--env WITH_PIXEL=${WITH_PIXEL} \
--env WITH_MEDIATEK=${WITH_MEDIATEK} \
--env proxy_bazel_args=${proxy_bazel_args} \
+ --env BAZEL_OUTPUT_ROOT_ARG="--output_user_root=/image-workdir/cache/bazel" \
--env OFFICIAL_BUILD=${OFFICIAL_BUILD} \
--env FIREBASE_CRASHLYTICS_ENABLED=${FIREBASE_CRASHLYTICS_ENABLED} \
--env FLUTTER_BUILD_NUMBER=${FLUTTER_BUILD_NUMBER} \
diff --git a/flutter/android/android.mk b/flutter/android/android.mk
index 4347167bd..1a9f70c86 100644
--- a/flutter/android/android.mk
+++ b/flutter/android/android.mk
@@ -20,7 +20,7 @@ ANDROID_NDK_API_LEVEL?=33
flutter/android: flutter/android/libs
flutter/android/release: flutter/check-release-env flutter/android flutter/prepare flutter/android/apk flutter/android/appbundle
-flutter/android/libs: flutter/android/libs/checksum flutter/android/libs/build flutter/android/libs/copy
+flutter/android/libs: flutter/android/libs/deps flutter/android/libs/checksum flutter/android/libs/build flutter/android/libs/copy
# run `make flutter/android/apk` before `flutter/android/test-apk`
flutter/android/test-apk: flutter/android/test-apk/main flutter/android/test-apk/helper
@@ -35,6 +35,10 @@ else
@echo "Skip checksum validation"
endif
+.PHONY: flutter/android/libs/deps
+flutter/android/libs/deps:
+ ${backend_qti_libs_deps}
+
.PHONY: flutter/android/libs/build
flutter/android/libs/build:
bazel ${BAZEL_OUTPUT_ROOT_ARG} ${proxy_bazel_args} ${sonar_bazel_startup_options} \
diff --git a/flutter/android/docker/Dockerfile b/flutter/android/docker/Dockerfile
index 04ad769b3..48f19dea3 100644
--- a/flutter/android/docker/Dockerfile
+++ b/flutter/android/docker/Dockerfile
@@ -30,6 +30,7 @@ RUN curl --proto '=https' -L https://github.com/bazelbuild/bazelisk/releases/dow
chmod +x /usr/local/bin/bazel
ENV ANDROID_SDK_ROOT=/opt/android
+ENV ANDROID_HOME=/opt/android
WORKDIR $ANDROID_SDK_ROOT/cmdline-tools
# sdkmanager expects to be placed into `$ANDROID_SDK_ROOT/cmdline-tools/tools`
RUN curl --proto '=https' -L https://dl.google.com/android/repository/commandlinetools-linux-7583922_latest.zip | jar x && \
@@ -73,3 +74,94 @@ RUN mkdir $ANDROID_SDK_HOME && \
# Git repo will be mounted at '/image-workdir/project'
RUN git config --global --add safe.directory /image-workdir/project
+
+# Add 32-bit support since the adb binaries in our SDKs are 32-bit
+RUN dpkg --add-architecture i386
+ARG DEBIAN_FRONTEND=noninteractive
+ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
+
+# build-essential: for rest-kit (rest-kit is needed for crad-repo)
+# file: used by the NDK to determine whether the host is 32- or 64-bit
+# libtbb-dev: used to parallelize post-processing for pose estimation
+# libcanberra-gtk-module, libgtk2.0-dev: required by the OpenCV runtime
+# libstdc++6:i386: adds support for 32-bit binaries
+# libxtst6: required to run an IDE (e.g. IntelliJ) inside Docker
+RUN apt-get update && apt-get install --no-install-recommends -y build-essential \
+ cmake \
+ curl \
+ git \
+ emacs \
+ file \
+ less \
+ libcanberra-gtk-module \
+ libgtk2.0-dev \
+ libtbb-dev \
+ libstdc++6:i386 \
+ libxtst6
+
+# pkg-config: required by the OpenCV runtime
+# software-properties-common: for curl
+# xterm: provides `resize` to avoid line wrapping
+# zip: required when sourcing the SNPE envsetup script
+RUN apt-get update && apt-get install --no-install-recommends -y mc \
+ meld \
+ pkg-config \
+ python3-dev \
+ software-properties-common \
+ sudo \
+ tmux \
+ tree \
+ unzip \
+ vim \
+ wget \
+ xterm \
+ zip \
+ ffmpeg \
+ libjpeg-dev \
+ zlib1g-dev && \
+ apt-get clean autoclean
+
+# Install CMake 3.19.3
+# apt ships 3.10.2 as its latest version, so remove it first
+RUN apt-get remove -y cmake
+RUN wget -q https://github.com/Kitware/CMake/releases/download/v3.19.3/cmake-3.19.3-Linux-x86_64.sh -O /opt/cmake-3.19.3-Linux-x86_64.sh && \
+ chmod +x /opt/cmake-3.19.3-Linux-x86_64.sh && mkdir -p /opt/cmake && \
+ bash /opt/cmake-3.19.3-Linux-x86_64.sh --skip-license --prefix=/opt/cmake && \
+ rm -rf /opt/cmake-3.19.3-Linux-x86_64.sh
+# Add CMAKE into PATH
+ENV PATH "/opt/cmake/bin:${PATH}"
+
+# OpenCV
+ENV CMAKE_TOOLCHAIN_FILE "${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake"
+ENV ANDROID_ABI "arm64-v8a"
+ENV API_LEVEL "31"
+ENV ANDROID_TOOLCHAIN_NAME "aarch64-linux-android-4.9"
+ARG COMMON_CMAKE_OPTIONS="-DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=release \
+ -DBUILD_ZLIB=ON -DWITH_FFMPEG=ON -DBUILD_TESTS=OFF \
+ -DWITH_TBB=ON -DBUILD_PERF_TESTS=OFF -DWITH_IPP=OFF \
+ -DWITH_OPENEXR=ON -DWITH_JASPER=ON -DWITH_PNG=ON -DBUILD_FAT_JAVA_LIB=OFF -DWITH_IMGCODEC=ON"
+# Ref: https://docs.opencv.org/3.4.0/d7/d9f/tutorial_linux_install.html
+RUN wget -q https://github.com/opencv/opencv/archive/3.4.7/opencv-3.4.7.tar.gz -O /tmp/3.4.7.tar.gz && \
+ tar -C /tmp -xvf /tmp/3.4.7.tar.gz && \
+ # First build for arm-android
+ cd /tmp/opencv-3.4.7 && mkdir -p /opt/opencv-3.4.7_android/ release_android && cd release_android && \
+ cmake -DCMAKE_TOOLCHAIN_FILE="${CMAKE_TOOLCHAIN_FILE}" \
+ -DANDROID_NDK="${ANDROID_NDK}" \
+ -DANDROID_HOME="${ANDROID_HOME}" \
+ -DANDROID_STL=c++_shared \
+ -DBUILD_ANDROID_PROJECTS=OFF \
+ -DANDROID_NATIVE_API_LEVEL="${API_LEVEL}" \
+ -DANDROID_ABI="${ANDROID_ABI}" \
+ -DWITH_CUDA=ON -DWITH_OPENCL=ON \
+ $COMMON_CMAKE_OPTIONS \
+ -D WITH_ITT=OFF \
+ -DCMAKE_INSTALL_PREFIX=/opt/opencv-3.4.7_android/ .. && \
+ make -j16 && \
+ sudo make -j16 install && cp -rf ./3rdparty/ /opt/opencv-3.4.7_android/
+
+# Set the variables to be used for actual app development/build
+ENV ANDROID_SYSROOT "${ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/sysroot"
+ENV ANDROID_PLATFORM "${API_LEVEL}"
+ENV ANDROID_PLATFORM_TOOLS "${ANDROID_HOME}/platform-tools"
+ENV PATH "${ANDROID_PLATFORM_TOOLS}:${PATH}"
+ENV LD_LIBRARY_PATH "/usr/local/lib/:${LD_LIBRARY_PATH}"
\ No newline at end of file
diff --git a/flutter/assets/icons/ic_task_stable_diffusion.svg b/flutter/assets/icons/ic_task_stable_diffusion.svg
new file mode 100644
index 000000000..cb8745b58
--- /dev/null
+++ b/flutter/assets/icons/ic_task_stable_diffusion.svg
@@ -0,0 +1,12 @@
+
diff --git a/flutter/assets/icons/ic_task_stable_diffusion_white.svg b/flutter/assets/icons/ic_task_stable_diffusion_white.svg
new file mode 100644
index 000000000..61eb0a374
--- /dev/null
+++ b/flutter/assets/icons/ic_task_stable_diffusion_white.svg
@@ -0,0 +1,12 @@
+
diff --git a/flutter/cpp/binary/cmdline.mk b/flutter/cpp/binary/cmdline.mk
index fce713b17..f920044ae 100644
--- a/flutter/cpp/binary/cmdline.mk
+++ b/flutter/cpp/binary/cmdline.mk
@@ -15,7 +15,11 @@
include flutter/cpp/binary/cmdline-docker.mk
-cmdline/android/bins/release: cmdline/android/bins/build cmdline/android/bins/copy
+cmdline/android/bins/release: cmdline/android/libs/deps cmdline/android/bins/build cmdline/android/bins/copy
+
+.PHONY: cmdline/android/libs/deps
+cmdline/android/libs/deps:
+ ${backend_qti_libs_deps}
.PHONY: cmdline/android/bins/build
cmdline/android/bins/build:
diff --git a/flutter/cpp/datasets/coco_gen.cc b/flutter/cpp/datasets/coco_gen.cc
index 6fbd161b3..b8f468d99 100644
--- a/flutter/cpp/datasets/coco_gen.cc
+++ b/flutter/cpp/datasets/coco_gen.cc
@@ -106,18 +106,20 @@ std::vector<uint8_t> CocoGen::ProcessOutput(const int sample_idx,
backend_->ConvertOutputs(total_byte, OUTPUT_WIDTH, OUTPUT_HEIGHT,
output_pixels.data());
- std::string raw_output_filename =
- raw_output_dir_ + "/output_" + std::to_string(sample_idx) + ".rgb8";
- dump_output_pixels(output_pixels, raw_output_filename);
-
if (!output_pixels.empty()) {
sample_ids_.insert(sample_idx);
CaptionRecord* record = samples_.at(sample_idx).get();
- LOG(INFO) << "caption: " << record->get_caption();
- caption_map[sample_idx] = record->get_caption();
+ LOG(INFO) << "caption_id: " << record->get_caption_id()
+ << " caption_text: " << record->get_caption_text();
+ caption_id_map[sample_idx] = record->get_caption_id();
+ caption_text_map[sample_idx] = record->get_caption_text();
output_pixels_map[sample_idx] = output_pixels;
attention_mask_map[sample_idx] = record->get_attention_mask_vector();
input_ids_map[sample_idx] = record->get_input_ids_vector();
+ std::string raw_output_filename = raw_output_dir_ + "/caption_id_" +
+ std::to_string(record->get_caption_id()) +
+ ".rgb8";
+ dump_output_pixels(output_pixels, raw_output_filename);
return output_pixels;
} else {
return std::vector<uint8_t>();
@@ -130,7 +132,8 @@ float CocoGen::ComputeAccuracy() {
float total_score = 0.0f;
float total_samples = static_cast<float>(sample_ids_.size());
for (int sample_idx : sample_ids_) {
- std::string caption = caption_map[sample_idx];
+ int caption_id = caption_id_map[sample_idx];
+ std::string caption_text = caption_text_map[sample_idx];
std::vector<int32_t> input_ids = input_ids_map[sample_idx];
std::vector<int32_t> attention_mask = attention_mask_map[sample_idx];
std::vector<uint8_t> output_pixels = output_pixels_map[sample_idx];
@@ -140,8 +143,8 @@ float CocoGen::ComputeAccuracy() {
}
float score =
score_predictor_.predict(attention_mask, input_ids, pixel_values);
- LOG(INFO) << "sample_idx: " << sample_idx << " caption: " << caption
- << " score: " << score;
+ LOG(INFO) << "sample_idx: " << sample_idx << " caption_id: " << caption_id
+ << " caption_text: " << caption_text << " score: " << score;
total_score += score;
}
float avg_score = total_score / total_samples;
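For reference, the raw dump written above is just the packed RGB8 byte buffer that ProcessOutput produced, stored verbatim under the new caption_id-based filename. A minimal sketch of such a dump helper (the name dump_rgb8 is hypothetical; the app's actual dump_output_pixels may differ):

#include <cstdint>
#include <fstream>
#include <string>
#include <vector>

// Write packed RGB8 pixels (3 bytes per pixel, row-major) to a binary file,
// e.g. raw_output_dir_ + "/caption_id_42.rgb8".
void dump_rgb8(const std::vector<uint8_t>& pixels, const std::string& path) {
  std::ofstream out(path, std::ios::binary);
  out.write(reinterpret_cast<const char*>(pixels.data()),
            static_cast<std::streamsize>(pixels.size()));
}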
diff --git a/flutter/cpp/datasets/coco_gen.h b/flutter/cpp/datasets/coco_gen.h
index f89016dcd..e99f6f25a 100644
--- a/flutter/cpp/datasets/coco_gen.h
+++ b/flutter/cpp/datasets/coco_gen.h
@@ -83,7 +83,8 @@ class CocoGen : public Dataset {
std::set<int> sample_ids_;
bool isModelFound;
std::string raw_output_dir_;
- std::unordered_map<int, std::string> caption_map;
+ std::unordered_map<int, int> caption_id_map;
+ std::unordered_map<int, std::string> caption_text_map;
std::unordered_map<int, std::vector<uint8_t>> output_pixels_map;
std::unordered_map<int, std::vector<int32_t>> attention_mask_map;
std::unordered_map<int, std::vector<int32_t>> input_ids_map;
diff --git a/flutter/cpp/datasets/coco_gen_utils/CLIP_Model_to_TFLite.ipynb b/flutter/cpp/datasets/coco_gen_utils/CLIP_Model_to_TFLite.ipynb
index af0baaee9..b0ab574e0 100644
--- a/flutter/cpp/datasets/coco_gen_utils/CLIP_Model_to_TFLite.ipynb
+++ b/flutter/cpp/datasets/coco_gen_utils/CLIP_Model_to_TFLite.ipynb
@@ -46,7 +46,7 @@
"source": [
"SAVED_MODEL_DIR = './clip_model'\n",
"TFLITE_MODEL_PATH = './clip_model.tflite'\n",
- "MODEL_NAME = \"openai/clip-vit-base-patch32\""
+ "MODEL_NAME = \"openai/clip-vit-large-patch14\""
],
"metadata": {
"id": "eOxB3zL_33tq"
diff --git a/flutter/cpp/datasets/coco_gen_utils/generate_tfrecords.py b/flutter/cpp/datasets/coco_gen_utils/generate_tfrecords.py
index 45070b061..8c3dcbe15 100644
--- a/flutter/cpp/datasets/coco_gen_utils/generate_tfrecords.py
+++ b/flutter/cpp/datasets/coco_gen_utils/generate_tfrecords.py
@@ -56,9 +56,10 @@ def download_image(url, file_path):
print(f"Downloaded image to {file_path}")
-def serialize_example(caption, input_ids, attention_mask, file_name, clip_score):
+def serialize_example(caption_id, caption, input_ids, attention_mask, file_name, clip_score):
"""Creates a tf.train.Example message ready to be written to a file."""
feature = {
+ 'caption_id': tf.train.Feature(int64_list=tf.train.Int64List(value=caption_id)),
'caption': tf.train.Feature(bytes_list=tf.train.BytesList(value=[caption.encode()])),
'input_ids': tf.train.Feature(int64_list=tf.train.Int64List(value=input_ids)),
'attention_mask': tf.train.Feature(int64_list=tf.train.Int64List(value=attention_mask)),
@@ -87,6 +88,7 @@ def main():
with tf.io.TFRecordWriter(args.output_tfrecord, options='ZLIB') as writer:
total = len(df)
for idx, row in df.iterrows():
+ caption_id = row['id']
caption = row['caption']
file_name = row['file_name']
coco_url = row['coco_url']
@@ -104,6 +106,7 @@ def main():
clip_score = outputs.logits_per_image.numpy().flatten().tolist()
example = serialize_example(
+ caption_id=[int(caption_id)],
caption=caption,
input_ids=input_ids,
attention_mask=attention_mask,
diff --git a/flutter/cpp/datasets/coco_gen_utils/types.h b/flutter/cpp/datasets/coco_gen_utils/types.h
index 082c5cf5c..0945500aa 100644
--- a/flutter/cpp/datasets/coco_gen_utils/types.h
+++ b/flutter/cpp/datasets/coco_gen_utils/types.h
@@ -29,9 +29,14 @@ struct CaptionRecord {
tensorflow::Example example;
example.ParseFromString(record);
+ auto caption_id_list =
+ tensorflow::GetFeatureValues<int64_t>("caption_id", example);
+ caption_id =
+ std::vector<int64_t>(caption_id_list.begin(), caption_id_list.end())[0];
+
auto caption_list =
tensorflow::GetFeatureValues<std::string>("caption", example);
- caption =
+ caption_text =
std::vector<std::string>(caption_list.begin(), caption_list.end());
auto input_id_list =
@@ -57,7 +62,8 @@ struct CaptionRecord {
void dump() {
std::cout << "CaptionRecord:\n";
- std::cout << " caption: " << get_caption() << "\n";
+ std::cout << " caption_id: " << get_caption_id() << "\n";
+ std::cout << " caption_text: " << get_caption_text() << "\n";
std::cout << " input_ids: ";
for (size_t i = 0; i < input_ids.size(); ++i) {
std::cout << input_ids[i];
@@ -80,7 +86,8 @@ struct CaptionRecord {
std::cout << " clip_score: " << clip_score << "\n";
}
- std::string get_caption() const { return caption[0]; }
+ int get_caption_id() const { return caption_id; }
+ std::string get_caption_text() const { return caption_text[0]; }
std::string get_filename() const { return filename[0]; }
int32_t* get_input_ids() { return input_ids.data(); }
int32_t* get_attention_mask() { return attention_mask.data(); }
@@ -88,7 +95,8 @@ struct CaptionRecord {
std::vector<int32_t> get_attention_mask_vector() { return attention_mask; }
private:
- std::vector<std::string> caption;
+ int caption_id;
+ std::vector<std::string> caption_text;
std::vector<int32_t> input_ids;
std::vector<int32_t> attention_mask;
std::vector<std::string> filename;
diff --git a/flutter/integration_test/expected_accuracy.dart b/flutter/integration_test/expected_accuracy.dart
index c1d37a2e0..712bee11b 100644
--- a/flutter/integration_test/expected_accuracy.dart
+++ b/flutter/integration_test/expected_accuracy.dart
@@ -83,16 +83,16 @@ const Map<String, Interval> _superResolution = {
// TODO (anhappdev): update expected accuracy for stable diffusion
const Map<String, Interval> _stableDiffusion = {
- 'cpu': Interval(min: 0, max: 100),
- 'npu': Interval(min: 0, max: 100),
- 'tpu': Interval(min: 0, max: 100),
- 'ane|TFLite': Interval(min: 0, max: 100),
- 'ane|Core ML': Interval(min: 0, max: 100),
- 'cpu&gpu&ane': Interval(min: 0, max: 100),
- 'snpe_dsp': Interval(min: 0, max: 100),
- 'psnpe_dsp': Interval(min: 0, max: 100),
- 'neuron': Interval(min: 0, max: 100),
- 'samsung_npu': Interval(min: 0, max: 100),
+ 'cpu': Interval(min: 0, max: 1.0),
+ 'npu': Interval(min: 0, max: 1.0),
+ 'tpu': Interval(min: 0, max: 1.0),
+ 'ane|TFLite': Interval(min: 0, max: 1.0),
+ 'ane|Core ML': Interval(min: 0, max: 1.0),
+ 'cpu&gpu&ane': Interval(min: 0, max: 1.0),
+ 'snpe_dsp': Interval(min: 0, max: 1.0),
+ 'psnpe_dsp': Interval(min: 0, max: 1.0),
+ 'neuron': Interval(min: 0, max: 1.0),
+ 'samsung_npu': Interval(min: 0, max: 1.0),
};
const benchmarkExpectedAccuracy = {
diff --git a/flutter/integration_test/expected_throughput.dart b/flutter/integration_test/expected_throughput.dart
index 1b9e416a1..f3925faa7 100644
--- a/flutter/integration_test/expected_throughput.dart
+++ b/flutter/integration_test/expected_throughput.dart
@@ -60,7 +60,7 @@ const Map<String, Map<String, Interval>> _imageClassificationV2 = {
_kS24Ultra: Interval(min: 800, max: 1500),
},
_kMediatekBackend: {
- _kDN2103: Interval(min: 5, max: 90),
+ _kDN2103: Interval(min: 4.5, max: 90),
_kS10Plus: Interval(min: 400, max: 800)
},
_kSamsungBackend: {
@@ -88,11 +88,11 @@ const Map<String, Map<String, Interval>> _objectDetection = {
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 700, max: 1400),
- _kS24Ultra: Interval(min: 1800, max: 2500),
+ _kS24Ultra: Interval(min: 1800, max: 2700),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 120, max: 210),
- _kS10Plus: Interval(min: 1200, max: 1800)
+ _kS10Plus: Interval(min: 1200, max: 2000)
},
_kSamsungBackend: {
_kS24: Interval(min: 1400, max: 2400),
@@ -123,7 +123,7 @@ const Map<String, Map<String, Interval>> _imageSegmentationV2 = {
},
_kMediatekBackend: {
_kDN2103: Interval(min: 45, max: 70),
- _kS10Plus: Interval(min: 800, max: 1400)
+ _kS10Plus: Interval(min: 800, max: 1500)
},
_kSamsungBackend: {
_kS24: Interval(min: 800, max: 1500),
@@ -150,7 +150,7 @@ const Map<String, Map<String, Interval>> _naturalLanguageProcessing = {
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 100, max: 200),
- _kS24Ultra: Interval(min: 250, max: 450),
+ _kS24Ultra: Interval(min: 250, max: 460),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 1, max: 6),
@@ -185,7 +185,7 @@ const Map<String, Map<String, Interval>> _superResolution = {
},
_kMediatekBackend: {
_kDN2103: Interval(min: 5, max: 15),
- _kS10Plus: Interval(min: 150, max: 280)
+ _kS10Plus: Interval(min: 150, max: 300)
},
_kSamsungBackend: {
_kS24: Interval(min: 90, max: 180),
@@ -244,10 +244,10 @@ const Map<String, Map<String, Interval>> _imageClassificationOfflineV2 = {
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 250, max: 450),
- _kS24Ultra: Interval(min: 900, max: 1600),
+ _kS24Ultra: Interval(min: 900, max: 1700),
},
_kMediatekBackend: {
- _kDN2103: Interval(min: 5, max: 90),
+ _kDN2103: Interval(min: 4.5, max: 90),
_kS10Plus: Interval(min: 700, max: 1200)
},
_kSamsungBackend: {
diff --git a/flutter/integration_test/first_test.dart b/flutter/integration_test/first_test.dart
index 07bcb2391..522448223 100644
--- a/flutter/integration_test/first_test.dart
+++ b/flutter/integration_test/first_test.dart
@@ -31,13 +31,14 @@ void main() {
testWidgets('run benchmarks', (WidgetTester tester) async {
await startApp(tester);
await validateSettings(tester);
+ await setBenchmarks(tester);
await runBenchmarks(tester);
});
testWidgets('check results', (WidgetTester tester) async {
final extendedResult = await obtainResult();
printResults(extendedResult);
- // TODO (anhappdev) uncomment when stable_diffusion is ready
+ // TODO (anhappdev) uncomment when stable_diffusion is implemented for all backends.
// checkTaskCount(extendedResult);
checkTasks(extendedResult);
});
diff --git a/flutter/integration_test/utils.dart b/flutter/integration_test/utils.dart
index d99a66000..f8fcd7ed1 100644
--- a/flutter/integration_test/utils.dart
+++ b/flutter/integration_test/utils.dart
@@ -64,6 +64,21 @@ Future<void> validateSettings(WidgetTester tester) async {
}
}
+Future<void> setBenchmarks(WidgetTester tester) async {
+ final state = tester.state(find.byType(MaterialApp));
+ final benchmarkState = state.context.read<BenchmarkState>();
+ for (var benchmark in benchmarkState.benchmarks) {
+ // Disable the stable diffusion benchmark since it takes too long to finish.
+ if (benchmark.id == BenchmarkId.stableDiffusion) {
+ benchmark.isActive = false;
+ print('Benchmark ${benchmark.id} is disabled');
+ } else {
+ benchmark.isActive = true;
+ print('Benchmark ${benchmark.id} is enabled');
+ }
+ }
+}
+
Future<void> runBenchmarks(WidgetTester tester) async {
const downloadTimeout = 20 * 60; // 20 minutes
const runBenchmarkTimeout = 30 * 60; // 30 minutes
diff --git a/flutter/lib/app_constants.dart b/flutter/lib/app_constants.dart
index a54c5ae89..38c7c3adb 100644
--- a/flutter/lib/app_constants.dart
+++ b/flutter/lib/app_constants.dart
@@ -24,14 +24,15 @@ class BenchmarkId {
static const imageClassificationOfflineV2 = 'image_classification_offline_v2';
static const stableDiffusion = 'stable_diffusion';
+ // The sort order of this list will be used in the UI
static const allIds = [
+ imageClassificationV2,
objectDetection,
imageSegmentationV2,
naturalLanguageProcessing,
superResolution,
- imageClassificationV2,
- imageClassificationOfflineV2,
stableDiffusion,
+ imageClassificationOfflineV2,
];
}
diff --git a/flutter/lib/benchmark/benchmark.dart b/flutter/lib/benchmark/benchmark.dart
index 8f46f6692..38b1867f3 100644
--- a/flutter/lib/benchmark/benchmark.dart
+++ b/flutter/lib/benchmark/benchmark.dart
@@ -1,5 +1,6 @@
import 'package:collection/collection.dart';
+import 'package:mlperfbench/app_constants.dart';
import 'package:mlperfbench/backend/bridge/run_settings.dart';
import 'package:mlperfbench/backend/loadgen_info.dart';
import 'package:mlperfbench/benchmark/info.dart';
@@ -123,7 +124,11 @@ class BenchmarkStore {
required List<pb.BenchmarkSetting> backendConfig,
required Map<String, bool> taskSelection,
}) {
- for (final task in appConfig.task) {
+ // Sort the tasks into the order defined by BenchmarkId.allIds
+ final List<pb.TaskConfig> sortedTasks = List.from(appConfig.task)
+ ..sort((a, b) =>
+ BenchmarkId.allIds.indexOf(a.id) - BenchmarkId.allIds.indexOf(b.id));
+ for (final task in sortedTasks) {
final backendSettings = backendConfig
.singleWhereOrNull((setting) => setting.benchmarkId == task.id);
if (backendSettings == null) {
diff --git a/flutter/lib/benchmark/info.dart b/flutter/lib/benchmark/info.dart
index 856555b4e..d2d027cdf 100644
--- a/flutter/lib/benchmark/info.dart
+++ b/flutter/lib/benchmark/info.dart
@@ -63,6 +63,12 @@ class BenchmarkInfo {
detailsTitle: stringResources.benchInfoSuperResolution,
detailsContent: stringResources.benchInfoSuperResolutionDesc,
);
+ case (BenchmarkId.stableDiffusion):
+ return BenchmarkLocalizationInfo(
+ name: stringResources.benchNameStableDiffusion,
+ detailsTitle: stringResources.benchInfoStableDiffusion,
+ detailsContent: stringResources.benchInfoStableDiffusionDesc,
+ );
default:
throw 'unhandled task id: ${task.id}';
}
diff --git a/flutter/lib/l10n/app_en.arb b/flutter/lib/l10n/app_en.arb
index 49a94e174..8a99fa31d 100644
--- a/flutter/lib/l10n/app_en.arb
+++ b/flutter/lib/l10n/app_en.arb
@@ -107,17 +107,20 @@
"benchNameLanguageProcessing": "Language Processing",
"benchNameImageClassificationOffline": "Image Classification (offline)",
"benchNameSuperResolution": "Super Resolution",
+ "benchNameStableDiffusion": "Stable Diffusion",
"benchInfoImageClassification": "Image Classification",
"benchInfoObjectDetection": "Object detection",
"benchInfoImageSegmentation": "Image Segmentation",
"benchInfoLanguageProcessing": "Language Processing",
"benchInfoSuperResolution": "Super Resolution",
+ "benchInfoStableDiffusion": "Stable Diffusion",
"benchInfoImageClassificationDesc": "Image classification picks the best label to describe an input image and is commonly used for photo search and text extraction. The MobileNetEdgeTPU reference model is evaluated on the ImageNet 2012 validation dataset and requires a minimum accuracy of 74.66% (98% of FP32 accuracy of 76.19%) Top-1 accuracy (For Performance measurements, App uses a different dataset).\n\nThe MobileNetEdgeTPU network is a descendent of the MobileNet-v2 family that is optimized for low-latency and mobile accelerators. The MobileNetEdgeTPU model architecture is based on convolutional layers with inverted residuals and linear bottlenecks, similar to MobileNet v2, but is optimized by introducing fused inverted bottleneck convolutions to improve hardware utilization, and removing hard-swish and squeeze-and-excite blocks.\n\nThe offline variant of image classification has no latency constraints and typically uses batched inference and has higher throughput.",
"benchInfoImageClassificationV2Desc": "Image classification picks the best label to describe an input image and is commonly used for photo search and text extraction.\n\nThe MobileNetV4-Conv-L model boasts an impressive 83% accuracy with the ImageNet dataset, versus 76% accuracy for the prior standard, MobileNetEdgeTPU. MobileNetV4-Conv-L is designed to perform well across a range of mobile processor types, from CPUs and GPUs to neural accelerators. The MLPerf Mobile working group worked closely with the MobileNetV4 team in order to ensure optimized performance. This combination of an improved model architecture and collaborative optimization has proven quite potent. Although MobileNetV4-Conv-L executes six times the number of mathematical operations of its predecessor, MobileNetEdgeTPU, benchmark execution times have only increased by a factor of roughly 4.6.\n\nThe offline variant of image classification has no latency constraints and typically uses batched inference and has higher throughput.",
"benchInfoObjectDetectionDesc": "Object detection draws bounding boxes around recognized objects in an input image, assigning each one a label. This is a common approach for identifying objects in photos, and automotive safety. Since v1.0, our reference model has been updated to MobileDets (from v0.7 model, Single Shot Detector with a MobileNet-v2 feature extractor operating). MobileDets are trained on the COCO 2017 validation dataset. The MobileDets Object Detection task is evaluated on the COCO 2017 dataset with an input image resolution of 320x320. It requires a minimum mean Average Precision (mAP) of 27.075 (95% of FP32 mAP of 28.5%), which is significantly higher than that of the previous model.\n\nMobileDets are searched for object detection. A key feature of MobileDets is that the search space includes both inverted bottleneck blocks and regular convolution operations to help improve the accuracy-latency trade-off on several hardware accelerators.",
"benchInfoImageSegmentationDesc": "Semantic image segmentation partitions an input image into labeled objects at pixel granularity, and is used for complex image manipulation such as red-eye reduction as well as automotive and medical applications. The reference model is the MOSAIC network paired with a tailored feature extraction backbone. It operates on 512x512 resolution input images from the ADE20K validation set and requires a minimum mean Intersection Over Union (mIoU) value of 57.36% (96% of FP32 mIoU of 59.75%), significantly higher than the previous segmentation model (MobileNetv2-Deeplabv3+).\n\nMOSAIC employs a simple asymmetric encoder-decoder structure which consists of an efficient multi-scale context encoder and a light-weight hybrid decoder to recover spatial details from aggregated information with multiple lateral connections between the two. The feature extractor is a variant of MobileNet Multi-Hardware, which is a network built and optimized with neural architecture search. It is further enhanced for image segmentation by reducing the output stride, adding dilated convolutions at the end stage, and halving the feature channels.",
"benchInfoLanguageProcessingDesc": "Question Answering finds the best answer to an input question based on a body of text, and is commonly employed in applications such as virtual assistants and chatbots. The reference model, MobileBERT, is evaluated on the Stanford Question Answering Dataset (SQUAD) v1.1 Dev-mini. The task requires a minimum F1-score of 87.4% (93% of FP32 F1-score of 93.08%).\n\nMobileBERT is a streamlined, mobile-optimized version of the larger BERT_LARGE network. It features bottleneck structures and a carefully designed balance between self-attention and feed-forward networks. While BERT is task-agnostic and can be applied to various downstream natural language processing tasks, the MobileBERT variant used in MLPerf is specifically fine-tuned for question answering.",
"benchInfoSuperResolutionDesc": "Image Super Resolution (SR) upscales a lower resolution input into a higher resolution output image, enhancing the quality and detail. It is a common task in many mobile applications such as digital zoom. The reference model, EDSR F32B5, is a lightweight member of the Enhanced Deep Super Resolution (EDSR) family that is trained for 2X super resolution on the DIV2K dataset with bicubic downsampling and tested on the OpenSR test-set which comprises 25 selected 1920x1080 HDR images. The benchmark requires a minimum accuracy of 33 dB Peak Signal to Noise Ratio (PSNR) relative to a 33.58 dB accuracy with FP32.\n\nThe EDSR family of models demonstrated excellent performance by winning a super resolution challenge at CVPR 2017. The EDSR F32B5 reference model features five EDSR blocks, each with 32 feature maps. The EDSR block is a simple residual block consisting of a residual connection on one branch and a convolution-ReLU-convolution on the other branch. The final upsampling layer is a depth-to-space operator, which facilitates the x2 super resolution process.",
+ "benchInfoStableDiffusionDesc": "The Text to Image Gen AI benchmark adopts Stable Diffusion v1.5 for generating images from text prompts. It is a latent diffusion model. The benchmarked Stable Diffusion v1.5 refers to a specific configuration of the model architecture that uses a downsampling-factor 8 autoencoder with an 860M UNet,123M CLIP ViT-L/14 text encoder for the diffusion model, and VAE Decoder of 49.5M parameters. The model was trained on 595k steps at resolution of 512x512, which enables it to generate high quality images. We refer you to https://huggingface.co/benjamin-paine/stable-diffusion-v1-5 for more information. The benchmark runs 20 denoising steps for inference, and uses a precalculated time embedding of size 1x1280. Reference models can be found here https://github.com/mlcommons/mobile_open/releases.\n\nFor latency benchmarking, we benchmark end to end, excluding the time embedding calculation and the tokenizer. For accuracy calculations, the app adopts the CLIP metric for text-to-image consistency, and further evaluation of the generated images using this Image Quality Aesthetic Assessment metric https://github.com/idealo/image-quality-assessment/tree/master?tab=readme-ov-file",
"resourceDownload": "Download",
"resourceClear": "Clear",
diff --git a/flutter/lib/ui/icons.dart b/flutter/lib/ui/icons.dart
index cadd279c5..524813430 100644
--- a/flutter/lib/ui/icons.dart
+++ b/flutter/lib/ui/icons.dart
@@ -26,10 +26,8 @@ class AppIcons {
_pSvg('ic_task_image_classification_offline.svg');
static final SvgPicture superResolution =
_pSvg('ic_task_super_resolution.svg');
-
- // TODO (anhappdev): update icon
static final SvgPicture stableDiffusion =
- _pSvg('ic_task_super_resolution.svg');
+ _pSvg('ic_task_stable_diffusion.svg');
static final SvgPicture imageClassificationWhite =
_pSvg('ic_task_image_classification_white.svg');
@@ -43,10 +41,8 @@ class AppIcons {
_pSvg('ic_task_image_classification_offline_white.svg');
static final SvgPicture superResolutionWhite =
_pSvg('ic_task_super_resolution_white.svg');
-
- // TODO (anhappdev): update icon
static final SvgPicture stableDiffusionWhite =
- _pSvg('ic_task_super_resolution_white.svg');
+ _pSvg('ic_task_stable_diffusion_white.svg');
static final SvgPicture arrow = _pSvg('ic_arrow.svg');
@@ -71,6 +67,7 @@ class BenchmarkIcons {
BenchmarkId.imageSegmentationV2: AppIcons.imageSegmentation,
BenchmarkId.naturalLanguageProcessing: AppIcons.languageProcessing,
BenchmarkId.superResolution: AppIcons.superResolution,
+ BenchmarkId.stableDiffusion: AppIcons.stableDiffusion,
BenchmarkId.imageClassificationOfflineV2:
AppIcons.imageClassificationOffline,
};
@@ -81,6 +78,7 @@ class BenchmarkIcons {
BenchmarkId.imageSegmentationV2: AppIcons.imageSegmentationWhite,
BenchmarkId.naturalLanguageProcessing: AppIcons.languageProcessingWhite,
BenchmarkId.superResolution: AppIcons.superResolutionWhite,
+ BenchmarkId.stableDiffusion: AppIcons.stableDiffusionWhite,
BenchmarkId.imageClassificationOfflineV2:
AppIcons.imageClassificationOfflineWhite,
};
diff --git a/mobile_back_apple/dev-utils/Makefile b/mobile_back_apple/dev-utils/Makefile
index a9904fe38..4107ec89c 100644
--- a/mobile_back_apple/dev-utils/Makefile
+++ b/mobile_back_apple/dev-utils/Makefile
@@ -134,8 +134,8 @@ tflite-run-sd:
--output_dir="${REPO_ROOT_DIR}/output" \
--model_file="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/stable_diffusion/dynamic-sd-models" \
--lib_path="bazel-bin/mobile_back_tflite/cpp/backend_tflite/libtflitebackend.so" \
+ --input_tfrecord="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/stable_diffusion/coco_gen_test.tfrecord" \
--custom_config="stable_diffusion_num_steps:20,stable_diffusion_seed:633994880" \
- --input_tfrecord="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/stable_diffusion/coco_gen_full.tfrecord" \
--input_clip_model="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/stable_diffusion/clip_model_512x512.tflite" \
--min_query_count=5
diff --git a/mobile_back_pixel/cpp/backend_tflite/BUILD b/mobile_back_pixel/cpp/backend_tflite/BUILD
index dd416856b..8b59fc413 100644
--- a/mobile_back_pixel/cpp/backend_tflite/BUILD
+++ b/mobile_back_pixel/cpp/backend_tflite/BUILD
@@ -51,11 +51,24 @@ cc_library(
cc_library(
name = "tflite_pixel",
srcs = [
+ "pixel_single_model_pipeline.cc",
"tflite_pixel.cc",
+ "//mobile_back_tflite/cpp/backend_tflite:embedding_utils.cc",
+ "//mobile_back_tflite/cpp/backend_tflite:sd_utils.cc",
+ "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_invoker.cc",
+ "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_pipeline.cc",
],
hdrs = [
"tflite_settings_pixel.h",
"thread_pool.h",
+ "//mobile_back_tflite/cpp/backend_tflite:embedding_utils.h",
+ "//mobile_back_tflite/cpp/backend_tflite:pipeline.h",
+ "//mobile_back_tflite/cpp/backend_tflite:sd_utils.h",
+ "//mobile_back_tflite/cpp/backend_tflite:single_model_pipeline.h",
+ "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_invoker.h",
+ "//mobile_back_tflite/cpp/backend_tflite:stable_diffusion_pipeline.h",
+ "//mobile_back_tflite/cpp/backend_tflite:thread_pool.h",
+ "//mobile_back_tflite/cpp/backend_tflite:utils.h",
],
copts = tflite_copts() + select({
"//flutter/android/commonlibs:use_asan": [
@@ -64,11 +77,14 @@ cc_library(
"-O1",
"-fno-omit-frame-pointer",
],
- "//conditions:default": [],
+ "//conditions:default": [
+ "-Imobile_back_tflite/cpp/backend_tflite",
+ ],
}),
deps = [
":pixel_settings",
":resize_bilinear_op",
+ "//flutter/cpp:utils",
"//flutter/cpp/c:headers",
"@org_tensorflow//tensorflow/core:tflite_portable_logging",
"@org_tensorflow//tensorflow/lite/c:c_api",
diff --git a/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc b/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc
new file mode 100644
index 000000000..1d44b411f
--- /dev/null
+++ b/mobile_back_pixel/cpp/backend_tflite/pixel_single_model_pipeline.cc
@@ -0,0 +1,426 @@
+/* Copyright 2021 The MLPerf Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <future>
+#include <memory>
+#include <vector>
+
+#include "flutter/cpp/c/backend_c.h"
+#include "flutter/cpp/c/type.h"
+#include "tensorflow/lite/c/c_api.h"
+#include "tensorflow/lite/c/c_api_experimental.h"
+#include "tensorflow/lite/c/common.h"
+#if __ANDROID__
+#include <sys/system_properties.h>
+
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/lite/delegates/gpu/delegate.h"
+#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
+#endif
+#include "resize_argmax_op.h"
+#include "single_model_pipeline.h"
+#include "stable_diffusion_pipeline.h"
+#include "tflite_settings_pixel.h"
+#include "thread_pool.h"
+
+#define N_OFFLINE_INTERPRETERS 8
+
+struct TFLiteBackendData {
+ const char* name = "TFLite-pixel";
+ const char* vendor = "Google";
+ const char* accelerator = "CPU";
+ TfLiteModel* model{nullptr};
+ std::vector<TfLiteInterpreterOptions*> options{};
+ std::vector<TfLiteInterpreter*> interpreter{};
+ int32_t shards_num = 1;
+ uint32_t real_batch_size = 1;
+ std::unique_ptr<Threadpool> executer;
+ int32_t original_tensor_size = 0;
+};
+
+static bool backendExists = false;
+
+static constexpr const char* const kDelegateCpu = "CPU";
+
+inline mlperf_data_t::Type TfType2Type(TfLiteType type) {
+ switch (type) {
+ case kTfLiteFloat32:
+ return mlperf_data_t::Float32;
+ case kTfLiteUInt8:
+ return mlperf_data_t::Uint8;
+ case kTfLiteInt8:
+ return mlperf_data_t::Int8;
+ case kTfLiteFloat16:
+ return mlperf_data_t::Float16;
+ case kTfLiteInt32:
+ return mlperf_data_t::Int32;
+ case kTfLiteInt64:
+ return mlperf_data_t::Int64;
+ default:
+ printf("TfLiteType %d not supported\n", type);
+ return mlperf_data_t::Float32;
+ }
+}
+
+size_t TFLiteNumElements(const TfLiteTensor* tensor) {
+ size_t result = 1;
+ for (int i = 0; i < TfLiteTensorNumDims(tensor); ++i) {
+ result *= TfLiteTensorDim(tensor, i);
+ }
+ return result;
+}
+
+#if __ANDROID__
+bool is_emulator() {
+ char ro_build_characteristics[PROP_VALUE_MAX + 1];
+ if (__system_property_get("ro.build.characteristics",
+ ro_build_characteristics)) {
+ char* ptr;
+ ptr = strstr(ro_build_characteristics, "emulator");
+ if (ptr) return true;
+ }
+ return false;
+}
+#endif
+
+// Create a new backend and return the pointer to it.
+mlperf_backend_ptr_t SingleModelPipeline::backend_create(
+ const char* model_path, mlperf_backend_configuration_t* configs,
+ const char* native_lib_path) {
+ // Verify only one instance of the backend exists at any time
+ if (backendExists) {
+ printf("Error: Only one backend instance should exist at a time\n");
+ return nullptr;
+ }
+
+ TFLiteBackendData* backend_data = new TFLiteBackendData();
+
+ backendExists = true;
+
+ // Load the model.
+ backend_data->model = TfLiteModelCreateFromFile(model_path);
+ if (!backend_data->model) {
+ printf("Failed to load model: %s", model_path);
+ mlperf_backend_delete(backend_data);
+ return nullptr;
+ }
+
+ if (configs->batch_size > 1) {
+ backend_data->shards_num = N_OFFLINE_INTERPRETERS;
+
+ if ((configs->batch_size % backend_data->shards_num) != 0) {
+ printf("Batch size is not dividable by shards_num: %d %% %d != 0\n",
+ configs->batch_size, backend_data->shards_num);
+ mlperf_backend_delete(backend_data);
+ return nullptr;
+ }
+
+ backend_data->real_batch_size =
+ configs->batch_size / backend_data->shards_num;
+ }
+
+ backend_data->executer =
+ std::unique_ptr<Threadpool>(new Threadpool(backend_data->shards_num));
+
+ // Create interpreter options function.
+ auto create_option = [&](TfLiteInterpreterOptions*& option_ptr) -> void {
+ option_ptr = TfLiteInterpreterOptionsCreate();
+ TfLiteInterpreterOptionsAddCustomOp(option_ptr, "ResizeArgmax",
+ Register_ResizeArgmax(), 1, 999);
+ TfLiteDelegate* delegate = nullptr;
+
+ for (int i = 0; i < configs->count; ++i) {
+ if (strcmp(configs->keys[i], "num_threads") == 0) {
+ TfLiteInterpreterOptionsSetNumThreads(option_ptr,
+ atoi(configs->values[i]));
+ }
+ }
+
+#if __ANDROID__
+ if (strcmp(configs->delegate_selected, kDelegateCpu) == 0) {
+ backend_data->accelerator = "CPU";
+ } else if (!is_emulator() &&
+ ((strcmp(configs->accelerator, "gpu_f16") == 0) ||
+ (strcmp(configs->accelerator, "gpu") == 0))) {
+ backend_data->accelerator = "GPU";
+ auto options = TfLiteGpuDelegateOptionsV2Default();
+ if (strcmp(configs->accelerator, "gpu_f16") == 0)
+ options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
+ delegate = TfLiteGpuDelegateV2Create(&options);
+ } else if (strcmp(configs->accelerator, "tpu") == 0) {
+ backend_data->accelerator = "EdgeTPU";
+ auto options = tflite::StatefulNnApiDelegate::Options();
+ options.allow_fp16 = true;
+ options.disallow_nnapi_cpu = true;
+ options.accelerator_name = "google-edgetpu";
+ delegate = new tflite::StatefulNnApiDelegate(options);
+ }
+ if (delegate != nullptr) {
+ TfLiteInterpreterOptionsAddDelegate(option_ptr, delegate);
+ }
+#endif
+ };
+
+ backend_data->options.resize(backend_data->shards_num);
+ backend_data->interpreter.resize(backend_data->shards_num);
+
+ for (int k = 0; k < backend_data->shards_num; k++) {
+ // Create Backend Option
+ create_option(backend_data->options[k]);
+
+ // Create the interpreter.
+ backend_data->interpreter[k] =
+ TfLiteInterpreterCreate(backend_data->model, backend_data->options[k]);
+ if (!backend_data->interpreter[k]) {
+ printf("Fallback to a vanilla interpreter\n");
+ backend_data->interpreter[k] = TfLiteInterpreterCreate(
+ backend_data->model, TfLiteInterpreterOptionsCreate());
+ if (!backend_data->interpreter[k]) {
+ printf("Failed to create the interpreter\n");
+ mlperf_backend_delete(backend_data);
+ return nullptr;
+ }
+ }
+ }
+
+ const int32_t input_tensor_count =
+ TfLiteInterpreterGetInputTensorCount(backend_data->interpreter[0]);
+
+ for (int shard_index = 0; shard_index < backend_data->shards_num;
+ shard_index++) {
+ TfLiteInterpreter*& shard = backend_data->interpreter[shard_index];
+
+ for (int input_index = 0; input_index < input_tensor_count; input_index++) {
+ TfLiteTensor* tensor =
+ TfLiteInterpreterGetInputTensor(shard, input_index);
+
+ backend_data->original_tensor_size = tensor->bytes;
+
+ if (backend_data->real_batch_size != tensor->dims->data[0]) {
+ std::vector dims;
+ dims.resize(tensor->dims->size);
+ dims[0] = backend_data->real_batch_size;
+ for (int i = 1; i < tensor->dims->size; i++) {
+ dims[i] = tensor->dims->data[i];
+ }
+ if (TfLiteInterpreterResizeInputTensor(shard, input_index, dims.data(),
+ tensor->dims->size) !=
+ kTfLiteOk) {
+ printf("Failed to resize input\n");
+ mlperf_backend_delete(backend_data);
+ return nullptr;
+ }
+ }
+ }
+
+ if (TfLiteInterpreterAllocateTensors(shard) != kTfLiteOk) {
+ printf("Failed to allocate tensors\n");
+ mlperf_backend_delete(backend_data);
+ return nullptr;
+ }
+ }
+
+ return backend_data;
+}
+
+// Vendor name who create this backend.
+const char* SingleModelPipeline::backend_vendor_name(
+ mlperf_backend_ptr_t backend_ptr) {
+ TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+ return backend_data->vendor;
+}
+
+// TODO: Return the name of the accelerator.
+const char* SingleModelPipeline::backend_accelerator_name(
+ mlperf_backend_ptr_t backend_ptr) {
+ TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+ return backend_data->accelerator;
+}
+
+// Return the name of this backend.
+const char* SingleModelPipeline::backend_name(
+ mlperf_backend_ptr_t backend_ptr) {
+ TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+ return backend_data->name;
+}
+
+// Destroy the backend pointer and its data.
+void SingleModelPipeline::backend_delete(mlperf_backend_ptr_t backend_ptr) {
+ TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+ TfLiteModelDelete(backend_data->model);
+ for (int i = 0; i < backend_data->shards_num; i++) {
+ TfLiteInterpreterOptionsDelete(backend_data->options[i]);
+ TfLiteInterpreterDelete(backend_data->interpreter[i]);
+ }
+ delete backend_data;
+ backendExists = false;
+}
+
+// Run the inference for a sample.
+mlperf_status_t SingleModelPipeline::backend_issue_query(
+ mlperf_backend_ptr_t backend_ptr) {
+ TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+ auto task = [&backend_data](int index) -> TfLiteStatus {
+ return TfLiteInterpreterInvoke(backend_data->interpreter[index]);
+ };
+
+ std::vector<std::future<TfLiteStatus>> f;
+ f.resize(backend_data->shards_num);
+ // dispatch workers for shards
+ for (int k = 1; k < backend_data->shards_num; k++) {
+ f[k] = backend_data->executer->submit(task, k);
+ }
+ // main thread for the first shard
+ if (task(0) != kTfLiteOk) {
+ printf("Failed to run the inference\n");
+ return MLPERF_FAILURE;
+ }
+ // sync and get result of workers
+ for (int k = 1; k < backend_data->shards_num; k++) {
+ if (f[k].get() != kTfLiteOk) {
+ printf("Failed to run the inference\n");
+ return MLPERF_FAILURE;
+ }
+ }
+ return MLPERF_SUCCESS;
+}
+
+// Flush the staged queries immediately.
+mlperf_status_t SingleModelPipeline::backend_flush_queries(
+ mlperf_backend_ptr_t backend_ptr) {
+ return MLPERF_SUCCESS;
+}
+
+// Return the number of inputs of the model.
+int32_t SingleModelPipeline::backend_get_input_count(
+ mlperf_backend_ptr_t backend_ptr) {
+ TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+ return TfLiteInterpreterGetInputTensorCount(backend_data->interpreter[0]);
+}
+
+// Return the type of the ith input.
+mlperf_data_t SingleModelPipeline::backend_get_input_type(
+ mlperf_backend_ptr_t backend_ptr, int32_t i) {
+ TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+ const TfLiteTensor* tensor =
+ TfLiteInterpreterGetInputTensor(backend_data->interpreter[0], i);
+ mlperf_data_t type;
+ type.type = TfType2Type(TfLiteTensorType(tensor));
+ type.size = TFLiteNumElements(tensor);
+ type.size /= backend_data->real_batch_size;
+ return type;
+}
+
+// Set the data for ith input.
+mlperf_status_t SingleModelPipeline::backend_set_input(
+ mlperf_backend_ptr_t backend_ptr, int32_t batch_index, int32_t i,
+ void* data) {
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(6, &cpuset);
+ CPU_SET(7, &cpuset);
+ sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
+
+ TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+
+ const int shard_index = batch_index / backend_data->real_batch_size;
+ TfLiteTensor* tensor = TfLiteInterpreterGetInputTensor(
+ backend_data->interpreter[shard_index], i);
+ const int data_offset = backend_data->original_tensor_size *
+ (batch_index % backend_data->real_batch_size);
+ memcpy(tensor->data.raw + data_offset, data,
+ backend_data->original_tensor_size);
+
+ return MLPERF_SUCCESS;
+}
+
+// Return the number of outputs for the model.
+int32_t SingleModelPipeline::backend_get_output_count(
+ mlperf_backend_ptr_t backend_ptr) {
+ TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+ return TfLiteInterpreterGetOutputTensorCount(backend_data->interpreter[0]);
+}
+
+// Return the type of ith output.
+mlperf_data_t SingleModelPipeline::backend_get_output_type(
+ mlperf_backend_ptr_t backend_ptr, int32_t i) {
+ TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+ const TfLiteTensor* tensor =
+ TfLiteInterpreterGetOutputTensor(backend_data->interpreter[0], i);
+ mlperf_data_t type;
+ type.type = TfType2Type(TfLiteTensorType(tensor));
+ type.size = TFLiteNumElements(tensor);
+ type.size /= backend_data->real_batch_size;
+ return type;
+}
+
+// Get the data from ith output.
+mlperf_status_t SingleModelPipeline::backend_get_output(
+ mlperf_backend_ptr_t backend_ptr, uint32_t batch_index, int32_t i,
+ void** data) {
+ TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
+ const int shard_index = batch_index / backend_data->real_batch_size;
+
+ const TfLiteTensor* output_tensor = TfLiteInterpreterGetOutputTensor(
+ backend_data->interpreter[shard_index], i);
+ batch_index %= backend_data->real_batch_size;
+
+ int non_batch_size = 1;
+ for (int i = 1; i < output_tensor->dims->size; i++) {
+ non_batch_size *= output_tensor->dims->data[i];
+ }
+
+ switch (output_tensor->type) {
+ case kTfLiteFloat32:
+ *data = (output_tensor->data.f + (batch_index * non_batch_size));
+ break;
+ case kTfLiteUInt8:
+ *data = (output_tensor->data.uint8 + (batch_index * non_batch_size));
+ break;
+ case kTfLiteInt8:
+ *data = (output_tensor->data.int8 + (batch_index * non_batch_size));
+ break;
+ case kTfLiteFloat16:
+ *data = (output_tensor->data.f16 + (batch_index * non_batch_size));
+ break;
+ case kTfLiteInt32:
+ *data = (output_tensor->data.i32 + (batch_index * non_batch_size));
+ break;
+ case kTfLiteInt64:
+ *data = (output_tensor->data.i64 + (batch_index * non_batch_size));
+ break;
+ default:
+ printf("Data type not yet supported\n");
+ return MLPERF_FAILURE;
+ }
+ return MLPERF_SUCCESS;
+}
+
+void SingleModelPipeline::backend_convert_inputs(
+ mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height,
+ uint8_t* data) {}
+
+void SingleModelPipeline::backend_convert_outputs(
+ mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height,
+ uint8_t* data) {}
+
+void* SingleModelPipeline::backend_get_buffer(size_t n) {
+ return ::operator new(n);
+}
+
+void SingleModelPipeline::backend_release_buffer(void* p) {
+ ::operator delete(p);
+}
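The shard arithmetic in backend_set_input and backend_get_output above is easiest to follow with concrete numbers: with N_OFFLINE_INTERPRETERS = 8 shards and a batch size of 256, each interpreter handles real_batch_size = 32 samples, so global sample 70 lands in interpreter 2 at slot 6. A self-contained sketch (values chosen for illustration only):

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t shards = 8;        // N_OFFLINE_INTERPRETERS
  const uint32_t batch_size = 256;  // configs->batch_size
  const uint32_t real_batch = batch_size / shards;  // 32 samples per shard

  const uint32_t batch_index = 70;  // global index within the batch
  const uint32_t shard_index = batch_index / real_batch;    // -> 2
  const uint32_t slot_in_shard = batch_index % real_batch;  // -> 6
  // backend_set_input copies into that shard's input tensor at
  // offset original_tensor_size * slot_in_shard.
  printf("sample %u -> interpreter %u, slot %u\n", batch_index, shard_index,
         slot_in_shard);
  return 0;
}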
diff --git a/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt b/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt
index 80a8a1517..23cc5eff7 100644
--- a/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt
+++ b/mobile_back_pixel/cpp/backend_tflite/settings/tflite_settings_pixel6.pbtxt
@@ -207,3 +207,34 @@ benchmark_setting {
}
delegate_selected: "NNAPI"
}
+
+benchmark_setting {
+ benchmark_id: "stable_diffusion"
+ framework: "TFLite"
+ delegate_choice: {
+ delegate_name: "NNAPI"
+ accelerator_name: "npu"
+ accelerator_desc: "NPU"
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_decoder_dynamic.tflite"
+ model_checksum: "68acdb62f99e1dc2c7f5db8cdd0e007c"
+ }
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_diffusion_model_dynamic.tflite"
+ model_checksum: "309e95f76ac8de01130942037a28aa8f"
+ }
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_text_encoder_dynamic.tflite"
+ model_checksum: "b64effb0360f9ea49a117cdaf8a2fbdc"
+ }
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/timestep_embeddings_data.bin.ts"
+ model_checksum: "798b772155a69de5df44b304327bb3cc"
+ }
+ }
+ delegate_selected: "NNAPI"
+ custom_setting {
+ id: "pipeline"
+ value: "StableDiffusionPipeline"
+ }
+}
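The pipeline custom_setting above is how a benchmark selects the backend's code path. A hedged sketch of the lookup a backend might perform (find_pipeline is a hypothetical helper; it mirrors the init_pipeline() call added to tflite_pixel.cc below):

#include <cstring>

#include "flutter/cpp/c/type.h"  // mlperf_backend_configuration_t

// Scan the benchmark's key/value settings for "pipeline"; an empty result
// falls back to the single-model path.
const char* find_pipeline(const mlperf_backend_configuration_t* configs) {
  for (int i = 0; i < configs->count; ++i) {
    if (strcmp(configs->keys[i], "pipeline") == 0) return configs->values[i];
  }
  return "";
}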
diff --git a/mobile_back_pixel/cpp/backend_tflite/tflite_pixel.cc b/mobile_back_pixel/cpp/backend_tflite/tflite_pixel.cc
index d8d1cfa1f..476b7ae60 100644
--- a/mobile_back_pixel/cpp/backend_tflite/tflite_pixel.cc
+++ b/mobile_back_pixel/cpp/backend_tflite/tflite_pixel.cc
@@ -1,90 +1,46 @@
-/* Copyright 2021 The MLPerf Authors. All Rights Reserved.
-
+/* Copyright 2024 The MLPerf Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
-
http://www.apache.org/licenses/LICENSE-2.0
-
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#include <sched.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <future>
-#include <memory>
-#include <vector>
+#include "single_model_pipeline.h"
+#include "stable_diffusion_pipeline.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tflite_settings_pixel.h"
-#include "flutter/cpp/c/backend_c.h"
-#include "flutter/cpp/c/type.h"
-#include "tensorflow/lite/c/c_api.h"
-#include "tensorflow/lite/c/c_api_experimental.h"
-#include "tensorflow/lite/c/common.h"
#if __ANDROID__
#include <sys/system_properties.h>
-
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/lite/delegates/gpu/delegate.h"
-#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#endif
-#include "resize_argmax_op.h"
-#include "tflite_settings_pixel.h"
-#include "thread_pool.h"
-
-#define N_OFFLINE_INTERPRETERS 8
-
-struct TFLiteBackendData {
- const char* name = "TFLite-pixel";
- const char* vendor = "Google";
- const char* accelerator = "CPU";
- TfLiteModel* model{nullptr};
- std::vector<TfLiteInterpreterOptions*> options{};
- std::vector<TfLiteInterpreter*> interpreter{};
- int32_t shards_num = 1;
- uint32_t real_batch_size = 1;
- std::unique_ptr<Threadpool> executer;
- int32_t original_tensor_size = 0;
-};
-static bool backendExists = false;
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
-static constexpr const char* const kDelegateCpu = "CPU";
+std::unique_ptr<Pipeline> pipeline;
-inline mlperf_data_t::Type TfType2Type(TfLiteType type) {
- switch (type) {
- case kTfLiteFloat32:
- return mlperf_data_t::Float32;
- case kTfLiteUInt8:
- return mlperf_data_t::Uint8;
- case kTfLiteInt8:
- return mlperf_data_t::Int8;
- case kTfLiteFloat16:
- return mlperf_data_t::Float16;
- case kTfLiteInt32:
- return mlperf_data_t::Int32;
- case kTfLiteInt64:
- return mlperf_data_t::Int64;
- default:
- printf("TfLiteType %d not supported\n", type);
- return mlperf_data_t::Float32;
+void init_pipeline(const char *pipeline_type) {
+ bool sd_pipeline = (strcmp(pipeline_type, "StableDiffusionPipeline") == 0);
+ if (sd_pipeline) {
+ LOG(INFO) << "Initializing StableDiffusionPipeline";
+ pipeline = std::make_unique<StableDiffusionPipeline>();
+ } else {
+ LOG(INFO) << "Initializing SingleModelPipeline";
+ pipeline = std::make_unique<SingleModelPipeline>();
}
}
-size_t TFLiteNumElements(const TfLiteTensor* tensor) {
- size_t result = 1;
- for (int i = 0; i < TfLiteTensorNumDims(tensor); ++i) {
- result *= TfLiteTensorDim(tensor, i);
- }
- return result;
-}
+void reset_pipeline() { pipeline.reset(); }
-// TFLite is the standard backend for all hardwares.
-bool mlperf_backend_matches_hardware(const char** not_allowed_message,
- const char** settings,
- const mlperf_device_info_t* device_info) {
+// TFLite is the standard backend for all hardware.
+bool mlperf_backend_matches_hardware(const char **not_allowed_message,
+ const char **settings,
+ const mlperf_device_info_t *device_info) {
*not_allowed_message = nullptr;
*settings = tflite_settings.c_str();
@@ -102,321 +58,102 @@ bool mlperf_backend_matches_hardware(const char** not_allowed_message,
return false;
}
-#if __ANDROID__
-bool is_emulator() {
- char ro_build_characteristics[PROP_VALUE_MAX + 1];
- if (__system_property_get("ro.build.characteristics",
- ro_build_characteristics)) {
- char* ptr;
- ptr = strstr(ro_build_characteristics, "emulator");
- if (ptr) return true;
- }
- return false;
-}
-#endif
-
// Create a new backend and return the pointer to it.
mlperf_backend_ptr_t mlperf_backend_create(
- const char* model_path, mlperf_backend_configuration_t* configs,
- const char* native_lib_path) {
- // Verify only one instance of the backend exists at any time
- if (backendExists) {
- printf("Error: Only one backend instance should exist at a time\n");
- return nullptr;
- }
-
- TFLiteBackendData* backend_data = new TFLiteBackendData();
-
- backendExists = true;
-
- // Load the model.
- backend_data->model = TfLiteModelCreateFromFile(model_path);
- if (!backend_data->model) {
- printf("Failed to load model: %s", model_path);
- mlperf_backend_delete(backend_data);
- return nullptr;
- }
-
- if (configs->batch_size > 1) {
- backend_data->shards_num = N_OFFLINE_INTERPRETERS;
-
- if ((configs->batch_size % backend_data->shards_num) != 0) {
- printf("Batch size is not dividable by shards_num: %d %% %d != 0\n",
- configs->batch_size, backend_data->shards_num);
- mlperf_backend_delete(backend_data);
- return nullptr;
- }
-
- backend_data->real_batch_size =
- configs->batch_size / backend_data->shards_num;
- }
-
- backend_data->executer =
- std::unique_ptr<Threadpool>(new Threadpool(backend_data->shards_num));
-
- // Create interpreter options function.
- auto create_option = [&](TfLiteInterpreterOptions*& option_ptr) -> void {
- option_ptr = TfLiteInterpreterOptionsCreate();
- TfLiteInterpreterOptionsAddCustomOp(option_ptr, "ResizeArgmax",
- Register_ResizeArgmax(), 1, 999);
- TfLiteDelegate* delegate = nullptr;
-
- for (int i = 0; i < configs->count; ++i) {
- if (strcmp(configs->keys[i], "num_threads") == 0) {
- TfLiteInterpreterOptionsSetNumThreads(option_ptr,
- atoi(configs->values[i]));
- }
- }
-
-#if __ANDROID__
- if (strcmp(configs->delegate_selected, kDelegateCpu) == 0) {
- backend_data->accelerator = "CPU";
- } else if (!is_emulator() &&
- ((strcmp(configs->accelerator, "gpu_f16") == 0) ||
- (strcmp(configs->accelerator, "gpu") == 0))) {
- backend_data->accelerator = "GPU";
- auto options = TfLiteGpuDelegateOptionsV2Default();
- if (strcmp(configs->accelerator, "gpu_f16") == 0)
- options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
- delegate = TfLiteGpuDelegateV2Create(&options);
- } else if (strcmp(configs->accelerator, "tpu") == 0) {
- backend_data->accelerator = "EdgeTPU";
- auto options = tflite::StatefulNnApiDelegate::Options();
- options.allow_fp16 = true;
- options.disallow_nnapi_cpu = true;
- options.accelerator_name = "google-edgetpu";
- delegate = new tflite::StatefulNnApiDelegate(options);
- }
- if (delegate != nullptr) {
- TfLiteInterpreterOptionsAddDelegate(option_ptr, delegate);
- }
-#endif
- };
-
- backend_data->options.resize(backend_data->shards_num);
- backend_data->interpreter.resize(backend_data->shards_num);
-
- for (int k = 0; k < backend_data->shards_num; k++) {
- // Create Backend Option
- create_option(backend_data->options[k]);
-
- // Create the interpreter.
- backend_data->interpreter[k] =
- TfLiteInterpreterCreate(backend_data->model, backend_data->options[k]);
- if (!backend_data->interpreter[k]) {
- printf("Fallback to a vanilla interpreter\n");
- backend_data->interpreter[k] = TfLiteInterpreterCreate(
- backend_data->model, TfLiteInterpreterOptionsCreate());
- if (!backend_data->interpreter[k]) {
- printf("Failed to create the interpreter\n");
- mlperf_backend_delete(backend_data);
- return nullptr;
- }
- }
- }
-
- const int32_t input_tensor_count =
- TfLiteInterpreterGetInputTensorCount(backend_data->interpreter[0]);
-
- for (int shard_index = 0; shard_index < backend_data->shards_num;
- shard_index++) {
- TfLiteInterpreter*& shard = backend_data->interpreter[shard_index];
-
- for (int input_index = 0; input_index < input_tensor_count; input_index++) {
- TfLiteTensor* tensor =
- TfLiteInterpreterGetInputTensor(shard, input_index);
-
- backend_data->original_tensor_size = tensor->bytes;
-
- if (backend_data->real_batch_size != tensor->dims->data[0]) {
- std::vector<int32_t> dims;
- dims.resize(tensor->dims->size);
- dims[0] = backend_data->real_batch_size;
- for (int i = 1; i < tensor->dims->size; i++) {
- dims[i] = tensor->dims->data[i];
- }
- if (TfLiteInterpreterResizeInputTensor(shard, input_index, dims.data(),
- tensor->dims->size) !=
- kTfLiteOk) {
- printf("Failed to resize input\n");
- mlperf_backend_delete(backend_data);
- return nullptr;
- }
- }
- }
-
- if (TfLiteInterpreterAllocateTensors(shard) != kTfLiteOk) {
- printf("Failed to allocate tensors\n");
- mlperf_backend_delete(backend_data);
- return nullptr;
+ const char *model_path, mlperf_backend_configuration_t *configs,
+ const char *native_lib_path) {
+ const char *pipeline_type = "";
+ for (int i = 0; i < configs->count; ++i) {
+ if (strcmp(configs->keys[i], "pipeline") == 0) {
+ pipeline_type = configs->values[i];
+ break;
}
}
-
- return backend_data;
+ init_pipeline(pipeline_type);
+ return pipeline->backend_create(model_path, configs, native_lib_path);
}
// Vendor name who create this backend.
-const char* mlperf_backend_vendor_name(mlperf_backend_ptr_t backend_ptr) {
- TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
- return backend_data->vendor;
+const char *mlperf_backend_vendor_name(mlperf_backend_ptr_t backend_ptr) {
+ return pipeline->backend_vendor_name(backend_ptr);
}
// TODO: Return the name of the accelerator.
-const char* mlperf_backend_accelerator_name(mlperf_backend_ptr_t backend_ptr) {
- TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
- return backend_data->accelerator;
+const char *mlperf_backend_accelerator_name(mlperf_backend_ptr_t backend_ptr) {
+ return pipeline->backend_accelerator_name(backend_ptr);
}
// Return the name of this backend.
-const char* mlperf_backend_name(mlperf_backend_ptr_t backend_ptr) {
- TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
- return backend_data->name;
+const char *mlperf_backend_name(mlperf_backend_ptr_t backend_ptr) {
+ return pipeline->backend_name(backend_ptr);
}
// Destroy the backend pointer and its data.
void mlperf_backend_delete(mlperf_backend_ptr_t backend_ptr) {
- TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
- TfLiteModelDelete(backend_data->model);
- for (int i = 0; i < backend_data->shards_num; i++) {
- TfLiteInterpreterOptionsDelete(backend_data->options[i]);
- TfLiteInterpreterDelete(backend_data->interpreter[i]);
- }
- delete backend_data;
- backendExists = false;
+ pipeline->backend_delete(backend_ptr);
+ reset_pipeline();
}
// Run the inference for a sample.
mlperf_status_t mlperf_backend_issue_query(mlperf_backend_ptr_t backend_ptr) {
- TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
- auto task = [&backend_data](int index) -> TfLiteStatus {
- return TfLiteInterpreterInvoke(backend_data->interpreter[index]);
- };
-
- std::vector<std::future<TfLiteStatus>> f;
- f.resize(backend_data->shards_num);
- // dispatch workers for shards
- for (int k = 1; k < backend_data->shards_num; k++) {
- f[k] = backend_data->executer->submit(task, k);
- }
- // main thread for the first shard
- if (task(0) != kTfLiteOk) {
- printf("Failed to run the inference\n");
- return MLPERF_FAILURE;
- }
- // sync and get result of workers
- for (int k = 1; k < backend_data->shards_num; k++) {
- if (f[k].get() != kTfLiteOk) {
- printf("Failed to run the inference\n");
- return MLPERF_FAILURE;
- }
- }
- return MLPERF_SUCCESS;
+ return pipeline->backend_issue_query(backend_ptr);
}
// Flush the staged queries immediately.
mlperf_status_t mlperf_backend_flush_queries(mlperf_backend_ptr_t backend_ptr) {
- return MLPERF_SUCCESS;
+ return pipeline->backend_flush_queries(backend_ptr);
}
// Return the number of inputs of the model.
int32_t mlperf_backend_get_input_count(mlperf_backend_ptr_t backend_ptr) {
- TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
- return TfLiteInterpreterGetInputTensorCount(backend_data->interpreter[0]);
+ return pipeline->backend_get_input_count(backend_ptr);
}
// Return the type of the ith input.
mlperf_data_t mlperf_backend_get_input_type(mlperf_backend_ptr_t backend_ptr,
int32_t i) {
- TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
- const TfLiteTensor* tensor =
- TfLiteInterpreterGetInputTensor(backend_data->interpreter[0], i);
- mlperf_data_t type;
- type.type = TfType2Type(TfLiteTensorType(tensor));
- type.size = TFLiteNumElements(tensor);
- type.size /= backend_data->real_batch_size;
- return type;
+ return pipeline->backend_get_input_type(backend_ptr, i);
}
// Set the data for ith input.
mlperf_status_t mlperf_backend_set_input(mlperf_backend_ptr_t backend_ptr,
int32_t batch_index, int32_t i,
- void* data) {
- cpu_set_t cpuset;
- CPU_ZERO(&cpuset);
- CPU_SET(6, &cpuset);
- CPU_SET(7, &cpuset);
- sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
-
- TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
-
- const int shard_index = batch_index / backend_data->real_batch_size;
- TfLiteTensor* tensor = TfLiteInterpreterGetInputTensor(
- backend_data->interpreter[shard_index], i);
- const int data_offset = backend_data->original_tensor_size *
- (batch_index % backend_data->real_batch_size);
- memcpy(tensor->data.raw + data_offset, data,
- backend_data->original_tensor_size);
-
- return MLPERF_SUCCESS;
+ void *data) {
+ return pipeline->backend_set_input(backend_ptr, batch_index, i, data);
}
// Return the number of outputs for the model.
int32_t mlperf_backend_get_output_count(mlperf_backend_ptr_t backend_ptr) {
- TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
- return TfLiteInterpreterGetOutputTensorCount(backend_data->interpreter[0]);
+ return pipeline->backend_get_output_count(backend_ptr);
}
// Return the type of ith output.
mlperf_data_t mlperf_backend_get_output_type(mlperf_backend_ptr_t backend_ptr,
int32_t i) {
- TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
- const TfLiteTensor* tensor =
- TfLiteInterpreterGetOutputTensor(backend_data->interpreter[0], i);
- mlperf_data_t type;
- type.type = TfType2Type(TfLiteTensorType(tensor));
- type.size = TFLiteNumElements(tensor);
- type.size /= backend_data->real_batch_size;
- return type;
+ return pipeline->backend_get_output_type(backend_ptr, i);
}
// Get the data from ith output.
mlperf_status_t mlperf_backend_get_output(mlperf_backend_ptr_t backend_ptr,
uint32_t batch_index, int32_t i,
- void** data) {
- TFLiteBackendData* backend_data = (TFLiteBackendData*)backend_ptr;
- const int shard_index = batch_index / backend_data->real_batch_size;
+ void **data) {
+ return pipeline->backend_get_output(backend_ptr, batch_index, i, data);
+}
- const TfLiteTensor* output_tensor = TfLiteInterpreterGetOutputTensor(
- backend_data->interpreter[shard_index], i);
- batch_index %= backend_data->real_batch_size;
+void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes,
+ int width, int height, uint8_t *data) {
+ return pipeline->backend_convert_inputs(backend_ptr, bytes, width, height,
+ data);
+}
- int non_batch_size = 1;
- for (int i = 1; i < output_tensor->dims->size; i++) {
- non_batch_size *= output_tensor->dims->data[i];
- }
+void *mlperf_backend_get_buffer(size_t n) {
+ return pipeline->backend_get_buffer(n);
+}
- switch (output_tensor->type) {
- case kTfLiteFloat32:
- *data = (output_tensor->data.f + (batch_index * non_batch_size));
- break;
- case kTfLiteUInt8:
- *data = (output_tensor->data.uint8 + (batch_index * non_batch_size));
- break;
- case kTfLiteInt8:
- *data = (output_tensor->data.int8 + (batch_index * non_batch_size));
- break;
- case kTfLiteFloat16:
- *data = (output_tensor->data.f16 + (batch_index * non_batch_size));
- break;
- case kTfLiteInt32:
- *data = (output_tensor->data.i32 + (batch_index * non_batch_size));
- break;
- case kTfLiteInt64:
- *data = (output_tensor->data.i64 + (batch_index * non_batch_size));
- break;
- default:
- printf("Data type not yet supported\n");
- return MLPERF_FAILURE;
- }
- return MLPERF_SUCCESS;
+void mlperf_backend_release_buffer(void *p) {
+ return pipeline->backend_release_buffer(p);
+}
+
+#ifdef __cplusplus
}
+#endif // __cplusplus
diff --git a/mobile_back_qti/.gitignore b/mobile_back_qti/.gitignore
index 3e74de154..96cd036db 100644
--- a/mobile_back_qti/.gitignore
+++ b/mobile_back_qti/.gitignore
@@ -1 +1,4 @@
-qairt
\ No newline at end of file
+snpe-*
+qaisw-*
+qairt
+cpp/backend_qti/StableDiffusionShared/include/opencv
diff --git a/mobile_back_qti/BUILD b/mobile_back_qti/BUILD
index 7ae9fec89..661b866b1 100644
--- a/mobile_back_qti/BUILD
+++ b/mobile_back_qti/BUILD
@@ -28,11 +28,11 @@ config_setting(
cc_import(
name = "snpewindowslib",
hdrs = glob([
- SNPE_VERSION + "/include/SNPE/**/*.hpp",
- SNPE_VERSION + "/include/SNPE/**/*.h",
+ "qairt/" + SNPE_VERSION + "/include/SNPE/**/*.hpp",
+ "qairt/" + SNPE_VERSION + "/include/SNPE/**/*.h",
]),
- interface_library = SNPE_VERSION + "/lib/aarch64-windows-msvc/SNPE.lib",
- shared_library = SNPE_VERSION + "/lib/aarch64-windows-msvc/SNPE.dll",
+ interface_library = "qairt/" + SNPE_VERSION + "/lib/aarch64-windows-msvc/SNPE.lib",
+ shared_library = "qairt/" + SNPE_VERSION + "/lib/aarch64-windows-msvc/SNPE.dll",
)
cc_library(
@@ -49,30 +49,39 @@ cc_library(
cc_library(
name = "snpe",
srcs = [
- SNPE_VERSION + "/lib/aarch64-android/libSNPE.so",
+ "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSNPE.so",
],
hdrs = glob([
- SNPE_VERSION + "/include/SNPE/**/*.hpp",
- SNPE_VERSION + "/include/SNPE/**/*.h",
+ "qairt/" + SNPE_VERSION + "/include/SNPE/**/*.hpp",
+ "qairt/" + SNPE_VERSION + "/include/SNPE/**/*.h",
+ "qairt/" + SNPE_VERSION + "/include/QNN/**/*.h",
+ "qairt/" + SNPE_VERSION + "/include/QNN/*.h",
]),
- copts = ["-I" + SNPE_VERSION + "/include/SNPE"],
+ copts = [
+ "-I" + "qairt/" + SNPE_VERSION + "/include/SNPE",
+ "-I" + SNPE_VERSION + "/include/QNN",
+ ],
visibility = ["//visibility:public"],
)
cc_library(
name = "snpe_deps",
srcs = [
- SNPE_VERSION + "/lib/aarch64-android/libhta.so",
- SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV75Stub.so",
- SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV73Stub.so",
- SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV69Stub.so",
- SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV68Stub.so",
- SNPE_VERSION + "/lib/aarch64-android/libSNPE.so",
- SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpPrepare.so",
- SNPE_VERSION + "/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so",
- SNPE_VERSION + "/lib/hexagon-v69/unsigned/libSnpeHtpV69Skel.so",
- SNPE_VERSION + "/lib/hexagon-v73/unsigned/libSnpeHtpV73Skel.so",
- SNPE_VERSION + "/lib/hexagon-v75/unsigned/libSnpeHtpV75Skel.so",
+ "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libhta.so",
+ "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV75Stub.so",
+ "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV73Stub.so",
+ "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV69Stub.so",
+ "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpV68Stub.so",
+ "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSNPE.so",
+ "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libSnpeHtpPrepare.so",
+ "qairt/" + SNPE_VERSION + "/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so",
+ "qairt/" + SNPE_VERSION + "/lib/hexagon-v69/unsigned/libSnpeHtpV69Skel.so",
+ "qairt/" + SNPE_VERSION + "/lib/hexagon-v73/unsigned/libSnpeHtpV73Skel.so",
+ "qairt/" + SNPE_VERSION + "/lib/hexagon-v75/unsigned/libSnpeHtpV75Skel.so",
+ "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtpV73Stub.so",
+ "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtp.so",
+ "qairt/" + SNPE_VERSION + "/lib/aarch64-android/libQnnHtpPrepare.so",
+ "qairt/" + SNPE_VERSION + "/lib/hexagon-v73/unsigned/libQnnHtpV73Skel.so",
],
)
diff --git a/mobile_back_qti/DLC/Makefile b/mobile_back_qti/DLC/Makefile
index 73b0de98b..a70bcf602 100644
--- a/mobile_back_qti/DLC/Makefile
+++ b/mobile_back_qti/DLC/Makefile
@@ -32,11 +32,13 @@ ${DATASETS_OUT}/state/calibration.stamp:
DLCBUILDDIR=${BUILDDIR}/DLC
MODEL_BASE_PATH=${DLCBUILDDIR}/mobile
-MOBILENETEDGETPU_MODEL_PATH=${MODEL_BASE_PATH}/vision/mobilenet/models_and_code/checkpoints/float
MOBILENETV4_MODEL_PATH=${MODEL_BASE_PATH}/vision/mobilenetV4/MobileNetV4-Conv-Large-saved-model
MOBILEBERT_MODEL_PATH=${MODEL_BASE_PATH}/language/bert/models_and_code/checkpoints/quant/
MOBILEMOSAIC_MODEL_PATH=${MODEL_BASE_PATH}/vision/mosaic/models_and_checkpoints/R4/
SNUSR_MODEL_PATH = ${MODEL_BASE_PATH}/vision/edsr/models_and_checkpoints/checkpoints/f32b5/ckpt_qat/
+TEXTENCODER_MODEL_PATH = ${DLCBUILDDIR}/stable_diffusion/text_encoder/
+VAEDECODER_MODEL_PATH = ${DLCBUILDDIR}/stable_diffusion/vae_decoder/
+UNET_MODEL_PATH = ${DLCBUILDDIR}/stable_diffusion/unet/
SNUSR_CALIBRATION_PATH=${MODEL_BASE_PATH}/calibration/OpenImages
MLPERF_MODELS_PATH = ${DLCBUILDDIR}/mlperf_models/
MOBILEDETSSDQAT_MODEL_PATH = ${DLCBUILDDIR}/mobile/vision/mobiledet/uint8/export_inference_graph/
@@ -52,9 +54,7 @@ $(info "Using normal docker")
include load_normal_docker.mk
endif
-htp-dlc: mobilenet_edgetpu \
- mobilenet_edgetpu_O2 \
- mobilenet_v4 \
+htp-dlc: mobilenet_v4 \
mobilenet_v4_O2 \
mobiledet \
mobiledet_O2 \
@@ -64,48 +64,30 @@ htp-dlc: mobilenet_edgetpu \
mobilebert_O2 \
snusr \
snusr_O2 \
- mobilenet_edgetpu_batched \
- mobilenet_edgetpu_batched_O2 \
mobilenet_v4_batched \
mobilenet_v4_batched_O2
-ifeq ($(MAKECMDGOALS),$(filter $(MAKECMDGOALS),generate-apirec mobilenet_edgetpu_apirec mobilenet_edgetpu_batched_apirec \
+
+ifeq ($(MAKECMDGOALS),$(filter $(MAKECMDGOALS),generate-apirec \
mobilenet_v4_apirec mobilenet_v4_batched_apirec mosaic_apirec mobilebert_apirec mobiledet_apirec snusr_apirec))
include ../make/apirec.mk
endif
-generate-apirec: mobilenet_edgetpu_apirec \
- mobilenet_edgetpu_batched_apirec \
- mobilenet_v4_apirec \
+generate-apirec: mobilenet_v4_apirec \
mobilenet_v4_batched_apirec \
mosaic_apirec \
mobiledet_apirec \
mobilebert_apirec \
snusr_apirec
-mobilenet_edgetpu_batched: \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_3.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_4.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_8.stamp
-
-mobilenet_edgetpu_batched_O2: \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_3_O2.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_4_O2.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_8_O2.stamp
-
-mobilenet_edgetpu_batched_apirec: \
- ${DLCBUILDDIR}/generate_mobilenet_edgetpu_batched_4_apirec \
- ${DLCBUILDDIR}/generate_mobilenet_edgetpu_batched_3_apirec \
- ${DLCBUILDDIR}/generate_mobilenet_edgetpu_batched_8_apirec
-
mobilenet_v4_batched: \
- ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4.stamp \
+ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4.stamp
mobilenet_v4_batched_O2: \
- ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2.stamp \
+ ${DLCBUILDDIR}/mobilenet_v4_htp_batched_4_O2.stamp
mobilenet_v4_batched_apirec: \
- ${DLCBUILDDIR}/generate_mobilenet_v4_batched_4_apirec \
+ ${DLCBUILDDIR}/generate_mobilenet_v4_batched_4_apirec
mosaic: \
${DLCBUILDDIR}/mobile_mosaic_htp.stamp
@@ -116,15 +98,6 @@ mosaic_O2: \
mosaic_apirec: \
${DLCBUILDDIR}/generate_mosaic_apirec
-mobilenet_edgetpu: \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp.stamp
-
-mobilenet_edgetpu_O2: \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_O2.stamp
-
-mobilenet_edgetpu_apirec: \
- ${DLCBUILDDIR}/generate_mobilenet_edgetpu_apirec \
-
mobilenet_v4: \
${DLCBUILDDIR}/mobilenet_v4_htp.stamp
@@ -132,7 +105,7 @@ mobilenet_v4_O2: \
${DLCBUILDDIR}/mobilenet_v4_htp_O2.stamp
mobilenet_v4_apirec: \
- ${DLCBUILDDIR}/generate_mobilenet_v4_apirec \
+ ${DLCBUILDDIR}/generate_mobilenet_v4_apirec
mobiledet: \
${DLCBUILDDIR}/ssd_mobiledet_qat_htp.stamp
@@ -161,6 +134,21 @@ snusr_O2 : \
snusr_apirec: \
${DLCBUILDDIR}/generate_snusr_apirec
+stable_diffusion_qnn: \
+ ${DLCBUILDDIR}/sd_precompute_data.tar \
+ ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator.stamp \
+ ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator.stamp \
+ ${DLCBUILDDIR}/unet_qnn_context_binary_generator.stamp
+
+text_encoder: \
+ ${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator.stamp
+
+vae_decoder: \
+ ${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator.stamp
+
+unet: \
+ ${DLCBUILDDIR}/unet_qnn_context_binary_generator.stamp
+
mlperf_models: \
${DLCBUILDDIR}/mlperf_models.stamp
@@ -183,94 +171,6 @@ ${DLCBUILDDIR}/mlperf_models.stamp:
(mkdir -p ${MLPERF_MODELS_PATH})
touch $@
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float.dlc: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobile/.stamp \
- ${DLCBUILDDIR}/mlperf_models.stamp
- # Mobilenetedge TPU model conversion ....
- # Float model
- mkdir -p ${DLCBUILDDIR}
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${MOBILENETEDGETPU_MODEL_PATH}:/models \
- -v ${DLCBUILDDIR}:/output \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-tensorflow-to-dlc \
- -i /models/frozen_graph_tf1x_transform.pb \
- -d input "1,224,224,3" --out_node "MobilenetEdgeTPU/Predictions/Softmax" \
- -o /output/mobilenet_edgetpu_224_1.0_float.dlc
-
-${DLCBUILDDIR}/imagenet/imagenet_image_224_list.txt: ${DLCBUILDDIR}/imagenet/state/quantdata_224.stamp
- cat ${TOPDIR}/datasets/downloads/imagenet/cal_image_list_option_1.txt | sed "s!^!quantdata_224/!" | sed "s!JPEG!raw!" > $@
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant.stamp: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float.dlc \
- ${DLCBUILDDIR}/imagenet/imagenet_image_224_list.txt
- # Quantization of MobilenetEdgeTPU DLC for HTP
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${DLCBUILDDIR}:/output \
- -v ${DLCBUILDDIR}/imagenet:/imagenet \
- -w /imagenet \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-quant \
- --input_dlc=/output/mobilenet_edgetpu_224_1.0_float.dlc \
- --input_list=imagenet_image_224_list.txt \
- --output_dlc=/output/mobilenet_edgetpu_224_1.0_quant.dlc \
- # Mobilenetedge TPU model conversion completed
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp.stamp: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant.stamp \
- # Offline prepare of MobilenetEdgeTPU DLC for HTP
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${DLCBUILDDIR}:/output \
- -v ${DLCBUILDDIR}/imagenet:/imagenet \
- -w /imagenet \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
- --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant.dlc \
- --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp.dlc \
- --optimization_level 3 \
- --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \
- --memorymapped_buffer_hint=true
- # Mobilenetedge TPU model conversion completed
- cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp.dlc ${MLPERF_MODELS_PATH}
- touch $@
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_O2.stamp: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant.stamp \
- # Offline prepare of MobilenetEdgeTPU DLC for HTP
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${DLCBUILDDIR}:/output \
- -v ${DLCBUILDDIR}/imagenet:/imagenet \
- -w /imagenet \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
- --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant.dlc \
- --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_O2.dlc \
- --optimization_level 2 \
- --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \
- --memorymapped_buffer_hint=true
- # Mobilenetedge TPU model conversion completed
- cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_O2.dlc ${MLPERF_MODELS_PATH}
- touch $@
-
${DLCBUILDDIR}/mobilenet_v4_float.dlc: \
${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
${DLCBUILDDIR}/mobile/.stamp \
@@ -327,6 +227,7 @@ ${DLCBUILDDIR}/mobilenet_v4_htp.stamp: \
-u ${USERID}:${GROUPID} \
mlperf_dlc_prepare \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
+ --htp_dlbc=true \
--input_dlc=/output/mobilenet_v4_quant.dlc \
--output_dlc=/output/mobilenet_v4_htp.dlc \
--optimization_level 3 \
@@ -350,6 +251,7 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_O2.stamp: \
-u ${USERID}:${GROUPID} \
mlperf_dlc_prepare \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
+ --htp_dlbc=true \
--input_dlc=/output/mobilenet_v4_quant.dlc \
--output_dlc=/output/mobilenet_v4_htp_O2.dlc \
--optimization_level 2 \
@@ -359,249 +261,6 @@ ${DLCBUILDDIR}/mobilenet_v4_htp_O2.stamp: \
cp ${DLCBUILDDIR}/mobilenet_v4_htp_O2.dlc ${MLPERF_MODELS_PATH}
touch $@
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float_batched_3.dlc: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobile/.stamp \
- ${DLCBUILDDIR}/mlperf_models.stamp
- # Mobilenetedge TPU model conversion ....
- # Batched Float model
- mkdir -p ${DLCBUILDDIR}
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${MOBILENETEDGETPU_MODEL_PATH}:/models \
- -v ${DLCBUILDDIR}:/output \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-tensorflow-to-dlc \
- -i /models/frozen_graph_tf1x_transform.pb \
- -d input "3,224,224,3" --out_node "MobilenetEdgeTPU/Predictions/Softmax" \
- -o /output/mobilenet_edgetpu_224_1.0_float_batched_3.dlc
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_3.stamp: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/imagenet/imagenet_image_224_list.txt \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float_batched_3.dlc
- # Quantization of MobilenetEdgeTPU Batched DLC for HTP
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${DLCBUILDDIR}:/output \
- -v ${DLCBUILDDIR}/imagenet:/imagenet-out \
- -w /imagenet-out \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-quant \
- --input_dlc=/output/mobilenet_edgetpu_224_1.0_float_batched_3.dlc \
- --input_list=imagenet_image_224_list.txt \
- --output_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_3.dlc
- # Mobilenetedge TPU model conversion completed
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_3.stamp: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_3.stamp
- # Offline prepare of MobilenetEdgeTPU Batched DLC for HTP
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${DLCBUILDDIR}:/output \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
- --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_3.dlc \
- --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc \
- --optimization_level 3 \
- --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \
- --memorymapped_buffer_hint=true
- # Mobilenetedge TPU model conversion completed
- cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc ${MLPERF_MODELS_PATH}
- touch $@
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_3_O2.stamp: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_3.stamp
- # Offline prepare of MobilenetEdgeTPU Batched DLC for HTP
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${DLCBUILDDIR}:/output \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
- --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_3.dlc \
- --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_batched_3_O2.dlc \
- --optimization_level 2 \
- --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \
- --memorymapped_buffer_hint=true
- # Mobilenetedge TPU model conversion completed
- cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_3_O2.dlc ${MLPERF_MODELS_PATH}
- touch $@
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float_batched_4.dlc: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobile/.stamp \
- ${DLCBUILDDIR}/mlperf_models.stamp
- # Mobilenetedge TPU model conversion ....
- # Batched Float model
- mkdir -p ${DLCBUILDDIR}
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${MOBILENETEDGETPU_MODEL_PATH}:/models \
- -v ${DLCBUILDDIR}:/output \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-tensorflow-to-dlc \
- -i /models/frozen_graph_tf1x_transform.pb \
- -d input "4,224,224,3" --out_node "MobilenetEdgeTPU/Predictions/Softmax" \
- -o /output/mobilenet_edgetpu_224_1.0_float_batched_4.dlc
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_4.stamp: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/imagenet/imagenet_image_224_list.txt \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float_batched_4.dlc
- # Quantization of MobilenetEdgeTPU Batched DLC for HTP
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${DLCBUILDDIR}:/output \
- -v ${DLCBUILDDIR}/imagenet:/imagenet-out \
- -w /imagenet-out \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-quant \
- --input_dlc=/output/mobilenet_edgetpu_224_1.0_float_batched_4.dlc \
- --input_list=imagenet_image_224_list.txt \
- --output_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_4.dlc
- # Mobilenetedge TPU model conversion completed
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_4.stamp: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_4.stamp
- # Offline prepare of MobilenetEdgeTPU Batched DLC for HTP
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${DLCBUILDDIR}:/output \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
- --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_4.dlc \
- --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc \
- --optimization_level 3 \
- --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \
- --memorymapped_buffer_hint=true
- # Mobilenetedge TPU model conversion completed
- cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc ${MLPERF_MODELS_PATH}
- touch $@
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_4_O2.stamp: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_4.stamp
- # Offline prepare of MobilenetEdgeTPU Batched DLC for HTP
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${DLCBUILDDIR}:/output \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
- --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_4.dlc \
- --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_batched_4_O2.dlc \
- --optimization_level 2 \
- --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \
- --memorymapped_buffer_hint=true
- # Mobilenetedge TPU model conversion completed
- cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_4_O2.dlc ${MLPERF_MODELS_PATH}
- touch $@
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float_batched_8.dlc: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobile/.stamp \
- ${DLCBUILDDIR}/mlperf_models.stamp
- # Mobilenetedge TPU model conversion ....
- # Batched Float model
- mkdir -p ${DLCBUILDDIR}
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${MOBILENETEDGETPU_MODEL_PATH}:/models \
- -v ${DLCBUILDDIR}:/output \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-tensorflow-to-dlc \
- -i /models/frozen_graph_tf1x_transform.pb \
- -d input "8,224,224,3" --out_node "MobilenetEdgeTPU/Predictions/Softmax" \
- -o /output/mobilenet_edgetpu_224_1.0_float_batched_8.dlc
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_8.stamp: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/imagenet/imagenet_image_224_list.txt \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_float_batched_8.dlc
- # Quantization of MobilenetEdgeTPU Batched DLC for HTP
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${DLCBUILDDIR}:/output \
- -v ${DLCBUILDDIR}/imagenet:/imagenet-out \
- -w /imagenet-out \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-quant \
- --input_dlc=/output/mobilenet_edgetpu_224_1.0_float_batched_8.dlc \
- --input_list=imagenet_image_224_list.txt \
- --output_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_8.dlc
- # Mobilenetedge TPU model conversion completed
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_8.stamp: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_8.stamp
- # Offline prepare of MobilenetEdgeTPU Batched DLC for HTP
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${DLCBUILDDIR}:/output \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
- --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_8.dlc \
- --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_batched_8.dlc \
- --optimization_level 3 \
- --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \
- --memorymapped_buffer_hint=true
- # Mobilenetedge TPU model conversion completed
- cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_8.dlc ${MLPERF_MODELS_PATH}
- touch $@
-
-${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_8_O2.stamp: \
- ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
- ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_quant_batched_8.stamp
- # Offline prepare of MobilenetEdgeTPU Batched DLC for HTP
- docker run \
- -e PYTHONPATH=/snpe_sdk/lib/python \
- -e LD_LIBRARY_PATH=/snpe_sdk/lib/x86_64-linux-clang \
- -v ${SNPE_SDK}:/snpe_sdk \
- -v ${DLCBUILDDIR}:/output \
- -u ${USERID}:${GROUPID} \
- mlperf_dlc_prepare \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
- --input_dlc=/output/mobilenet_edgetpu_224_1.0_quant_batched_8.dlc \
- --output_dlc=/output/mobilenet_edgetpu_224_1.0_htp_batched_8_O2.dlc \
- --optimization_level 2 \
- --htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \
- --memorymapped_buffer_hint=true
- # Mobilenetedge TPU model conversion completed
- cp ${DLCBUILDDIR}/mobilenet_edgetpu_224_1.0_htp_batched_8_O2.dlc ${MLPERF_MODELS_PATH}
- touch $@
-
${DLCBUILDDIR}/mobilenet_v4_float_batched_4.dlc: \
${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
${DLCBUILDDIR}/mobile/.stamp \
@@ -742,9 +401,10 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp.stamp: \
-u ${USERID}:${GROUPID} \
mlperf_dlc_prepare \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
+ --htp_dlbc=true \
--input_dlc=/output/ssd_mobiledet_qat_quant.dlc \
--output_dlc=/output/ssd_mobiledet_qat_htp.dlc \
- --set_output_layers=Postprocessor/BatchMultiClassNonMaxSuppression \
+ --set_output_tensors="Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" \
--optimization_level 3 \
--htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \
--memorymapped_buffer_hint=true
@@ -766,7 +426,7 @@ ${DLCBUILDDIR}/ssd_mobiledet_qat_htp_O2.stamp: \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
--input_dlc=/output/ssd_mobiledet_qat_quant.dlc \
--output_dlc=/output/ssd_mobiledet_qat_htp_O2.dlc \
- --set_output_layers=Postprocessor/BatchMultiClassNonMaxSuppression \
+ --set_output_tensors="Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0" \
--optimization_level 2 \
--htp_socs=sm8650,sm8635,sm8350,sm7325,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \
--memorymapped_buffer_hint=true
@@ -792,7 +452,7 @@ ${DLCBUILDDIR}/mobilebert_quant.pb: \
-v ${MOBILEBERT_MODEL_PATH}:/models \
-u ${USERID}:${GROUPID} \
mlperf_dlc_prepare \
- python3 /usr/local/lib/python3.8/dist-packages/tensorflow/python/tools/freeze_graph.py \
+ python3 /usr/local/lib/python3.10/dist-packages/tensorflow/python/tools/freeze_graph.py \
--input_graph=/models/saved_model.pb --input_checkpoint=/models/checkpoints/quant \
--output_graph=/output/mobilebert_quant.pb \
--output_node_names=end_logits,start_logits \
@@ -864,9 +524,10 @@ ${DLCBUILDDIR}/mobilebert_htp.stamp: \
-u ${USERID}:${GROUPID} \
mlperf_dlc_prepare \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
+ --htp_dlbc=true \
--input_dlc=/output/mobilebert_quant.dlc \
--output_dlc=/output/mobilebert_quantized_htp.dlc \
- --set_output_layers=transpose \
+ --set_output_tensors="transpose:0" \
--optimization_level 3 \
--htp_socs=sm8650,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \
--memorymapped_buffer_hint=true
@@ -886,9 +547,10 @@ ${DLCBUILDDIR}/mobilebert_htp_O2.stamp: \
-u ${USERID}:${GROUPID} \
mlperf_dlc_prepare \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
+ --htp_dlbc=true \
--input_dlc=/output/mobilebert_quant.dlc \
--output_dlc=/output/mobilebert_quantized_htp_O2.dlc \
- --set_output_layers=transpose \
+ --set_output_tensors="transpose:0" \
--optimization_level 2 \
--htp_socs=sm8650,sm8635,sm7325,sm8350,sm8450,sm8550,sm7475,sm7550,sc8380xp,sc8280x \
--memorymapped_buffer_hint=true
@@ -949,6 +611,7 @@ ${DLCBUILDDIR}/mobile_mosaic_htp.stamp: \
-u ${USERID}:${GROUPID} \
mlperf_dlc_prepare \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
+ --htp_dlbc=true \
--input_dlc=/output/mobile_mosaic_quant.dlc \
--output_dlc=/output/mobile_mosaic_htp.dlc \
--optimization_level 3 \
@@ -1036,6 +699,7 @@ ${DLCBUILDDIR}/snusr_htp.stamp: \
-u ${USERID}:${GROUPID} \
mlperf_dlc_prepare \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-graph-prepare \
+ --htp_dlbc=true \
--input_dlc=/output/snusr_quant.dlc \
--output_dlc=/output/snusr_htp.dlc \
--optimization_level 3 \
@@ -1067,6 +731,240 @@ ${DLCBUILDDIR}/snusr_htp_O2.stamp: \
cp ${DLCBUILDDIR}/snusr_htp_O2.dlc ${MLPERF_MODELS_PATH}
touch $@
+${DLCBUILDDIR}/sd_precompute_data.tar: \
+ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
+ ${DLCBUILDDIR}/mobile/.stamp
+ # Preparing sd_precompute_data.tar
+ docker run \
+ -v ${SNPE_SDK}:/qnn_sdk \
+ -v ${TOPDIR}/mobile_back_qti/DLC/util/:/util \
+ -e PYTHONPATH=/qnn_sdk/lib/python \
+ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:/qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \
+ -v ${DLCBUILDDIR}:/output \
+ -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \
+ -w /stable_diffusion \
+ mlperf_dlc_prepare \
+ python3 /util/StableDiffusion/flatten.py \
+ --random_latent_init /stable_diffusion/random_latent_init/random_init_1.pkl \
+ --time_step_embedding /stable_diffusion/time_step_embeddings/unet_time_step_embeddings_20.pkl \
+ --time_step_list /stable_diffusion/scheduler/scheduler_time_steps_20.pkl \
+ --unconditional_text_emb /stable_diffusion/unconditional_text_emb.pkl \
+ --dumpdir /stable_diffusion/.
+ mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion
+ cp ${DLCBUILDDIR}/stable_diffusion/sd_precompute_data.tar ${MLPERF_MODELS_PATH}/stable_diffusion/.
+ cp ${DLCBUILDDIR}/stable_diffusion/scheduler/lambdas.bin ${MLPERF_MODELS_PATH}/stable_diffusion/.
+ cp ${DLCBUILDDIR}/stable_diffusion/scheduler/betas.bin ${MLPERF_MODELS_PATH}/stable_diffusion/.
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion
+
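+# Each stable diffusion model below goes through the same three QNN stages:
+# qnn-onnx-converter (ONNX -> quantized model .cpp/.bin), qnn-model-lib-generator
+# (.cpp/.bin -> lib<model>.so), and qnn-context-binary-generator
+# (lib<model>.so -> <model>.serialized.bin, copied into ${MLPERF_MODELS_PATH}).
+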
+${DLCBUILDDIR}/text_encoder_qnn.cpp: \
+ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
+ ${DLCBUILDDIR}/mobile/.stamp \
+ ${DLCBUILDDIR}/mlperf_models.stamp
+ # TEXT-ENCODER conversion and quantization
+ # cpp & bin files
+ docker run \
+ -v ${SNPE_SDK}:/qnn_sdk \
+ -v ${TEXTENCODER_MODEL_PATH}:/models \
+ -e PYTHONPATH=/qnn_sdk/lib/python \
+ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:/qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \
+ -v ${DLCBUILDDIR}:/output \
+ -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \
+ -w /stable_diffusion/text_encoder \
+ -v ${TOPDIR}:/dir \
+ mlperf_dlc_prepare \
+ /qnn_sdk/bin/x86_64-linux-clang/qnn-onnx-converter \
+ --input_network text_encoder_onnx/text_encoder.onnx \
+ --input_list stable_diffusion_models/text_encoder_onnx/text_encoder_input_list.txt \
+ --act_bitwidth 16 \
+ --bias_bitwidth 32 \
+ --quantization_overrides text_encoder_onnx/text_encoder.encodings \
+ --output_path /output/text_encoder.cpp
+
+${DLCBUILDDIR}/text_encoder_qnn_model_generator.stamp: \
+ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
+ ${DLCBUILDDIR}/mobile/.stamp \
+ ${DLCBUILDDIR}/text_encoder_qnn.cpp
+ # TEXT-ENCODER lib generation started
+ docker run \
+ -v ${SNPE_SDK}:/qnn_sdk \
+ -e PYTHONPATH=/qnn_sdk/lib/python \
+ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:/qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \
+ -v ${DLCBUILDDIR}:/output \
+ -u ${USERID}:${GROUPID} \
+ mlperf_dlc_prepare \
+ /qnn_sdk/bin/x86_64-linux-clang/qnn-model-lib-generator \
+ -c /output/text_encoder.cpp \
+ -b /output/text_encoder.bin \
+ -o /output/model_libs \
+ -t x86_64-linux-clang
+ # Text-encoder lib generation completed
+
+${DLCBUILDDIR}/text_encoder_qnn_context_binary_generator.stamp: \
+ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
+ ${DLCBUILDDIR}/mobile/.stamp \
+ ${DLCBUILDDIR}/text_encoder_qnn_model_generator.stamp
+ # TEXT-ENCODER context-binary generation started
+ docker run \
+ -v ${SNPE_SDK}:/qnn_sdk \
+ -v ${TEXTENCODER_MODEL_PATH}:/models \
+ -e PYTHONPATH=/qnn_sdk/lib/python \
+ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:/qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \
+ -v ${DLCBUILDDIR}:/output \
+ -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \
+ -w /stable_diffusion/text_encoder \
+ -u ${USERID}:${GROUPID} \
+ mlperf_dlc_prepare \
+ /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \
+ --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \
+ --model /output/model_libs/x86_64-linux-clang/libtext_encoder.so \
+ --binary_file /output/text_encoder.serialized \
+ --config_file mcp_config.json
+ mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion
+ cp ${DLCBUILDDIR}/text_encoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion/.
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion
+ chmod -R 777 ${DLCBUILDDIR}/stable_diffusion
+ # TEXT-ENCODER context binary generation completed
+
+${DLCBUILDDIR}/vae_decoder_qnn.cpp: \
+ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
+ ${DLCBUILDDIR}/mobile/.stamp \
+ ${DLCBUILDDIR}/mlperf_models.stamp
+ # VAE-DECODER conversion and quantization
+ # cpp & bin files
+ docker run \
+ -v ${SNPE_SDK}:/qnn_sdk \
+ -v ${VAEDECODER_MODEL_PATH}:/models \
+ -e PYTHONPATH=/qnn_sdk/lib/python \
+ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:/qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \
+ -v ${DLCBUILDDIR}:/output \
+ -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \
+ -w /stable_diffusion/vae_decoder \
+ -v ${TOPDIR}:/dir \
+ mlperf_dlc_prepare \
+ /qnn_sdk/bin/x86_64-linux-clang/qnn-onnx-converter \
+ --input_network vae_decoder_onnx/vae_decoder.onnx \
+ --input_list stable_diffusion_models/vae_decoder_onnx/vae_decoder_input_list.txt \
+ --act_bitwidth 16 \
+ --bias_bitwidth 32 \
+ --quantization_overrides vae_decoder_onnx/vae_decoder.encodings \
+ --output_path /output/vae_decoder.cpp
+
+${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp: \
+ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
+ ${DLCBUILDDIR}/mobile/.stamp \
+ ${DLCBUILDDIR}/vae_decoder_qnn.cpp
+ # VAE-DECODER lib generation started
+ docker run \
+ -v ${SNPE_SDK}:/qnn_sdk \
+ -e PYTHONPATH=/qnn_sdk/lib/python \
+ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:/qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \
+ -v ${DLCBUILDDIR}:/output \
+ -u ${USERID}:${GROUPID} \
+ mlperf_dlc_prepare \
+ /qnn_sdk/bin/x86_64-linux-clang/qnn-model-lib-generator \
+ -c /output/vae_decoder.cpp \
+ -b /output/vae_decoder.bin \
+ -o /output/model_libs \
+ -t x86_64-linux-clang
+ # vae-decoder lib generation completed
+
+${DLCBUILDDIR}/vae_decoder_qnn_context_binary_generator.stamp: \
+ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
+ ${DLCBUILDDIR}/mobile/.stamp \
+ ${DLCBUILDDIR}/vae_decoder_qnn_model_generator.stamp
+ # VAE-DECODER context-binary generation started
+ docker run \
+ -v ${SNPE_SDK}:/qnn_sdk \
+ -v ${VAEDECODER_MODEL_PATH}:/models \
+ -e PYTHONPATH=/qnn_sdk/lib/python \
+ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:/qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \
+ -v ${DLCBUILDDIR}:/output \
+ -u ${USERID}:${GROUPID} \
+ mlperf_dlc_prepare \
+ /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \
+ --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \
+ --model /output/model_libs/x86_64-linux-clang/libvae_decoder.so \
+ --binary_file /output/vae_decoder.serialized \
+ --config_file /models/mcp_config.json
+ mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion
+ cp ${DLCBUILDDIR}/vae_decoder.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion/.
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion
+ chmod -R 777 ${DLCBUILDDIR}/stable_diffusion
+ # VAE context binary generation completed
+
+${DLCBUILDDIR}/unet_qnn.cpp: \
+ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
+ ${DLCBUILDDIR}/mobile/.stamp \
+ ${DLCBUILDDIR}/mlperf_models.stamp
+ # UNET conversion and quantization
+ # cpp & bin files
+ docker run \
+ -v ${SNPE_SDK}:/qnn_sdk \
+ -v ${UNET_MODEL_PATH}:/models \
+ -e PYTHONPATH=/qnn_sdk/lib/python \
+ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:/qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \
+ -v ${DLCBUILDDIR}:/output \
+ -v ${DLCBUILDDIR}/stable_diffusion/:/stable_diffusion \
+ -w /stable_diffusion/unet \
+ -v ${TOPDIR}:/dir \
+ mlperf_dlc_prepare \
+ /qnn_sdk/bin/x86_64-linux-clang/qnn-onnx-converter \
+ --input_network unet_onnx_batch_1/unet.onnx \
+ -l input_3 NONTRIVIAL \
+ --input_list stable_diffusion_models/unet_onnx/unet_input_list.txt \
+ --act_bitwidth 16 \
+ --bias_bitwidth 32 \
+ --quantization_overrides unet_onnx_batch_1/unet.encodings \
+ --output_path /output/unet.cpp
+
+${DLCBUILDDIR}/unet_qnn_model_generator.stamp: \
+ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
+ ${DLCBUILDDIR}/mobile/.stamp \
+ ${DLCBUILDDIR}/unet_qnn.cpp
+ # UNET lib generation started
+ docker run \
+ -v ${SNPE_SDK}:/qnn_sdk \
+ -e PYTHONPATH=/qnn_sdk/lib/python \
+ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:/qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \
+ -v ${DLCBUILDDIR}:/output \
+ -u ${USERID}:${GROUPID} \
+ mlperf_dlc_prepare \
+ /qnn_sdk/bin/x86_64-linux-clang/qnn-model-lib-generator \
+ -c /output/unet.cpp \
+ -b /output/unet.bin \
+ -o /output/model_libs \
+ -t x86_64-linux-clang \
+ -t aarch64-android
+ # UNET lib generation completed
+
+${DLCBUILDDIR}/unet_qnn_context_binary_generator.stamp: \
+ ${DLCBUILDDIR}/mlperf_dlc_prepare_docker.stamp \
+ ${DLCBUILDDIR}/mobile/.stamp \
+ ${DLCBUILDDIR}/unet_qnn_model_generator.stamp
+ # UNET context binary generation started
+ docker run \
+ -v ${SNPE_SDK}:/qnn_sdk \
+ -v ${UNET_MODEL_PATH}:/models \
+ -e PYTHONPATH=/qnn_sdk/lib/python \
+ -e LD_LIBRARY_PATH=/usr/local/clang-9.0.0/lib:/qnn_sdk/lib/x86_64-linux-clang:${LD_LIBRARY_PATH} \
+ -v ${DLCBUILDDIR}:/output \
+ -u ${USERID}:${GROUPID} \
+ mlperf_dlc_prepare \
+ /qnn_sdk/bin/x86_64-linux-clang/qnn-context-binary-generator \
+ --backend /qnn_sdk/lib/x86_64-linux-clang/libQnnHtp.so \
+ --model /output/model_libs/x86_64-linux-clang/libunet.so \
+ --binary_file /output/unet.serialized \
+ --config_file /models/mcp_config.json
+ mkdir -p ${MLPERF_MODELS_PATH}/stable_diffusion
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion
+ cp ${DLCBUILDDIR}/unet.serialized.bin ${MLPERF_MODELS_PATH}/stable_diffusion/.
+ chmod -R 777 ${MLPERF_MODELS_PATH}/stable_diffusion
+ chmod -R 777 ${DLCBUILDDIR}/stable_diffusion
+ # UNET context binary generation completed
+
####################################################################################
# CALIBRATION / QUANTIZATION
####################################################################################
@@ -1126,39 +1024,6 @@ ${DLCBUILDDIR}/ade20k/state/quantdata.stamp: \
mv ${DLCBUILDDIR}/ade20k/resized_raw ${DLCBUILDDIR}/ade20k/quantdata
touch $@
-# Imagenet 224x224 calibration data preprocessing
-${DLCBUILDDIR}/imagenet/state/resized_224.stamp: \
- ${DATASETS_OUT}/state/calibration.stamp \
- ${DLCBUILDDIR}/mlperf_mobile_docker_1_1.stamp
- # Scaling Imagenet images to 224x224
- rm -rf ${DLCBUILDDIR}/imagenet/resized_224
- mkdir -p ${DLCBUILDDIR}/imagenet/resized_224
- docker run \
- -v ${TOPDIR}/datasets/util:/util \
- -v ${CALIBRATION_DATA}/imagenet:/imagenet \
- -v ${DLCBUILDDIR}:/output \
- -u ${USERID}:${GROUPID} \
- mlperf_mobile:1.1 \
- /bin/bash -c "python3 /util/imagenet/Resize224.py /imagenet/images /output/imagenet/resized_224"
- mkdir -p ${DLCBUILDDIR}/imagenet/state
- touch $@
-
-${DLCBUILDDIR}/imagenet/state/quantdata_224.stamp: \
- ${DLCBUILDDIR}/imagenet/state/resized_224.stamp \
- ${DLCBUILDDIR}/mlperf_mobile_docker_1_1.stamp
- # Generating Imagenet quantization data for SNPE
- rm -rf ${DLCBUILDDIR}/imagenet/resized_224_raw
- mkdir -p ${DLCBUILDDIR}/imagenet/resized_224_raw
- docker run \
- -v ${TOPDIR}/datasets/util:/util \
- -v ${DLCBUILDDIR}/imagenet:/imagenet \
- -u ${USERID}:${GROUPID} \
- mlperf_mobile:1.1 \
- /bin/bash -c "python3 /util/common/jpg_to_raw.py /imagenet/resized_224"
- rm -rf ${DLCBUILDDIR}/imagenet/quantdata_224
- mv ${DLCBUILDDIR}/imagenet/resized_224_raw ${DLCBUILDDIR}/imagenet/quantdata_224
- touch $@
-
# Imagenet 384x384 calibration data preprocessing
${DLCBUILDDIR}/imagenet/state/resized_384.stamp: \
${DATASETS_OUT}/state/calibration.stamp \
@@ -1246,12 +1111,8 @@ gen-htp-dlc-info: \
-u ${USERID}:${GROUPID} \
mlperf_dlc_prepare \
/bin/bash -c '\
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_edgetpu_224_1.0_htp.dlc && \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_v4_htp.dlc && \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/ssd_mobiledet_qat_htp.dlc && \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc && \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc && \
- /snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_edgetpu_224_1.0_htp_batched_8.dlc && \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobilenet_v4_htp_batched_4.dlc && \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/mobile_mosaic_htp.dlc && \
/snpe_sdk/bin/x86_64-linux-clang/snpe-dlc-info -i /dlc/snusr_htp.dlc && \
@@ -1261,4 +1122,5 @@ gen-htp-dlc-info: \
# Clean
####################################################################################
clean:
- rm -rf ${BUILDDIR}/DLC
\ No newline at end of file
+ rm -rf ${BUILDDIR}/DLC
+
diff --git a/mobile_back_qti/DLC/README.md b/mobile_back_qti/DLC/README.md
index d467d983e..e4a951579 100644
--- a/mobile_back_qti/DLC/README.md
+++ b/mobile_back_qti/DLC/README.md
@@ -1,6 +1,6 @@
# DLC
-This Makefile will create the DLCs used by the QTI backend.
+This Makefile creates the DLCs and QNN context binaries used by the QTI backend.
The DLCs are already checked into [Mobile Models](https://github.com/mlcommons/mobile_models)
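+
+As a sketch, assuming the SNPE/QNN SDK variables are configured as this
+Makefile expects, the stable diffusion context binaries can be generated with:
+
+```bash
+make stable_diffusion_qnn
+```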
diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Dockerfile b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Dockerfile
new file mode 100644
index 000000000..d7b9b2dae
--- /dev/null
+++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Dockerfile
@@ -0,0 +1,6 @@
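+# AIMET 1.29 torch-gpu development image; the wheels below are cp38 builds
+# matching this image's Python 3.8.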
+FROM artifacts.codelinaro.org/codelinaro-aimet/aimet-dev:1.29.0.torch-gpu
+
+RUN pip install --upgrade huggingface_hub
+RUN python3 -m pip install --quiet --upgrade jax ftfy diffusers==0.30.1 transformers==4.30.1 tokenizers==0.11.1 onnx==1.11.0 onnxsim==0.4.33 safetensors==0.3.3 accelerate
+RUN pip install --quiet --upgrade https://github.com/quic/aimet/releases/download/1.29.0/AimetCommon-torch_gpu_1.29.0-cp38-cp38-linux_x86_64.whl
+RUN pip install --quiet --upgrade https://github.com/quic/aimet/releases/download/1.29.0/AimetTorch-torch_gpu_1.29.0-cp38-cp38-linux_x86_64.whl
\ No newline at end of file
diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Makefile b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Makefile
new file mode 100644
index 000000000..8b2c218ba
--- /dev/null
+++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/Makefile
@@ -0,0 +1,122 @@
+# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##########################################################################
+
+this_mkfile:=$(abspath $(lastword $(MAKEFILE_LIST)))
+AIMETBUILDDIR:=$(abspath $(shell dirname ${this_mkfile}))
+TOPDIR:=${AIMETBUILDDIR}/../../../../..
+
+.PHONY: clean aimet_calibration
+
+${AIMETBUILDDIR}/nvidia_docker_runtime.stamp:
+ ### Executing nvidia docker container
+ curl -fsSL https\://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
+ && curl -s -L https\://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
+ sudo sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list
+ sudo apt-get update
+ sudo apt-get install -y nvidia-container-toolkit
+ sudo nvidia-ctk runtime configure --runtime=docker
+ sudo systemctl restart docker
+ touch $@
+
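+# The stamp above installs and configures the NVIDIA container runtime so the
+# aimet torch-gpu container used below can access the host GPU.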
+aimet_calibration: \
+ clean \
+ ${AIMETBUILDDIR}/aimet_calibration.stamp \
+ ${AIMETBUILDDIR}/input_vectors_generation.stamp \
+ copy_files_to_output \
+ copy_configs_to_output
+
+${AIMETBUILDDIR}/aimet_torch_gpu_docker.stamp:
+ ## Building aimet-docker
+ docker image build -t aimet_torch_gpu_docker .
+ touch $@
+
+${AIMETBUILDDIR}/aimet_calibration.stamp: \
+ ${AIMETBUILDDIR}/nvidia_docker_runtime.stamp \
+ ${AIMETBUILDDIR}/aimet_torch_gpu_docker.stamp
+ # Executing docker instance
+ docker run \
+ -v /etc/localtime:/etc/localtime:ro \
+ -v /etc/timezone:/etc/timezone:ro \
+ -v ${AIMETBUILDDIR}:/tmp \
+ -w /tmp/example1 \
+ --network=host \
+ --ulimit core=-1 \
+ --ipc=host \
+ --shm-size=8G \
+ --cap-add=SYS_PTRACE \
+ --security-opt seccomp=unconfined \
+ aimet_torch_gpu_docker \
+ /bin/bash -c "cp -rv ../aimet.py . && python3 aimet.py"
+
+${AIMETBUILDDIR}/input_vectors_generation.stamp: \
+ ${AIMETBUILDDIR}/aimet_torch_gpu_docker.stamp
+ # Creates input vectors to be passed during conversion
+ docker run \
+ -v /etc/localtime:/etc/localtime:ro \
+ -v /etc/timezone:/etc/timezone:ro \
+ -v ${AIMETBUILDDIR}:/tmp \
+ -w /tmp \
+ --network=host \
+ --ulimit core=-1 \
+ --ipc=host \
+ --shm-size=8G \
+ --cap-add=SYS_PTRACE \
+ --security-opt seccomp=unconfined \
+ aimet_torch_gpu_docker \
+ /bin/bash -c "python3 example2/generate_inputs.py --pickle_path example1/_exports_/fp32.npy --working_dir ."
+
+copy_files_to_output:
+ # Copies generated artifacts to output/DLC/stable_diffusion
+ mkdir -p ${TOPDIR}/output/DLC/stable_diffusion
+ chmod -R 777 ${TOPDIR}/output/DLC/
+ cp -rv ${AIMETBUILDDIR}/example1/_exports_/* ${TOPDIR}/output/DLC/stable_diffusion/.
+ cp -rv ${AIMETBUILDDIR}/stable_diffusion_models ${TOPDIR}/output/DLC/stable_diffusion/.
+
+copy_configs_to_output:
+ # Copies configs to output
+ mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/text_encoder
+ mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder
+ mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/unet
+ mv ${TOPDIR}/output/DLC/stable_diffusion/text_encoder_onnx ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/.
+ mv ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder_onnx ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/.
+ mv ${TOPDIR}/output/DLC/stable_diffusion/unet_onnx_batch_1 ${TOPDIR}/output/DLC/stable_diffusion/unet/.
+ cp -rv ${AIMETBUILDDIR}/graph_config_text_encoder.json ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/graph_config.json
+ cp -rv ${AIMETBUILDDIR}/mcp_config.json ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/.
+ cp -rv ${AIMETBUILDDIR}/graph_config_vae.json ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/graph_config.json
+ cp -rv ${AIMETBUILDDIR}/mcp_config.json ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/.
+ cp -rv ${AIMETBUILDDIR}/graph_config_unet.json ${TOPDIR}/output/DLC/stable_diffusion/unet/graph_config.json
+ cp -rv ${AIMETBUILDDIR}/mcp_config.json ${TOPDIR}/output/DLC/stable_diffusion/unet/.
+ mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/stable_diffusion_models
+ cp -rv ${AIMETBUILDDIR}/stable_diffusion_models/text_encoder_onnx ${TOPDIR}/output/DLC/stable_diffusion/text_encoder/stable_diffusion_models/.
+ mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/stable_diffusion_models
+ cp -rv ${AIMETBUILDDIR}/stable_diffusion_models/vae_decoder_onnx ${TOPDIR}/output/DLC/stable_diffusion/vae_decoder/stable_diffusion_models/.
+ mkdir -p ${TOPDIR}/output/DLC/stable_diffusion/unet/stable_diffusion_models
+ cp -rv ${AIMETBUILDDIR}/stable_diffusion_models/unet_onnx ${TOPDIR}/output/DLC/stable_diffusion/unet/stable_diffusion_models/.
+ chmod -R 777 ${TOPDIR}/output/DLC/stable_diffusion
+
+clean:
+ # Removes all generated outputs (except example1/_exports_) from AIMET and DLC/stable_diffusion
+ rm -rf ${AIMETBUILDDIR}/*.stamp
+ rm -rf ${AIMETBUILDDIR}/example1/*.png
+ rm -rf ${AIMETBUILDDIR}/stable_diffusion_models
+ rm -rf ${TOPDIR}/output/DLC/stable_diffusion/*
+
+deep_clean:
+ # Caution: removes all generated outputs (including example1/_exports_) from AIMET and DLC/stable_diffusion
+ rm -rf ${AIMETBUILDDIR}/*.stamp
+ rm -rf ${AIMETBUILDDIR}/example1/_exports_
+ rm -rf ${AIMETBUILDDIR}/example1/*.png
+ rm -rf ${AIMETBUILDDIR}/stable_diffusion_models
+ rm -rf ${TOPDIR}/output/DLC/stable_diffusion/*
\ No newline at end of file
diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/aimet.py b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/aimet.py
new file mode 100644
index 000000000..bf989b04c
--- /dev/null
+++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/aimet.py
@@ -0,0 +1,193 @@
+# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##########################################################################
+
+from huggingface_hub.hf_api import HfFolder
+HfFolder.save_token('')
+
+print("##############Json import########################")
+import json
+from argparse import Namespace
+
+with open('config.json', 'rt') as f:
+ config = Namespace(**json.load(f))
+
+import sys
+import os
+sys.path.append('../')
+
+from utilities.nsptargets import NspTargets
+
+# Android GEN2 and GEN3 targets are supported by this script
+nsp_target = NspTargets.Android.GEN3
+
+# Select quantsim config based on target
+config.config_file = f'./quantsim_configs/htp_quantsim_config_{nsp_target.dsp_arch}.json'
+print(f"Using {config.config_file}")
+
+# Uncomment the line below to sanity-check the pipeline before doing a full run
+# os.environ['SANITY_CHECK_NOTEBOOK_FLOW'] = "True"
+
+if os.environ.get("SANITY_CHECK_NOTEBOOK_FLOW") == "True":
+ config.num_calibration_samples = 1
+ config.adaround_iter_text_encoder = 1
+ config.adaround_samples_text_encoder = 1
+ config.adaround_iter_unet = 1
+ config.adaround_samples_unet = 1
+ config.adaround_iter_vae = 1
+ config.adaround_samples_vae = 1
+
+
+
+print("##############Package import########################")
+import torch
+from redefined_modules.transformers.models.clip.modeling_clip import CLIPTextModel
+from redefined_modules.diffusers.models.unet_2d_condition import UNet2DConditionModel
+from redefined_modules.diffusers.models.vae import AutoencoderKLDecoder
+from diffusers import DPMSolverMultistepScheduler
+from transformers import CLIPTokenizer
+
+if config.stable_diffusion_variant == "1.5":
+ text_encoder_repo = 'benjamin-paine/stable-diffusion-v1-5'
+ text_encoder_subfolder = 'text_encoder'
+ text_encoder_revision = 'main'
+ unet_repo = 'benjamin-paine/stable-diffusion-v1-5'
+ unet_subfolder = 'unet'
+ unet_revision = 'main'
+ vae_repo = 'benjamin-paine/stable-diffusion-v1-5'
+ vae_subfolder = 'vae'
+ vae_revision = 'main'
+ tokenizer_repo = 'openai/clip-vit-large-patch14'
+ tokenizer_subfolder = ''
+ tokenizer_revision = 'main'
+elif config.stable_diffusion_variant == "2.1":
+ text_encoder_repo = "stabilityai/stable-diffusion-2-1-base"
+ text_encoder_subfolder = 'text_encoder'
+ text_encoder_revision = 'main'
+ unet_repo = "stabilityai/stable-diffusion-2-1-base"
+ unet_subfolder = 'unet'
+ unet_revision = 'main'
+ vae_repo = "stabilityai/stable-diffusion-2-1-base"
+ vae_subfolder = 'vae'
+ vae_revision = 'main'
+ tokenizer_repo = "stabilityai/stable-diffusion-2-1-base"
+ tokenizer_subfolder = 'tokenizer'
+ tokenizer_revision = 'main'
+else:
+ raise Exception(f"config.stable_diffusion_variant must be either '1.5' or '2.1', found {config.stable_diffusion_variant}")
+
+
+
+print("############## Hugging face pipeline initialization ########################")
+device = 'cuda'
+dtype = torch.half if config.half_precision else torch.float
+
+print("Loading pre-trained TextEncoder model")
+text_encoder = CLIPTextModel.from_pretrained(text_encoder_repo,
+ subfolder=text_encoder_subfolder, revision=text_encoder_revision,
+ torch_dtype=dtype, cache_dir=config.cache_dir).to(device)
+text_encoder.config.return_dict = False
+
+print("Loading pre-trained UNET model")
+unet = UNet2DConditionModel.from_pretrained(unet_repo,
+ subfolder=unet_subfolder, revision=unet_revision,
+ torch_dtype=dtype, cache_dir=config.cache_dir).to(device)
+
+print("Loading pre-trained VAE model")
+vae = AutoencoderKLDecoder.from_pretrained(vae_repo,
+ subfolder=vae_subfolder, revision=vae_revision,
+ torch_dtype=dtype, cache_dir=config.cache_dir).to(device)
+vae.config.return_dict = False
+
+print("Loading scheduler")
+scheduler = DPMSolverMultistepScheduler(beta_start=0.00085,
+ beta_end=0.012,
+ beta_schedule="scaled_linear",
+ num_train_timesteps=1000)
+scheduler.set_timesteps(config.diffusion_steps)
+scheduler.config.prediction_type = 'epsilon'
+
+print("Loading tokenizer")
+tokenizer = CLIPTokenizer.from_pretrained(tokenizer_repo,
+ subfolder=tokenizer_subfolder, revision=tokenizer_revision,
+ cache_dir=config.cache_dir)
+
+
+
+print("############## Floating pt evaluation ########################")
+from stable_diff_pipeline import run_the_pipeline, run_tokenizer, run_text_encoder, run_diffusion_steps, run_vae_decoder, save_image
+
+prompt = "decorated modern country house interior, 8 k, light reflections"
+image = run_the_pipeline(prompt, unet, text_encoder, vae, tokenizer, scheduler, config, test_name='fp32')
+save_image(image.squeeze(0), 'generated.png')
+
+from IPython.display import Image, display
+display(Image(filename='generated.png'))
+
+
+
+print("############## Calibrating TE ########################")
+from aimet_quantsim import apply_adaround_te, calibrate_te
+
+with open(config.calibration_prompts, "rt") as f:
+ print(f'Loading prompts from {config.calibration_prompts}')
+ prompts = f.readlines()
+ prompts = prompts[:config.num_calibration_samples]
+
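+# Each run_tokenizer entry is a (conditional, unconditional) token pair.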
+tokens = [run_tokenizer(tokenizer, prompt) for prompt in prompts]
+
+text_encoder_sim = calibrate_te(text_encoder, tokens, config)
+
+
+print("############## Calibrating UNET ########################")
+from aimet_quantsim import calibrate_unet, replace_mha_with_sha_blocks
+
+embeddings = [(run_text_encoder(text_encoder, uncond),
+ run_text_encoder(text_encoder, cond)) for cond, uncond in tokens]
+embeddings = [torch.cat([uncond, cond]) for uncond, cond in embeddings]
+
+unet_sim = calibrate_unet(unet, embeddings, scheduler, config)
+
+replace_mha_with_sha_blocks(unet) # convert unet to SHA so it has same expected inputs as unet_sim which is SHA
+
+
+print("############## Calibrating VAE ########################")
+from aimet_quantsim import calibrate_vae
+from tqdm.auto import tqdm
+
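+# Run the full diffusion loop once per calibration prompt; the resulting
+# latents are the calibration inputs for the VAE decoder.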
+latents = [run_diffusion_steps(unet, emb, scheduler, config, randomize_seed=True) for emb in tqdm(embeddings)]
+print('Obtained latents using UNET QuantSim')
+
+vae_sim = calibrate_vae(vae, latents, config)
+
+
+
+print("############## Running quantized off target inference ########################")
+image = run_the_pipeline(prompt, unet_sim.model, text_encoder_sim.model, vae_sim.model, tokenizer, scheduler, config, test_name="quantized")
+save_image(image.squeeze(0), 'generated_after_quant.png')
+
+display(Image(filename='generated_after_quant.png'))
+
+
+
+print("############## Export all models ########################")
+from aimet_quantsim import export_all_models
+
+export_all_models(text_encoder_sim, unet_sim, vae_sim, tokens, embeddings, latents, batch_sizes_unet=[1])
+
+
+print("############## Generate artifacts ########################")
+from utilities.generate_target_artifacts import generate_target_artifacts
+
+generate_target_artifacts(text_encoder, unet, None, tokenizer, scheduler, config, diffusion_steps=[20,50], seed_list=[1], min_seed=633994880, max_seed=633994880)
diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_text_encoder.json b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_text_encoder.json
new file mode 100644
index 000000000..57345a74f
--- /dev/null
+++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_text_encoder.json
@@ -0,0 +1,17 @@
+{
+ "graphs": [{
+ "graph_names":["text_encoder"],
+ "vtcm_mb":8,
+ "O" : 3,
+ "fp16_relaxed_precision": 0
+ }],
+ "devices": [
+ { "pd_session": "unsigned",
+ "dsp_arch": "v75",
+ "cores":[{
+ "rpc_control_latency": 100,
+ "perf_profile": "burst"
+ }]
+ }
+ ]
+ }
\ No newline at end of file
diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_unet.json b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_unet.json
new file mode 100644
index 000000000..9d2d4ee11
--- /dev/null
+++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_unet.json
@@ -0,0 +1,17 @@
+{
+ "graphs": [{
+ "graph_names":["unet"],
+ "vtcm_mb":8,
+ "O" : 3,
+ "fp16_relaxed_precision": 0
+ }],
+ "devices": [
+ { "pd_session": "unsigned",
+ "dsp_arch": "v75",
+ "cores":[{
+ "rpc_control_latency": 100,
+ "perf_profile": "burst"
+ }]
+ }
+ ]
+ }
diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_vae.json b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_vae.json
new file mode 100644
index 000000000..39b312bb1
--- /dev/null
+++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/graph_config_vae.json
@@ -0,0 +1,17 @@
+{
+ "graphs": [{
+ "graph_names":["vae_decoder"],
+ "vtcm_mb":8,
+ "O" : 3,
+ "fp16_relaxed_precision": 0
+ }],
+ "devices": [
+ { "pd_session": "unsigned",
+ "dsp_arch": "v75",
+ "cores":[{
+ "rpc_control_latency": 100,
+ "perf_profile": "burst"
+ }]
+ }
+ ]
+ }
diff --git a/mobile_back_qti/DLC/util/StableDiffusion/AIMET/mcp_config.json b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/mcp_config.json
new file mode 100644
index 000000000..7a3a1927a
--- /dev/null
+++ b/mobile_back_qti/DLC/util/StableDiffusion/AIMET/mcp_config.json
@@ -0,0 +1,5 @@
+{
+"backend_extensions" :
+ {"shared_library_path" : "/qnn_sdk/lib/x86_64-linux-clang/libQnnHtpNetRunExtensions.so",
+ "config_file_path" : "/models/graph_config.json"}
+}
\ No newline at end of file
diff --git a/mobile_back_qti/DLC/util/StableDiffusion/README.md b/mobile_back_qti/DLC/util/StableDiffusion/README.md
new file mode 100644
index 000000000..44c1339b4
--- /dev/null
+++ b/mobile_back_qti/DLC/util/StableDiffusion/README.md
@@ -0,0 +1,67 @@
+# Stable Diffusion
+
+## This README covers the steps needed to
+
+* Run AIMET quantization
+* Convert the generated ONNX files to bin files
+* Generate all the artifacts needed for stable diffusion inference on Qualcomm SoCs
+
+### Platform requirements
+
+* Machine running Ubuntu 20.04 or later
+* AIMET PRO version 1.29.0 `(the make script installs it automatically)`
+* Docker version 20.10.24
+* Machine with an NVIDIA Tesla A100 or Tesla V100 GPU (at least 32 GB)
+* NVIDIA driver version 525.60.13 or equivalent
+
+### Steps to execute
+
+`Please follow the steps below in order and run them as root to avoid permission issues`
+
+#### Prerequisites
+
+* Clone the mobile_app_open repository
+
+* Install Qualcomm Package manager on the linux machine
+
+```shell
+sudo dpkg -i ./QualcommPackageManager3.3.0.111.1.Linux-x86.deb
+```
+
+* Extract the QAIRT SDK (from Requirements above) to mobile_app_open/mobile_back_qti
+
+```shell
+qpm-cli --extract ./qualcomm_neural_processing_sdk.2.25.0.240728.Linux-AnyCPU.qik
+mkdir mobile_app_open/mobile_back_qti/qairt/
+cp -rv /opt/qcom/aistack/qairt/2.25.0.240728 mobile_app_open/mobile_back_qti/qairt/
+```
+
+Once done,
+
+* Clone the AIMET SD notebook repository inside
+ `/mobile_back_qti/DLC/util/StableDiffusion/AIMET`
+
+* Create a Hugging Face access token and paste it into the `HfFolder.save_token('')` call near the top of the `aimet.py` script, inside the `/mobile_back_qti/DLC/util/StableDiffusion/AIMET` folder.
+  A placeholder is provided in `aimet.py`; see the sketch below.
+
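+  A minimal sketch of the edit (the token value shown is a placeholder, not a real token):
+
+  ```python
+  from huggingface_hub.hf_api import HfFolder
+
+  HfFolder.save_token('hf_xxxxxxxxxxxxxxxx')  # replace with your own token
+  ```
+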
+* Inside the AIMET directory, run this make command
+
+ ```shell
+ sudo make aimet_calibration
+ ```
+
+* Once the above make command completes successfully, move to
+  `/mobile_back_qti/DLC`, e.g. by typing
+
+ ```shell
+ cd ../../../
+ ```
+
+* After reaching `/mobile_back_qti/DLC`, run this make command
+
+ ```shell
+ sudo make stable_diffusion_qnn SNPE_SDK=/mobile_back_qti/qairt/
+ ```
+
+* After successful execution, all the artifacts needed to run stable diffusion inference on device will be located in
+ `/output/DLC/mlperf_models/stable_diffusion`
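+
+### Optional: inspecting the precompute archive
+
+`flatten.py` in `DLC/util/StableDiffusion` packages the precomputed time-step
+embeddings, random latent initializations and the unconditional text embedding
+into `sd_precompute_data.tar`. A minimal sketch for listing its contents
+(member names vary with the chosen seeds and step counts):
+
+```python
+import tarfile
+
+with tarfile.open("sd_precompute_data.tar") as tar:
+    for member in tar.getnames():
+        print(member)
+```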
diff --git a/mobile_back_qti/DLC/util/StableDiffusion/flatten.py b/mobile_back_qti/DLC/util/StableDiffusion/flatten.py
new file mode 100644
index 000000000..4a0f25d68
--- /dev/null
+++ b/mobile_back_qti/DLC/util/StableDiffusion/flatten.py
@@ -0,0 +1,294 @@
+# Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##########################################################################
+
+#!/usr/bin/python3
+import argparse
+from datetime import datetime
+import pickle
+import copy
+import sys
+import os
+import numpy as np
+import pprint
+pp = pprint.PrettyPrinter(indent=4)
+
+def check_shape_type(name, tensor_np, expected_shape, expected_np_type):
+ if tensor_np.shape != expected_shape:
+ raise Exception(f"{name} shape is {tensor_np.shape}, expect {expected_shape}")
+ if tensor_np.dtype != expected_np_type:
+ raise Exception(f"{name} is {tensor_np.dtype}, expect {expected_np_type}")
+
+def extract_data(ts_dict):
+ iter = ts_dict['iteration']
+ if 'time_step' not in ts_dict:
+ raise Exception('Pickle file parsing error: key "time_step" is not found')
+ ts_np = ts_dict['time_step'].astype(np.int32)
+ print(f'iteration {iter}, time_step: {ts_np}')
+
+ if 'timeembedding' not in ts_dict:
+ raise Exception('Pickle file parsing error: key "timeembedding" is not found')
+
+ ts_embed_np = ts_dict['timeembedding']
+ print(f'iteration {iter}, timeembedding: ', type(ts_embed_np), ts_embed_np.shape, ts_embed_np.dtype,ts_embed_np[0,0:4])
+ check_shape_type('timeembedding', ts_embed_np, (1,1280), np.float32)
+ if 'random_init' not in ts_dict:
+ raise Exception('Pickle file parsing error: key "random_init" is not found')
+
+ rad = ts_dict['random_init']
+ if 'seed' not in rad:
+ raise Exception('Pickle file parsing error: key "seed" is not found')
+ if 'latent_vector' not in rad:
+ raise Exception('Pickle file parsing error: key "latent_vector" is not found')
+
+ seed = rad['seed']
+ lv_np = rad['latent_vector']
+ print('random_init.latent_vector_nchw', type(lv_np), lv_np.shape, lv_np.dtype)
+ check_shape_type('random_init.latent_vector', lv_np, (1,4,64,64), np.float32)
+
+ lv_nhwc_np = np.moveaxis(lv_np, [0,1,2,3], [0,3,1,2])
+ print(f'iteration {iter}, seed: {seed}, random_init.latent_vector_nhwc', type(lv_nhwc_np), lv_nhwc_np.shape, lv_nhwc_np.dtype)
+ print(lv_nhwc_np[0,0,0:2,:])
+
+ # extract uncond_text_embedding_np, TBD
+ uncond_embedding_key = 'uncond_text_embedding'
+ if uncond_embedding_key not in ts_dict:
+ print(f'Pickle file parsing error: key {uncond_embedding_key} is not found')
+ uncond_text_embedding_np = None
+ else:
+ uncond_text_embedding_np = ts_dict[uncond_embedding_key]
+ print(f'iteration {iter}, {uncond_embedding_key}: ', type(uncond_text_embedding_np),
+ uncond_text_embedding_np.shape, uncond_text_embedding_np.dtype,uncond_text_embedding_np[0,0,0:4])
+ check_shape_type(uncond_embedding_key, uncond_text_embedding_np, (1,77,768), np.float32)
+
+ return (iter, ts_np, ts_embed_np, seed, lv_nhwc_np, uncond_text_embedding_np)
+
+
+def parse_pickle(pickle_file):
+ random_init_dict = {}
+ ts_list_dict = {}
+ ts_embed_list_dict = {}
+ num_rec = 0
+
+ print(f'parsing {pickle_file} ...')
+ fd = open(pickle_file, 'rb')
+ tensor_dict_list = pickle.load(fd)
+
+ num_steps = 0
+ ts_list = []
+ ts_embed_list = []
+ for ts_dict in tensor_dict_list:
+ if 'iteration' not in ts_dict:
+ raise Exception('Pickle file parsing error: key "iteration" is not found')
+
+ iter = ts_dict['iteration']
+ (iter, ts_np, ts_embed_np, seed, lv_nhwc_np, uncond_text_embedding_np) = extract_data(ts_dict)
+ num_rec += 1
+ if iter == 0 and iter < num_steps:
+ print(f'num_steps is {num_steps}, iteration:{iter} wrap around, reset num_steps...')
+ # iter wrap around start of next session, the first session is done
+ ts_list_dict[num_steps] = ts_list
+ ts_embed_list_dict[num_steps] = ts_embed_list
+ num_steps = 0
+ ts_list = []
+ ts_embed_list = []
+
+ num_steps += 1
+ ts_list.append(ts_np)
+ ts_embed_list.append(ts_embed_np)
+
+ random_init_dict[seed] = lv_nhwc_np
+
+ ts_list_dict[num_steps] = ts_list
+ ts_embed_list_dict[num_steps] = ts_embed_list
+
+ return (num_rec, ts_list_dict, ts_embed_list_dict, random_init_dict, uncond_text_embedding_np)
+
+def parse_random_latent_init_pickle(pickle_file):
+ random_init_dict = {}
+
+ print(f'parsing {pickle_file} ...')
+ fd = open(pickle_file, 'rb')
+ tensor_dict_list = pickle.load(fd)
+
+ for seed in tensor_dict_list:
+ lv_np = tensor_dict_list[seed]
+ print('random_init.latent_vector_nchw', type(lv_np), lv_np.shape, lv_np.dtype)
+ check_shape_type('random_init.latent_vector', lv_np, (1,4,64,64), np.float32)
+
+ lv_nhwc_np = np.moveaxis(lv_np, [0,1,2,3], [0,3,1,2])
+ print(f'seed: {seed}, random_init.latent_vector_nhwc', type(lv_nhwc_np), lv_nhwc_np.shape, lv_nhwc_np.dtype)
+ print(lv_nhwc_np[0,0,0:2,:])
+
+ random_init_dict[int(seed)] = lv_nhwc_np
+
+ return len(tensor_dict_list.keys()), random_init_dict
+
+def parse_ts_embedding_pickle(pickle_file):
+ print(f'parsing {pickle_file} ...')
+ fd = open(pickle_file, 'rb')
+ tensor_dict_list = pickle.load(fd)
+
+ keys = sorted([int(key) for key in tensor_dict_list.keys()])
+ print(keys)
+
+ ts_embed_list = []
+ for iter_num in keys:
+ ts_embed_np = tensor_dict_list[str(iter_num)]
+ print(f'iteration {iter_num}, timeembedding: ', type(ts_embed_np), ts_embed_np.shape, ts_embed_np.dtype,ts_embed_np[0,0:4])
+ check_shape_type('timeembedding', ts_embed_np, (1,1280), np.float32)
+ ts_embed_list.append(ts_embed_np)
+
+ return len(keys), ts_embed_list
+
+def parse_ts_list_pickle(pickle_file):
+ print(f'parsing {pickle_file} ...')
+ fd = open(pickle_file, 'rb')
+ ts_data = pickle.load(fd)
+
+ ts_np = ts_data.astype(np.int32)
+ print(f'time_step: {ts_np}')
+
+ return len(ts_np), ts_np
+
+def parse_unconditional_encoding_pickle(pickle_file):
+ print(f'parsing {pickle_file} ...')
+ fd = open(pickle_file, 'rb')
+ uncond_text_embedding_np = pickle.load(fd)
+
+ print('uncond_text_embedding: ', type(uncond_text_embedding_np),
+ uncond_text_embedding_np.shape, uncond_text_embedding_np.dtype,uncond_text_embedding_np[0,0,0:4])
+ check_shape_type('uncond_text_embedding', uncond_text_embedding_np, (1,77,768), np.float32)
+
+ return uncond_text_embedding_np
+
+def dump_data(pickle_stats,
+ ts_list_dict, ts_embed_list_dict, random_init_dict, uncond_text_embedding_np, dumpdir):
+
+ file_list = []
+ # Dump the random_init variables
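+ # .rand layout: int32 seed count, the int32 seeds in sorted order, then one
+ # 1x64x64x4 float32 NHWC latent per seed, written back-to-back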
+ seed_list = list(sorted(random_init_dict.keys()))
+ seed = seed_list[0]
+ shape_str = 'x'.join([str(e) for e in random_init_dict[seed].shape])
+ f_name = os.path.join(dumpdir, f'rand_init_{len(seed_list)}_seeds_{shape_str}_float32.bin.rand')
+ rand_file = open(f_name, 'wb')
+
+ v_np = np.array(len(seed_list)).astype(np.int32)
+ v_np.tofile(rand_file)
+ for seed in seed_list:
+ v_np = np.array(seed).astype(np.int32)
+ v_np.tofile(rand_file)
+ for seed in seed_list:
+ random_init_dict[seed].tofile(rand_file)
+ rand_file.close()
+ file_list.append(f_name)
+
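+ # One .ts file per step count: int32 number of steps, the int32 time steps,
+ # then one 1x1280 float32 time embedding per step, written back-to-back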
+ for num_steps in ts_list_dict:
+ ts_list = ts_list_dict[num_steps]
+ ts_embed_list = ts_embed_list_dict[num_steps]
+ shape_str = 'x'.join([str(e) for e in ts_embed_list[0].shape])
+ f_name = os.path.join(dumpdir, f'timestep_steps_{num_steps}_int32_embedding_{shape_str}_float32.bin.ts')
+ ts_file = open(f_name, 'wb')
+ v_np = np.array(len(ts_list)).astype(np.int32)
+ v_np.tofile(ts_file)
+ for l in ts_list:
+ l.tofile(ts_file)
+ for l in ts_embed_list:
+ l.tofile(ts_file)
+ ts_file.close()
+ file_list.append(f_name)
+
+ shape_str = 'x'.join([str(e) for e in uncond_text_embedding_np.shape])
+ uncond_text_embedding_file = os.path.join(dumpdir, f'batch_1_uncond_text_embedding_{shape_str}_float32.bin.cte')
+ with open(uncond_text_embedding_file, 'wb') as f:
+ uncond_text_embedding_np.tofile(f)
+
+ with open(os.path.join(dumpdir, 'readme.txt'), 'w') as f:
+ print('From:', file=f)
+ for pfile in pickle_stats:
+ f_stat = os.stat(pfile)
+ dts = datetime.fromtimestamp(f_stat.st_ctime)
+ (v0, v1, v2) = pickle_stats[pfile]
+ print(f' {pfile}, total_rec: {v0}, created on {dts} ', file=f)
+ print(f' num_steps: {v1}', file=f)
+ print(f' {len(v2)} unique_random_seeds: {v2}', file=f)
+ (v1,v2) = (list(sorted(ts_list_dict.keys())), sorted(seed_list))
+ print('\nTotal:', file=f)
+ print(f' num_steps: {v1}', file=f)
+ print(f' {len(v2)} unique_random_seeds: {v2}', file=f)
+ print(f' number of uncond_text_embedding: 1', file=f)
+ #tar the files together
+ tar_file = os.path.join(dumpdir, 'sd_precompute_data.tar')
+ cmd = f'/bin/tar cvf {tar_file} {os.path.join(dumpdir, "readme.txt")} ' + ' '.join(file_list) + f' {uncond_text_embedding_file}'
+ print(f'Run {cmd}')
+ os.system(cmd)
+
+# for debugging only
+def create_smaller_pickle(file_name, count=10):
+ s_file_name = "small_" + file_name
+ with open(file_name, 'rb') as f:
+ data_dict_seq = pickle.load(f)
+ with open(s_file_name, 'wb') as f:
+ pickle.dump(data_dict_seq[0:count],f)
+
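+# Example invocation (file names are illustrative; use the pkl files produced
+# by the AIMET artifact-generation step):
+#   python3 flatten.py \
+#     --random_latent_init random_init.pkl \
+#     --time_step_embedding ts_embedding_20.pkl,ts_embedding_50.pkl \
+#     --time_step_list ts_list_20.pkl,ts_list_50.pkl \
+#     --unconditional_text_emb uncond_text_embedding.pkl
+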
+if __name__ == '__main__':
+ default_logdir = os.path.join("tar_output", datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))
+
+ parser = argparse.ArgumentParser(
+ description='Generates sd_precompute_data.tar file based on provided pkl files.')
+
+ parser.add_argument('--random_latent_init', type=str,
+ required=True,
+ help="Path to a random-latent-init pkl file containing random initial latents.")
+ parser.add_argument('--time_step_embedding', type=str,
+ required=True,
+ help="Comma seperated time-step-embedding pkl files containing ts-embedding.")
+ parser.add_argument('--time_step_list', type=str,
+ required=True,
+ help="Comma seperated time-step-list pkl files containing timestamp.")
+ parser.add_argument('--unconditional_text_emb', type=str,
+ required=True,
+ help="Path to a unconditional-text-emb pkl file containing unconditional text embedding.")
+ parser.add_argument('--dumpdir', type=str,
+ default=default_logdir,
+ help="Path to a directory for dumping.\
+ Default value is 'tar_output/'")
+
+ config = parser.parse_args()
+ config.time_step_embedding = config.time_step_embedding.split(',')
+ config.time_step_list = config.time_step_list.split(',')
+
+ os.makedirs(config.dumpdir, exist_ok=True)
+
+ pickle_stats = {}
+
+ num_rec, random_init_dict = parse_random_latent_init_pickle(config.random_latent_init)
+
+ ts_embed_list_dict = {}
+ for ts_embedd_file in config.time_step_embedding:
+ length, data = parse_ts_embedding_pickle(ts_embedd_file)
+ ts_embed_list_dict[length] = data
+
+ ts_list_dict = {}
+ for ts_list_file in config.time_step_list:
+ length, data = parse_ts_list_pickle(ts_list_file)
+ ts_list_dict[length] = data
+
+ uncond_text_embedding_np = parse_unconditional_encoding_pickle(config.unconditional_text_emb)
+
+ if sorted(ts_embed_list_dict.keys()) != sorted(ts_list_dict.keys()):
+ raise Exception('Wrong files for time_step_embedding and time_step_list')
+
+ dump_data(pickle_stats,
+ ts_list_dict, ts_embed_list_dict, random_init_dict, uncond_text_embedding_np, config.dumpdir)
diff --git a/mobile_back_qti/DLC/util/StableDiffusion/readme.txt b/mobile_back_qti/DLC/util/StableDiffusion/readme.txt
new file mode 100644
index 000000000..e69de29bb
diff --git a/mobile_back_qti/README.md b/mobile_back_qti/README.md
index 15201db4a..b4c2d771a 100644
--- a/mobile_back_qti/README.md
+++ b/mobile_back_qti/README.md
@@ -13,12 +13,12 @@ uploaded with the other submission files to here: `
* [Qualcomm Package Manager 3](https://qpm.qualcomm.com/#/main/tools/details/QPM3)
-* [SNPE SDK](https://qpm.qualcomm.com/#/main/tools/details/qualcomm_neural_processing_sdk) (Version 2.20.0)
+* [SNPE SDK](https://qpm.qualcomm.com/#/main/tools/details/qualcomm_neural_processing_sdk) (Version 2.25.0.240728)
* Linux machine capable of running Ubuntu docker images
### Optional
-If you wish to rebuild the DLC files yourself, you will have these additional requirements:
+To rebuild the DLC files yourself, you will have these additional requirements:
* Imagenet dataset (LSVRC2012_img_val.tar) put in the build/imagenet/downloads directory
* Linux machine also capable of running Tensorflow debian based docker images
@@ -27,6 +27,11 @@ Use your browser to download the SNPE SDK using the links above.
Create your Github personal access token.
+### Note for Stable Diffusion
+
+To generate the stable diffusion models, please follow the instructions in
+`DLC/util/StableDiffusion/README.md`.
+
```shell
export SNPE_SDK=
cd DLC/ && make
@@ -46,14 +51,15 @@ cd mobile_app_open
* Install Qualcomm Package manager on the linux machine
```shell
-sudo apt-get install ./QualcommPackageManager3.3.0.99.0.Linux-x86.deb
+sudo dpkg -i ./QualcommPackageManager3.3.0.111.1.Linux-x86.deb
```
-* Extract the SNPE SDK (from Requirements above) to mobile_app_open/mobile_back_qti
+* Extract the QAIRT SDK (from Requirements above) to mobile_app_open/mobile_back_qti
```shell
-qpm-cli --extract ./qualcomm_neural_processing_sdk.2.20.0.240223.Linux-AnyCPU.qik
-cp -rv /opt/qcom/aistack/snpe/2.20.0.240223/. mobile_app_open/mobile_back_qti/qaisw-2.20.0.240223
+qpm-cli --extract ./qualcomm_neural_processing_sdk.2.25.0.240728.Linux-AnyCPU.qik
+mkdir mobile_app_open/mobile_back_qti/qairt/
+cp -rv /opt/qcom/aistack/qairt/2.25.0.240728 mobile_app_open/mobile_back_qti/qairt/
```
* If you have an HTTP proxy, you may need the following
@@ -66,7 +72,18 @@ export USE_PROXY_WORKAROUND=1
Build with the following build command.
```shell
-make OFFICIAL_BUILD=true FLUTTER_BUILD_NUMBER=1 WITH_QTI=1 docker/flutter/android/release
+make OFFICIAL_BUILD=true FLUTTER_BUILD_NUMBER=1 WITH_QTI=1 docker/flutter/android/release
+```
+
+To include `stable_diffusion` in the build:
+
+* Download the `Tutorial for stable diffusion` package from the QPM manager.
+* Copy `include` folder from `/model/example3/host_linux_target_android_with_MLPerf/include` to `mobile_back_qti/cpp/backend_qti/StableDiffusionShared/`
+* Copy `libStableDiffusion.so` from `/model/example3/host_linux_target_android_with_MLPerf/libs/aarch64-android/` to `mobile_back_qti/cpp/backend_qti/StableDiffusionShared/`
+* Run the command below:
+
+```shell
+make OFFICIAL_BUILD=true FLUTTER_BUILD_NUMBER=1 WITH_STABLEDIFFUSION=1 WITH_QTI=1 docker/flutter/android/release
```
This will generate the MLPerf flutter app with QTI backend in ```mobile_app_open/output/android-apks/_mlperfbench--.apk```
@@ -95,9 +112,8 @@ uploaded with the other submission files to here: `
-* [SNPE windows SDK] ()
- * Version 2.20.0
+* [SNPE windows SDK](https://qpm.qualcomm.com/#/main/tools/details/qualcomm_neural_processing_sdk)
+ * Version 2.25.0.240728
* Windows x86 machine
## Setting up the environment
@@ -152,16 +168,20 @@ Run accuracy mode with following command
.\run_mlperf_test.bat --models --dataset --usecase --mode accuracy
```
-* --usecase parameter can take one of these arguments => image_classification_v2, image_classification, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2, image_classification_offline
+* --usecase parameter can take one of these arguments => image_classification_v2, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2
* see the results in accuracy_results.txt and performance_results.txt
## FAQ
### What devices does this backend support?
-This backend only supports SDX_Elite, SD7G3, SD8SG3, SD8G3 devices.
+This backend only supports SDX_Elite, SD8SG3, SD8G3 devices.
Other already launched Snapdragon based devices can also run the MLPerf app as default fallback.
### Is SNPE used to run all the models?
-Yes. All the models use SNPE for execution for current version.
+Yes. All the models use the Qualcomm AI Runtime (QAIRT) for execution in the current version.
+
+### Which devices support stable diffusion?
+
+Currently, only the SD8G3 device supports stable_diffusion.
diff --git a/mobile_back_qti/cpp/backend_qti/BUILD b/mobile_back_qti/cpp/backend_qti/BUILD
index 5fe46ce8f..36f6d6415 100644
--- a/mobile_back_qti/cpp/backend_qti/BUILD
+++ b/mobile_back_qti/cpp/backend_qti/BUILD
@@ -21,6 +21,7 @@
#
load("@bazel_skylib//rules:common_settings.bzl", "string_flag")
+load("@bazel_skylib//lib:selects.bzl", "selects")
load("@org_tensorflow//tensorflow/lite:build_def.bzl", "tflite_copts", "tflite_jni_binary")
load("@snpe_version_loader//:snpe_var_def.bzl", "SNPE_VERSION")
load("//flutter/cpp/proto:pbtxt2header.bzl", "pbtxt2header")
@@ -49,6 +50,41 @@ string_flag(
],
)
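+
+# "1" compiles the Stable Diffusion pipeline into the backend; the default "2"
+# leaves it out (typically set via
+# --//mobile_back_qti/cpp/backend_qti:with_stablediffusion=1).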
+string_flag(
+ name = "with_stablediffusion",
+ build_setting_default = "2",
+ values = [
+ "1",
+ "2",
+ ],
+)
+
+config_setting(
+ name = "stablediffusion_option",
+ flag_values = {":with_stablediffusion": "1"},
+)
+
+config_setting(
+ name = "nostablediffusion_option",
+ flag_values = {":with_stablediffusion": "2"},
+)
+
+selects.config_setting_group(
+ name = "android_with_stablediffusion",
+ match_all = [
+ "android_arm64",
+ "stablediffusion_option",
+ ],
+)
+
+selects.config_setting_group(
+ name = "android_without_stablediffusion",
+ match_all = [
+ "android_arm64",
+ "nostablediffusion_option",
+ ],
+)
+
config_setting(
name = "debug_option",
flag_values = {":with_qti": "2"},
@@ -69,8 +105,10 @@ config_setting(
)
snpe_copts = [
- "-Imobile_back_qti/" + SNPE_VERSION + "/include/SNPE",
- "-fexceptions",
+ "-Imobile_back_qti/" + "qairt/" + SNPE_VERSION + "/include/SNPE",
+ "-Imobile_back_qti/" + "qairt/" + SNPE_VERSION + "/include/QNN",
+ "-Imobile_back_qti/" + "qairt/" + SNPE_VERSION + "/include/QNN/HTP",
+ "-fcxx-exceptions",
"-lc++_shared",
]
@@ -94,6 +132,7 @@ pbtxt2header(
"settings/qti_settings_sm4450.pbtxt",
"settings/qti_settings_sm7550.pbtxt",
"settings/qti_settings_sm8635.pbtxt",
+ "settings/qti_settings_stablediffusion.pbtxt",
],
)
@@ -166,18 +205,29 @@ cc_library(
"debug_option": ["DEBUG_FLAG"],
"//conditions:default": [],
"config_option": ["EXTERNAL_CONFIG"],
+ }) +
+ select({
+ "stablediffusion_option": ["STABLEDIFFUSION_FLAG"],
+ "//conditions:default": [],
}),
deps = [
+ "//flutter/cpp/c:headers",
":qti_allocator",
":qti_settings",
- "//flutter/cpp/c:headers",
"@org_tensorflow//tensorflow/core:tflite_portable_logging",
] + select({
- "android_arm64": [
- "//mobile_back_qti:snpe",
+ ":android_without_stablediffusion": [
+ "@org_tensorflow//tensorflow/lite/delegates/gpu:delegate",
"@org_tensorflow//tensorflow/lite/c:c_api",
"@org_tensorflow//tensorflow/lite/c:common",
+ "//mobile_back_qti:snpe",
+ ],
+ ":android_with_stablediffusion": [
"@org_tensorflow//tensorflow/lite/delegates/gpu:delegate",
+ "@org_tensorflow//tensorflow/lite/c:c_api",
+ "@org_tensorflow//tensorflow/lite/c:common",
+ "//mobile_back_qti:snpe",
+ "//mobile_back_qti/cpp/backend_qti/StableDiffusionShared:StableDiffusionShared",
],
"windows_arm64": [
"//mobile_back_qti:snpewindowslib",
diff --git a/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD
new file mode 100644
index 000000000..a81cff9c3
--- /dev/null
+++ b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/BUILD
@@ -0,0 +1,54 @@
+# Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+##########################################################################
+
+load("@stable_diffusion_external_deps_shared//:stable_diffusion_var_def_shared.bzl", "OPENCV_ROOT_DIR")
+
+package(
+ default_visibility = ["//visibility:public"],
+ licenses = ["notice"], # Apache 2.0
+)
+
+config_setting(
+ name = "android_arm64",
+ values = {"cpu": "arm64-v8a"},
+)
+
+cc_library(
+ name = "opencv_core",
+ srcs = [
+ OPENCV_ROOT_DIR + "/libs/arm64-v8a/libopencv_core.so",
+ OPENCV_ROOT_DIR + "/libs/arm64-v8a/libopencv_imgcodecs.so",
+ ],
+ hdrs = glob([
+ OPENCV_ROOT_DIR + "/jni/include/opencv2/*.hpp",
+ OPENCV_ROOT_DIR + "/jni/include/opencv2/**/*.hpp",
+ ]),
+ includes = [OPENCV_ROOT_DIR + "/jni/include/"],
+ visibility = ["//visibility:public"],
+)
+
+cc_library(
+ name = "StableDiffusionShared",
+ srcs = ["libStableDiffusion.so"],
+ hdrs = glob([
+ "include/*.hpp",
+ "include/**/*.hpp",
+ "include/**/*.h",
+ ]),
+ visibility = ["//visibility:public"],
+ deps = [
+ ":opencv_core",
+ ],
+)
diff --git a/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/variables.bzl b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/variables.bzl
new file mode 100644
index 000000000..d67e29abc
--- /dev/null
+++ b/mobile_back_qti/cpp/backend_qti/StableDiffusionShared/variables.bzl
@@ -0,0 +1,30 @@
+# Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Find OPENCV folder and pass it to bazel build config"""
+
+def _impl(repository_ctx):
+ opencv_path = "opencv"
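+ # The OpenCV archive is expected to be unpacked under
+ # StableDiffusionShared/include/opencv (see the BUILD file's glob paths).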
+ repository_ctx.file("BUILD", "")
+ repository_ctx.file(
+ "stable_diffusion_var_def_shared.bzl",
+ "OPENCV_ROOT_DIR = \"include/%s\"" % opencv_path,
+ )
+
+stable_diffusion_external_deps_shared = repository_rule(
+ implementation = _impl,
+ environ = ["OPENCV_ROOT_DIR"],
+ local = True,
+ attrs = {"workspace_dir": attr.string(mandatory = True)},
+)
diff --git a/mobile_back_qti/cpp/backend_qti/mlperf_helper.h b/mobile_back_qti/cpp/backend_qti/mlperf_helper.h
index d7ca688cb..8e747697f 100644
--- a/mobile_back_qti/cpp/backend_qti/mlperf_helper.h
+++ b/mobile_back_qti/cpp/backend_qti/mlperf_helper.h
@@ -57,6 +57,8 @@ static void process_config(const mlperf_backend_configuration_t *configs,
backend_data->scenario_ = configs->values[i];
} else if (strcmp(configs->keys[i], "snpe_output_layers") == 0) {
backend_data->snpeOutputLayers_ = configs->values[i];
+ } else if (strcmp(configs->keys[i], "snpe_output_tensors") == 0) {
+ backend_data->snpeOutputTensors_ = configs->values[i];
} else if (strcmp(configs->keys[i], "bg_load") == 0) {
if (strcmp(configs->values[i], "true") == 0) {
backend_data->bgLoad_ = true;
@@ -141,12 +143,19 @@ static void process_config(const mlperf_backend_configuration_t *configs,
} else {
backend_data->useCpuInt8_ = false;
}
+ } else if (strcmp(configs->keys[i], "pipeline") == 0) {
+ if (std::strcmp(configs->values[i], "StableDiffusionPipeline") == 0) {
+ backend_data->isStableDiffusion = true;
+ } else {
+ backend_data->isStableDiffusion = false;
+ }
}
}
LOG(INFO) << "Config: delegate: " << delegate
<< " | scenario: " << backend_data->scenario_
- << " | output: " << backend_data->snpeOutputLayers_
+ << " | output layer: " << backend_data->snpeOutputLayers_
+ << " | output tensor: " << backend_data->snpeOutputTensors_
<< " | isTfLite: " << backend_data->isTflite_
<< " | batchSize: " << backend_data->batchSize_
<< " | useSNPE: " << backend_data->useSnpe_
@@ -159,7 +168,8 @@ static void process_config(const mlperf_backend_configuration_t *configs,
<< " | profileLevel: " << profileLevel
<< " | useIonBuffer: " << backend_data->useIonBuffers_
<< " | acceleratorName: " << backend_data->acceleratorName_
- << " | useCpuInt8: " << backend_data->useCpuInt8_;
+ << " | useCpuInt8: " << backend_data->useCpuInt8_
+ << " | isStableDiffusion: " << backend_data->isStableDiffusion;
}
#endif
diff --git a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc
index b7009ab3b..839aac86b 100644
--- a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc
+++ b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.cc
@@ -15,6 +15,7 @@ limitations under the License.
#include "qti_backend_helper.h"
+#include <random>
#include
#include
static void split(std::vector<std::string> &split_string,
}
}
-static Snpe_StringList_Handle_t ResolveOutputLayerNames(std::string &line) {
- Snpe_StringList_Handle_t outputLayersHandle = Snpe_StringList_Create();
+static Snpe_StringList_Handle_t ResolveCommaSeparatedList(std::string &line) {
+ Snpe_StringList_Handle_t stringListHandle = Snpe_StringList_Create();
if (!line.empty()) {
std::vector<std::string> names;
split(names, line.substr(0), ',');
for (auto &name : names)
- Snpe_StringList_Append(outputLayersHandle, name.c_str());
+ Snpe_StringList_Append(stringListHandle, name.c_str());
}
- return outputLayersHandle;
+ return stringListHandle;
}
static Snpe_TensorShape_Handle_t calcStrides(
@@ -168,7 +169,10 @@ void QTIBackendHelper::use_psnpe(const char *model_path) {
SNPE_PSNPE_INPUTOUTPUTTRANSMISSIONMODE_SYNC));
Snpe_StringList_Handle_t outputLayers =
- ResolveOutputLayerNames(snpeOutputLayers_);
+ ResolveCommaSeparatedList(snpeOutputLayers_);
+
+ Snpe_StringList_Handle_t outputTensors =
+ ResolveCommaSeparatedList(snpeOutputTensors_);
Snpe_SNPEBuilder_Handle_t snpeBuilderHandle =
Snpe_SNPEBuilder_Create(containerHandle);
@@ -180,6 +184,7 @@ void QTIBackendHelper::use_psnpe(const char *model_path) {
Snpe_SNPEBuilder_SetRuntimeProcessorOrder(snpeBuilderHandle,
dummyInputRuntimeListHandle);
Snpe_SNPEBuilder_SetOutputLayers(snpeBuilderHandle, outputLayers);
+ Snpe_SNPEBuilder_SetOutputTensors(snpeBuilderHandle, outputTensors);
if (Snpe_StringList_Size(outputLayers) > 0)
Snpe_BuildConfig_SetOutputBufferNames(buildConfigHandle, outputLayers);
@@ -302,7 +307,9 @@ void QTIBackendHelper::use_snpe(const char *model_path) {
Snpe_SNPEBuilder_Create(containerHandle);
Snpe_SNPEBuilder_SetCpuFixedPointMode(snpeBuilderHandle, useCpuInt8_);
Snpe_StringList_Handle_t outputLayers =
- ResolveOutputLayerNames(snpeOutputLayers_);
+ ResolveCommaSeparatedList(snpeOutputLayers_);
+ Snpe_StringList_Handle_t outputTensors =
+ ResolveCommaSeparatedList(snpeOutputTensors_);
Snpe_SNPEBuilder_SetPerformanceProfile(snpeBuilderHandle, perfProfile_);
Snpe_SNPEBuilder_SetProfilingLevel(snpeBuilderHandle, profilingLevel_);
Snpe_SNPEBuilder_SetExecutionPriorityHint(snpeBuilderHandle,
@@ -311,6 +318,7 @@ void QTIBackendHelper::use_snpe(const char *model_path) {
inputRuntimeListHandle);
Snpe_SNPEBuilder_SetUseUserSuppliedBuffers(snpeBuilderHandle, true);
Snpe_SNPEBuilder_SetOutputLayers(snpeBuilderHandle, outputLayers);
+ Snpe_SNPEBuilder_SetOutputTensors(snpeBuilderHandle, outputTensors);
std::string platformOptionStr = "";
if (Socs::soc_check_feature(useIonBuffers_, platformOptionStr)) {
@@ -603,7 +611,8 @@ void QTIBackendHelper::get_data_formats() {
long bufSize = calcSizeFromDims(Snpe_TensorShape_Rank(dimsHandle),
Snpe_TensorShape_GetDimensions(dimsHandle));
if (outputBufferType_ == FLOAT_32) {
- if (snpeOutputLayers_ == "transpose") {
+ if (snpeOutputLayers_ == "transpose" ||
+ snpeOutputTensors_ == "transpose:0") {
// For mobileBERT, return output size as half the size of computed
// values,
// because the DLC returns only single layer as output but the app needs
@@ -698,3 +707,108 @@ std::string QTIBackendHelper::get_snpe_version() {
Snpe_DlVersion_Handle_t version = Snpe_Util_GetLibraryVersion();
return Snpe_DlVersion_GetBuild(version);
}
+
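+// Draws `numbers` samples from N(mean, stddev) with a fixed seed so the
+// initial latent noise is reproducible across runs.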
+std::vector<float> get_normal(unsigned numbers, unsigned seed = 5,
+ float mean = 0.0, float stddev = 1.0) {
+ std::default_random_engine generator(seed);
+ std::normal_distribution<float> distribution(mean, stddev);
+
+ std::vector<float> d;
+ for (unsigned i = 0; i < numbers; i++) d.push_back(distribution(generator));
+
+ return d;
+}
+
+void QTIBackendHelper::initSd(const char *model_path, const char *lib_path) {
+#ifdef STABLEDIFFUSION_FLAG
+ bool use_mmap = false;  // do not use a cached (mmapped) context binary
+ uint64_t context_bin_mmap_read_budget = 100000;
+ std::string temp(lib_path);
+ native_lib_path = temp;
+ std::string newtemp(model_path);
+ data_folder_path = newtemp;
+
+ // TODO: The variables below are used in preprocessInputSd.
+ // They may need to be set from the MLC configuration; hardcoded for now.
+ num_steps = 20;
+ seed = 633994880;
+ guidance_scale = 7.5;
+
+ mlperf_data_t input;
+ input.type = mlperf_data_t::Int32;
+ input.size = 77 * 1;  // tokenized input: 77 token ids
+ inputFormat_.push_back(input);
+
+ mlperf_data_t output;
+ output.type = mlperf_data_t::Uint8;
+ output.size = 512 * 512 * 3;
+ outputFormat_.push_back(output);
+
+ sd_pipeline = new QnnApiHelpers();
+
+ if (0 != sd_pipeline->Init(data_folder_path, native_lib_path, 768, 77, 1.0,
+ 512, 512, 3.0, use_mmap,
+ context_bin_mmap_read_budget)) {
+ LOG(FATAL) << "Initialization Failure";
+ }
+#endif
+}
+
+bool QTIBackendHelper::preprocessInputSd(void *data) {
+#ifdef STABLEDIFFUSION_FLAG
+ int32_t *input_prompt_ids = (int32_t *)data;
+ std::vector<float> noise = get_normal(64 * 64 * 4, seed);
+ return sd_pipeline->PreProcessInput(input_prompt_ids, noise, num_steps,
+ guidance_scale);
+#else
+ return false;
+#endif
+}
+
+bool QTIBackendHelper::executeSd() {
+#ifdef STABLEDIFFUSION_FLAG
+ for (int stepIdx = 0; stepIdx < num_steps; stepIdx++) {
+ bool runVAE = ((stepIdx + 1) == num_steps);
+ if (true != sd_pipeline->RunInference(runVAE)) {
+ LOG(FATAL) << "RunInference failure";
+ return false;
+ }
+ }
+ return true;
+#else
+ return false;
+#endif
+}
+
+bool QTIBackendHelper::getOutputSd(void **data) {
+#ifdef STABLEDIFFUSION_FLAG
+ JniHelpers::InferenceReturn inferenceReturn;
+ if (true != sd_pipeline->PostProcessOutput(false, false, inferenceReturn)) {
+ LOG(FATAL) << "PostProcessOutput failure";
+ return false;
+ }
+ *data = inferenceReturn.m_ImageData;
+
+ // The pipeline is torn down in deinitSd() when the backend is deleted.
+ return true;
+#else
+ return false;
+#endif
+}
+
+void QTIBackendHelper::deinitSd() {
+#ifdef STABLEDIFFUSION_FLAG
+ delete sd_pipeline;
+ sd_pipeline = nullptr;
+#endif
+}
\ No newline at end of file
diff --git a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h
index bc5c45893..736dc85ab 100644
--- a/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h
+++ b/mobile_back_qti/cpp/backend_qti/qti_backend_helper.h
@@ -25,6 +25,10 @@ limitations under the License.
#include "flutter/cpp/c/backend_c.h"
#include "flutter/cpp/c/type.h"
+#ifdef STABLEDIFFUSION_FLAG
+#include "StableDiffusionShared/include/QnnApiHelpers.hpp"
+#endif
+
class snpe_handler {
public:
Snpe_SNPE_Handle_t snpeHandle;
@@ -63,10 +67,16 @@ class QTIBackendHelper {
const char *name_ = "snpe";
const char *acceleratorName_;
std::string snpeOutputLayers_;
+ std::string snpeOutputTensors_;
std::vector<mlperf_data_t> inputFormat_;
std::vector<mlperf_data_t> outputFormat_;
std::unique_ptr<psnpe_handler> psnpe_;
std::unique_ptr<snpe_handler> snpe_;
+#ifdef STABLEDIFFUSION_FLAG
+ QnnApiHelpers *sd_pipeline;
+#else
+ void *sd_pipeline;
+#endif
Snpe_UserBufferList_Handle_t inputMapListHandle_, outputMapListHandle_;
Snpe_UserMemoryMap_Handle_t userMemoryMappedBufferMapHandle_;
std::vector<
@@ -95,6 +105,7 @@ class QTIBackendHelper {
bool useIonBuffers_ = true;
bool useCpuInt8_ = false;
bool isIonRegistered;
+ bool isStableDiffusion = false;
/* exposed functions */
void use_psnpe(const char *model_path);
@@ -106,6 +117,18 @@ class QTIBackendHelper {
void set_runtime_config();
std::string get_snpe_version();
+ void initSd(const char *model_path, const char *native_lib_path);
+ bool preprocessInputSd(void *data);
+ bool executeSd();
+ void deinitSd();
+ bool getOutputSd(void **data);
+
+ int num_steps;
+ int seed;
+ float guidance_scale;
+ std::string native_lib_path;
+ std::string data_folder_path;
+
static bool IsRuntimeAvailable(const snpe_runtimes_t delegate);
QTIBackendHelper()
@@ -117,7 +140,8 @@ class QTIBackendHelper {
inputMapListHandle_(Snpe_UserBufferList_Create()),
outputMapListHandle_(Snpe_UserBufferList_Create()),
snpe_(new snpe_handler()),
- psnpe_(new psnpe_handler()) {
+ psnpe_(new psnpe_handler()),
+ sd_pipeline(nullptr) {
odLayerMap[0] = "detection_boxes:0";
odLayerMap[1] = "Postprocessor/BatchMultiClassNonMaxSuppression_classes";
odLayerMap[2] = "detection_scores:0";
@@ -125,15 +149,23 @@ class QTIBackendHelper {
"Postprocessor/BatchMultiClassNonMaxSuppression_num_detections";
userMemoryMappedBufferMapHandle_ = Snpe_UserMemoryMap_Create();
isIonRegistered = false;
+
+ num_steps = 20;
+ seed = 0;
+ guidance_scale = 7.5;
}
~QTIBackendHelper() {
- Snpe_RuntimeList_Delete(inputRuntimeListHandle);
- Snpe_RuntimeList_Delete(dummyInputRuntimeListHandle);
- Snpe_StringList_Delete(networkInputTensorNamesHandle_);
- Snpe_StringList_Delete(networkOutputTensorNamesHandle_);
- Snpe_UserBufferList_Delete(inputMapListHandle_);
- Snpe_UserBufferList_Delete(outputMapListHandle_);
+ if (isStableDiffusion) {
+ deinitSd();
+ } else {
+ Snpe_RuntimeList_Delete(inputRuntimeListHandle);
+ Snpe_RuntimeList_Delete(dummyInputRuntimeListHandle);
+ Snpe_StringList_Delete(networkInputTensorNamesHandle_);
+ Snpe_StringList_Delete(networkOutputTensorNamesHandle_);
+ Snpe_UserBufferList_Delete(inputMapListHandle_);
+ Snpe_UserBufferList_Delete(outputMapListHandle_);
+ }
}
};
diff --git a/mobile_back_qti/cpp/backend_qti/qti_c.cc b/mobile_back_qti/cpp/backend_qti/qti_c.cc
index a6d37acaa..c9c18906c 100644
--- a/mobile_back_qti/cpp/backend_qti/qti_c.cc
+++ b/mobile_back_qti/cpp/backend_qti/qti_c.cc
@@ -44,18 +44,8 @@ bool useIonBuffer_g;
extern "C" {
#endif // __cplusplus
-// Should return true if current hardware is supported.
-bool mlperf_backend_matches_hardware(const char **not_allowed_message,
- const char **settings,
- const mlperf_device_info_t *device_info) {
- if (device_info && device_info->model && device_info->manufacturer) {
- LOG(INFO) << "QTI HW supported check: model: " << device_info->model
- << ", manufacturer: " << device_info->manufacturer;
- }
-
- std::ifstream in_file;
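+// Points ADSP_LIBRARY_PATH and LD_LIBRARY_PATH at the app's native library
+// directory so the DSP runtime artifacts can be found (Android only).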
+void set_system_paths(const char *native_lib_path) {
#ifdef __ANDROID__
- const char *native_lib_path = device_info->native_lib_path;
std::stringstream adsp_lib_path;
adsp_lib_path << native_lib_path << ";";
adsp_lib_path << "/system/lib/rfsa/adsp;/system/vendor/lib/rfsa/adsp;/dsp";
@@ -68,6 +58,21 @@ bool mlperf_backend_matches_hardware(const char **not_allowed_message,
setenv("LD_LIBRARY_PATH", ld_lib_path.str().c_str(), 1 /*override*/);
#endif
+}
+
+// Should return true if current hardware is supported.
+bool mlperf_backend_matches_hardware(const char **not_allowed_message,
+ const char **settings,
+ const mlperf_device_info_t *device_info) {
+ if (device_info && device_info->model && device_info->manufacturer) {
+ LOG(INFO) << "QTI HW supported check: model: " << device_info->model
+ << ", manufacturer: " << device_info->manufacturer;
+ }
+
+ std::ifstream in_file;
+ set_system_paths(device_info->native_lib_path);
+
*not_allowed_message = nullptr;
bool isQSoC = Socs::isSnapDragon(device_info->manufacturer);
LOG(INFO) << "Is QTI SOC: " << isQSoC;
@@ -117,14 +122,8 @@ mlperf_backend_ptr_t mlperf_backend_create(
// use lowLatency cores for all snpe models
CpuCtrl::lowLatency();
+ set_system_paths(native_lib_path);
-#ifdef __ANDROID__
- std::stringstream adsp_lib_path;
- adsp_lib_path << native_lib_path << ";";
- adsp_lib_path << "/system/lib/rfsa/adsp;/system/vendor/lib/rfsa/adsp;/dsp";
- LOG(INFO) << "lib_path: " << adsp_lib_path.str();
- setenv("ADSP_LIBRARY_PATH", adsp_lib_path.str().c_str(), 1 /*override*/);
-#endif
std::string snpe_version = xverstr(SNPE_VERSION_STRING);
if (snpe_version.compare("default") != 0) {
int dotPosition = snpe_version.find_last_of(".");
@@ -137,23 +136,29 @@ mlperf_backend_ptr_t mlperf_backend_create(
}
LOG(INFO) << "snpe_version: " << snpe_version;
- // set runtime config
- backend_data->set_runtime_config();
- // Use PSNPE or SNPE
- if (backend_data->useSnpe_) {
- backend_data->use_snpe(model_path);
+ // Stable Diffusion initialization
+ if (backend_data->isStableDiffusion) {
+ backend_data->initSd(model_path, native_lib_path);
+
+ LOG(INFO) << "StableDiffusion build completed successfully";
} else {
- backend_data->use_psnpe(model_path);
+ // set runtime config
+ backend_data->set_runtime_config();
+ // Use PSNPE or SNPE
+ if (backend_data->useSnpe_) {
+ backend_data->use_snpe(model_path);
+ } else {
+ backend_data->use_psnpe(model_path);
+ }
+
+ backend_data->queryCount_ = 0;
+
+ backend_data->get_data_formats();
+ backend_data->map_inputs();
+ backend_data->map_outputs();
+
+ LOG(INFO) << "SNPE build completed successfully";
}
-
- backend_data->queryCount_ = 0;
-
- backend_data->get_data_formats();
- backend_data->map_inputs();
- backend_data->map_outputs();
-
- LOG(INFO) << "SNPE build completed successfully";
-
return backend_data;
}
@@ -185,6 +190,9 @@ void mlperf_backend_delete(mlperf_backend_ptr_t backend_ptr) {
if (backend_data->isTflite_) {
tflite_backend_delete(backend_data->tfliteBackend_);
}
+ if (backend_data->isStableDiffusion) {
+ backend_data->deinitSd();
+ }
delete backend_data;
backend_data_ = nullptr;
}
@@ -201,7 +209,15 @@ mlperf_status_t mlperf_backend_issue_query(mlperf_backend_ptr_t backend_ptr) {
return tflite_backend_issue_query(backend_data->tfliteBackend_);
}
- ret = backend_data->execute();
+ if (backend_data->isStableDiffusion) {
+ if (backend_data->executeSd()) {
+ ret = MLPERF_SUCCESS;
+ } else {
+ ret = MLPERF_FAILURE;
+ }
+ } else {
+ ret = backend_data->execute();
+ }
#ifdef DEBUG_FLAG
auto end = high_resolution_clock::now();
@@ -250,6 +266,15 @@ mlperf_status_t mlperf_backend_set_input(mlperf_backend_ptr_t backend_ptr,
return tflite_backend_set_input(backend_data->tfliteBackend_, batchIndex, i,
data);
}
+
+ if (backend_data->isStableDiffusion) {
+ if (backend_data->preprocessInputSd(data)) {
+ return MLPERF_SUCCESS;
+ } else {
+ return MLPERF_FAILURE;
+ }
+ }
+
void *batchedDataPtr = ((backend_data->useIonBuffers_ == false) &&
(backend_data->inputBatch_ <= 1))
? data
@@ -304,13 +329,28 @@ mlperf_status_t mlperf_backend_get_output(mlperf_backend_ptr_t backend_ptr,
return tflite_backend_get_output(backend_data->tfliteBackend_, batchIndex,
outputIndex, data);
}
- if (backend_data->snpeOutputLayers_ ==
- "Postprocessor/BatchMultiClassNonMaxSuppression") {
+
+ if (backend_data->isStableDiffusion) {
+ if (backend_data->getOutputSd(data)) {
+ return MLPERF_SUCCESS;
+ } else {
+ *data = nullptr;
+ return MLPERF_FAILURE;
+ }
+ }
+
+ if (backend_data->snpeOutputTensors_.find(
+ "Postprocessor/BatchMultiClassNonMaxSuppression_classes") !=
+ std::string::npos ||
+ backend_data->snpeOutputLayers_ ==
+ "Postprocessor/BatchMultiClassNonMaxSuppression") {
// Reorder snpeOutputLayers_ for coco process_output
const char *outputLayerName = backend_data->odLayerMap[outputIndex].c_str();
*data = backend_data->bufs_[batchIndex].at(outputLayerName).data();
return MLPERF_SUCCESS;
- } else if (backend_data->snpeOutputLayers_ == "transpose") {
+ } else if (backend_data->snpeOutputTensors_.find("transpose:0") !=
+ std::string::npos ||
+ backend_data->snpeOutputLayers_ == "transpose") {
*data = backend_data->bufs_[int(batchIndex / backend_data->inputBatch_)]
.at(Snpe_StringList_At(
backend_data->networkOutputTensorNamesHandle_, 0))
diff --git a/mobile_back_qti/cpp/backend_qti/qti_settings.h b/mobile_back_qti/cpp/backend_qti/qti_settings.h
index ef88863fd..a052aa15e 100644
--- a/mobile_back_qti/cpp/backend_qti/qti_settings.h
+++ b/mobile_back_qti/cpp/backend_qti/qti_settings.h
@@ -47,6 +47,7 @@ const std::string empty_settings = "";
#include INCLUDE_SETTINGS(default_dsp)
#include INCLUDE_SETTINGS(default_cpu)
#include INCLUDE_SETTINGS(default_gpu)
+#include INCLUDE_SETTINGS(stablediffusion)
STRING_SETTINGS(sd7g1)
STRING_SETTINGS(sd7pg2)
@@ -65,5 +66,6 @@ STRING_SETTINGS(sm7550)
STRING_SETTINGS(default_dsp)
STRING_SETTINGS(default_cpu)
STRING_SETTINGS(default_gpu)
+STRING_SETTINGS(stablediffusion)
#endif
diff --git a/mobile_back_qti/cpp/backend_qti/rpcmem.cc b/mobile_back_qti/cpp/backend_qti/rpcmem.cc
index 3daf45440..dbaf4b22b 100644
--- a/mobile_back_qti/cpp/backend_qti/rpcmem.cc
+++ b/mobile_back_qti/cpp/backend_qti/rpcmem.cc
@@ -31,7 +31,6 @@ RpcMem::RpcMem() {
libHandle_ = dlopen("libcdsprpc.so", RTLD_NOW);
#else
std::string windowsRpcPath = Socs::getServiceBinaryPath(L"qcnspmcdm");
- ;
std::string windowsLibName = "libcdsprpc.dll";
windowsRpcPath = windowsRpcPath + '\\' + windowsLibName;
libHandle_ = LoadLibrary(windowsRpcPath.c_str());
@@ -55,6 +54,7 @@ RpcMem::RpcMem() {
rpcmemFree_ = reinterpret_cast<decltype(rpcmemFree_)>(
(void *)(intptr_t)GetProcAddress((HINSTANCE)libHandle_, "rpcmem_free"));
#endif
+
if (rpcmemAlloc_ && rpcmemFree_) {
isSuccess_ = true;
} else {
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_cpu.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_cpu.pbtxt
index b22113d1b..5c4a508cd 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_cpu.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_cpu.pbtxt
@@ -19,26 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_CPU"
- accelerator_name: "snpe_cpu"
- accelerator_desc: "CPU"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- delegate_selected: "SNPE_CPU"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -56,39 +36,14 @@ benchmark_setting {
accelerator_name: "snpe_cpu"
accelerator_desc: "CPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
single_stream_expected_latency_ns: 250000
delegate_selected: "SNPE_CPU"
}
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_CPU"
- accelerator_name: "psnpe_cpu"
- accelerator_desc: "CPU"
- batch_size: 128
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc"
- model_checksum: "6523060565b8d3f326f3f323c531fc1c"
- }
- }
- delegate_selected: "SNPE_CPU"
-}
-
benchmark_setting {
benchmark_id: "image_classification_offline_v2"
framework: "SNPE"
@@ -115,8 +70,8 @@ benchmark_setting {
accelerator_desc: "CPU"
batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_CPU"
@@ -126,8 +81,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "use_ion_buffer"
@@ -139,8 +94,8 @@ benchmark_setting {
accelerator_name: "snpe_cpu"
accelerator_desc: "CPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_CPU"
@@ -150,8 +105,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "input_buffer_type"
@@ -167,8 +122,8 @@ benchmark_setting {
accelerator_name: "snpe_cpu"
accelerator_desc: "CPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_CPU"
@@ -195,8 +150,8 @@ benchmark_setting {
accelerator_name: "snpe_cpu"
accelerator_desc: "CPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_CPU"
@@ -223,8 +178,8 @@ benchmark_setting {
accelerator_name: "snpe_cpu"
accelerator_desc: "CPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_CPU"
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt
index d32678782..b0eb1069f 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_dsp.pbtxt
@@ -60,37 +60,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc"
- model_checksum: "6523060565b8d3f326f3f323c531fc1c"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
delegate_selected: "SNPE_DSP"
@@ -118,8 +89,8 @@ benchmark_setting {
accelerator_desc: "HTP"
batch_size: 12288
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_DSP"
@@ -129,8 +100,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -146,8 +117,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_DSP"
@@ -157,8 +128,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -182,8 +153,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_DSP"
@@ -214,8 +185,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -250,8 +221,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_DSP"
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt
index d5226818a..7ad7a249a 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_default_gpu.pbtxt
@@ -19,36 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_GPU"
- accelerator_name: "snpe_gpu"
- accelerator_desc: "GPU"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- delegate_choice: {
- priority: 2
- delegate_name: "SNPE_CPU"
- accelerator_name: "snpe_cpu"
- accelerator_desc: "CPU"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- delegate_selected: "SNPE_GPU"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -62,8 +32,8 @@ benchmark_setting {
accelerator_name: "snpe_gpu"
accelerator_desc: "GPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
delegate_choice: {
@@ -73,44 +43,8 @@ benchmark_setting {
accelerator_desc: "CPU"
batch_size: 128
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
- }
- }
- delegate_selected: "SNPE_GPU"
-}
-
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_GPU"
- accelerator_name: "psnpe_gpu"
- accelerator_desc: "GPU"
- batch_size: 128
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc"
- model_checksum: "6523060565b8d3f326f3f323c531fc1c"
- }
- }
- delegate_choice: {
- priority: 2
- delegate_name: "SNPE_CPU"
- accelerator_name: "psnpe_cpu"
- accelerator_desc: "CPU"
- batch_size: 128
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc"
- model_checksum: "6523060565b8d3f326f3f323c531fc1c"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
delegate_selected: "SNPE_GPU"
@@ -134,8 +68,8 @@ benchmark_setting {
accelerator_desc: "GPU"
batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_choice: {
@@ -145,8 +79,8 @@ benchmark_setting {
accelerator_desc: "CPU"
batch_size: 128
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_GPU"
@@ -156,8 +90,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "use_ion_buffer"
@@ -169,8 +103,8 @@ benchmark_setting {
accelerator_name: "snpe_gpu"
accelerator_desc: "GPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_choice: {
@@ -179,8 +113,8 @@ benchmark_setting {
accelerator_name: "snpe_cpu"
accelerator_desc: "CPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_GPU"
@@ -190,8 +124,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "input_buffer_type"
@@ -207,8 +141,8 @@ benchmark_setting {
accelerator_name: "snpe_gpu"
accelerator_desc: "GPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_choice: {
@@ -217,8 +151,8 @@ benchmark_setting {
accelerator_name: "snpe_cpu"
accelerator_desc: "CPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_GPU"
@@ -245,8 +179,8 @@ benchmark_setting {
accelerator_name: "snpe_gpu"
accelerator_desc: "GPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_choice: {
@@ -255,8 +189,8 @@ benchmark_setting {
accelerator_name: "snpe_cpu"
accelerator_desc: "CPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_GPU"
@@ -283,8 +217,8 @@ benchmark_setting {
accelerator_name: "snpe_gpu"
accelerator_desc: "GPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_choice: {
@@ -293,9 +227,9 @@ benchmark_setting {
accelerator_name: "snpe_cpu"
accelerator_desc: "CPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_GPU"
-}
\ No newline at end of file
+}
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt
index 2e5eca9b3..da75dd996 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_gpufp16.pbtxt
@@ -20,7 +20,7 @@ common_setting {
}
benchmark_setting {
- benchmark_id: "image_classification"
+ benchmark_id: "image_classification_v2"
framework: "SNPE"
custom_setting {
id: "bg_load"
@@ -32,58 +32,14 @@ benchmark_setting {
accelerator_name: "snpe_gpu_fp16"
accelerator_desc: "GPU_FP16"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
single_stream_expected_latency_ns: 500000
delegate_selected: "SNPE_GPU_FP16"
}
-benchmark_setting {
- benchmark_id: "image_classification_v2"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_GPU_FP16"
- accelerator_name: "snpe_gpu_fp16"
- accelerator_desc: "GPU_FP16"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
- }
- }
- single_stream_expected_latency_ns: 500000
- delegate_selected: "SNPE_GPU_FP16"
-}
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_GPU_FP16"
- accelerator_name: "psnpe_gpu_fp16"
- accelerator_desc: "GPU_FP16"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc"
- model_checksum: "6523060565b8d3f326f3f323c531fc1c"
- }
- }
- delegate_selected: "SNPE_GPU_FP16"
-}
benchmark_setting {
benchmark_id: "image_classification_offline_v2"
framework: "SNPE"
@@ -102,18 +58,19 @@ benchmark_setting {
accelerator_desc: "GPU_FP16"
batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_GPU_FP16"
}
+
benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -137,12 +94,13 @@ benchmark_setting {
accelerator_name: "snpe_gpu_fp16"
accelerator_desc: "GPU_FP16"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_GPU_FP16"
}
+
benchmark_setting {
benchmark_id: "image_segmentation_v2"
framework: "SNPE"
@@ -164,8 +122,8 @@ benchmark_setting {
accelerator_name: "snpe_gpu_fp16"
accelerator_desc: "GPU_FP16"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_GPU_FP16"
@@ -199,9 +157,9 @@ benchmark_setting {
accelerator_name: "snpe_gpu_fp16"
accelerator_desc: "GPU_FP16"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_GPU_FP16"
-}
\ No newline at end of file
+}
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt
index 7d1f75070..e217f39a9 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7cxg3.pbtxt
@@ -19,34 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -68,47 +40,14 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
single_stream_expected_latency_ns: 250000
delegate_selected: "SNPE_DSP"
}
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_8.dlc"
- model_checksum: "1e09cab7d0d381ef02cfd5ea5b85da92"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_offline_v2"
framework: "SNPE"
@@ -139,19 +78,20 @@ benchmark_setting {
accelerator_desc: "HTP"
batch_size: 12288
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_DSP"
}
+
benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -167,8 +107,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_DSP"
@@ -178,8 +118,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -199,8 +139,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_DSP"
@@ -231,8 +171,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -259,9 +199,9 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_DSP"
-}
\ No newline at end of file
+}
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt
index 873fd5037..928b6eea0 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7g1.pbtxt
@@ -19,30 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -64,41 +40,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
delegate_selected: "SNPE_DSP"
@@ -132,10 +75,10 @@ benchmark_setting {
delegate_name: "SNPE_DSP"
accelerator_name: "psnpe_dsp"
accelerator_desc: "HTP"
- batch_size: 12360
+ batch_size: 12288
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc"
- model_checksum: "6523060565b8d3f326f3f323c531fc1c"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_DSP"
@@ -145,8 +88,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -162,8 +105,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_DSP"
@@ -173,8 +116,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -198,8 +141,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_DSP"
@@ -226,8 +169,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -258,9 +201,9 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_DSP"
-}
\ No newline at end of file
+}
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt
index 6443f3927..71b9e6d29 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd7pg2.pbtxt
@@ -19,31 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- single_stream_expected_latency_ns: 600000
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -65,47 +40,14 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
single_stream_expected_latency_ns: 250000
delegate_selected: "SNPE_DSP"
}
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc"
- model_checksum: "6523060565b8d3f326f3f323c531fc1c"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_offline_v2"
framework: "SNPE"
@@ -136,8 +78,8 @@ benchmark_setting {
accelerator_desc: "HTP"
batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_DSP"
@@ -147,8 +89,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -164,8 +106,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_DSP"
@@ -175,8 +117,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -200,8 +142,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_DSP"
@@ -228,8 +170,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -260,9 +202,9 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_DSP"
-}
\ No newline at end of file
+}
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt
index c3a1aa4d6..3514f8ec1 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8cxg3.pbtxt
@@ -19,35 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "true"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- single_stream_expected_latency_ns: 600000
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -73,47 +44,14 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
single_stream_expected_latency_ns: 60000
delegate_selected: "SNPE_DSP"
}
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_8.dlc"
- model_checksum: "1e09cab7d0d381ef02cfd5ea5b85da92"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_offline_v2"
framework: "SNPE"
@@ -144,8 +82,8 @@ benchmark_setting {
accelerator_desc: "HTP"
batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_DSP"
@@ -155,8 +93,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -172,8 +110,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_DSP"
@@ -183,8 +121,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "input_buffer_type"
@@ -204,8 +142,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_DSP"
@@ -236,8 +174,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -264,9 +202,9 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_DSP"
-}
\ No newline at end of file
+}
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt
index 9cd50a4be..824bf9dbf 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g1.pbtxt
@@ -19,35 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- single_stream_expected_latency_ns: 800000
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -69,47 +40,14 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
single_stream_expected_latency_ns: 80000
delegate_selected: "SNPE_DSP"
}
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc"
- model_checksum: "550f807bc7ef40f77018a64a47507d09"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_offline_v2"
framework: "SNPE"
@@ -140,8 +78,8 @@ benchmark_setting {
accelerator_desc: "HTP"
batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_DSP"
@@ -151,8 +89,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -168,8 +106,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_DSP"
@@ -179,8 +117,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -204,8 +142,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_DSP"
@@ -232,8 +170,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -264,8 +202,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_DSP"
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt
index 50d06ff74..5c19b71e5 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g2.pbtxt
@@ -19,31 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_O2.dlc"
- model_checksum: "25977982896e607bceb55340c8d76223"
- }
- }
- single_stream_expected_latency_ns: 300000
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -65,44 +40,11 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
- }
- }
- single_stream_expected_latency_ns: 50000
- delegate_selected: "SNPE_DSP"
-}
-
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_4_O2.dlc"
- model_checksum: "b836e404b3aa5ff7914fac8376643fe4"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
+ single_stream_expected_latency_ns: 500000
delegate_selected: "SNPE_DSP"
}
@@ -134,10 +76,10 @@ benchmark_setting {
delegate_name: "SNPE_DSP"
accelerator_name: "psnpe_dsp"
accelerator_desc: "HTP"
- batch_size: 12288
+ batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_DSP"
@@ -147,8 +89,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -164,8 +106,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp_O2.dlc"
- model_checksum: "5802abfad10a7fc5c5849b13943d6d44"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_DSP"
@@ -175,8 +117,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -192,7 +134,7 @@ benchmark_setting {
}
custom_setting {
id: "perf_profile"
- value: "sustained_high_performance"
+ value: "high_performance"
}
delegate_choice: {
priority: 1
@@ -200,8 +142,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp_O2.dlc"
- model_checksum: "9d0dadbb6014289916a6078c4c991dd5"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_DSP"
@@ -226,10 +168,10 @@ benchmark_setting {
priority: 1
delegate_name: "SNPE_DSP"
accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
+ accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp_O2.dlc"
- model_checksum: "99b39c2b9ea84ff13e00eaa82f00136b"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -260,8 +202,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp_O2.dlc"
- model_checksum: "18fa274659e14c57b4f6bedb6871c83f"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_DSP"
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt
index de4d87460..1794863d9 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8g3.pbtxt
@@ -19,39 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "perf_profile"
- value: "burst"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_O2.dlc"
- model_checksum: "25977982896e607bceb55340c8d76223"
- }
- }
- single_stream_expected_latency_ns: 250000
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -73,48 +40,11 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
- }
- }
- single_stream_expected_latency_ns: 250000
- delegate_selected: "SNPE_DSP"
-}
-
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "perf_profile"
- value: "burst"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12360
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_3_O2.dlc"
- model_checksum: "aca3f4430fe98bbfe5c3a358ae9687e1"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
+ single_stream_expected_latency_ns: 500000
delegate_selected: "SNPE_DSP"
}
@@ -148,8 +78,8 @@ benchmark_setting {
accelerator_desc: "HTP"
batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_DSP"
@@ -159,16 +89,12 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
- value: "true"
- }
- custom_setting {
- id: "perf_profile"
- value: "burst"
+ value: "false"
}
custom_setting {
id: "use_ion_buffer"
@@ -180,20 +106,20 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp_O2.dlc"
- model_checksum: "5802abfad10a7fc5c5849b13943d6d44"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
- delegate_selected: "SNPE_DSP"
single_stream_expected_latency_ns: 500000
+ delegate_selected: "SNPE_DSP"
}
benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -209,7 +135,7 @@ benchmark_setting {
}
custom_setting {
id: "perf_profile"
- value: "sustained_high_performance"
+ value: "burst"
}
delegate_choice: {
priority: 1
@@ -217,8 +143,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp_O2.dlc"
- model_checksum: "9d0dadbb6014289916a6078c4c991dd5"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp_O2.dlc"
+ model_checksum: "f8631dbd69819438d6b317c204fa80d7"
}
}
delegate_selected: "SNPE_DSP"
@@ -247,10 +173,10 @@ benchmark_setting {
priority: 1
delegate_name: "SNPE_DSP"
accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
+ accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -277,7 +203,7 @@ benchmark_setting {
}
custom_setting {
id: "use_ion_buffer"
- value: "false"
+ value: "true"
}
delegate_choice: {
priority: 1
@@ -285,8 +211,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp_O2.dlc"
- model_checksum: "18fa274659e14c57b4f6bedb6871c83f"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp_O2.dlc"
+ model_checksum: "76b33f02ebfa6294a0e973aaf91116fa"
}
}
delegate_selected: "SNPE_DSP"
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt
index 45368a657..58609c39d 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sd8pg1.pbtxt
@@ -19,31 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- single_stream_expected_latency_ns: 600000
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -65,47 +40,14 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
single_stream_expected_latency_ns: 600000
delegate_selected: "SNPE_DSP"
}
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc"
- model_checksum: "6523060565b8d3f326f3f323c531fc1c"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_offline_v2"
framework: "SNPE"
@@ -134,10 +76,10 @@ benchmark_setting {
delegate_name: "SNPE_DSP"
accelerator_name: "psnpe_dsp"
accelerator_desc: "HTP"
- batch_size: 12288
+ batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_DSP"
@@ -147,8 +89,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -164,8 +106,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_DSP"
@@ -175,8 +117,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -200,8 +142,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_DSP"
@@ -222,14 +164,18 @@ benchmark_setting {
id: "output_buffer_type"
value: "int_32"
}
+ custom_setting {
+ id: "perf_profile"
+ value: "burst"
+ }
delegate_choice: {
priority: 1
delegate_name: "SNPE_DSP"
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -254,14 +200,18 @@ benchmark_setting {
id: "perf_profile"
value: "burst"
}
+ custom_setting {
+ id: "use_ion_buffer"
+ value: "false"
+ }
delegate_choice: {
priority: 1
delegate_name: "SNPE_DSP"
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_DSP"
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt
index f02842797..e280e0158 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm778.pbtxt
@@ -19,30 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -64,43 +40,11 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc"
- model_checksum: "550f807bc7ef40f77018a64a47507d09"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
+ single_stream_expected_latency_ns: 500000
delegate_selected: "SNPE_DSP"
}
@@ -132,10 +76,10 @@ benchmark_setting {
delegate_name: "SNPE_DSP"
accelerator_name: "psnpe_dsp"
accelerator_desc: "HTP"
- batch_size: 12288
+ batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_DSP"
@@ -145,8 +89,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -162,8 +106,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_DSP"
@@ -173,8 +117,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -190,7 +134,7 @@ benchmark_setting {
}
custom_setting {
id: "perf_profile"
- value: "high_performance"
+ value: "sustained_high_performance"
}
delegate_choice: {
priority: 1
@@ -198,8 +142,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_DSP"
@@ -220,14 +164,18 @@ benchmark_setting {
id: "output_buffer_type"
value: "int_32"
}
+ custom_setting {
+ id: "perf_profile"
+ value: "burst"
+ }
delegate_choice: {
priority: 1
delegate_name: "SNPE_DSP"
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -252,14 +200,18 @@ benchmark_setting {
id: "perf_profile"
value: "burst"
}
+ custom_setting {
+ id: "use_ion_buffer"
+ value: "false"
+ }
delegate_choice: {
priority: 1
delegate_name: "SNPE_DSP"
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_DSP"
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt
index 7c680eb0c..5aa7db3cf 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sdm888.pbtxt
@@ -19,30 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -64,39 +40,11 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc"
- model_checksum: "550f807bc7ef40f77018a64a47507d09"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
+ single_stream_expected_latency_ns: 500000
delegate_selected: "SNPE_DSP"
}
@@ -128,10 +76,10 @@ benchmark_setting {
delegate_name: "SNPE_DSP"
accelerator_name: "psnpe_dsp"
accelerator_desc: "HTP"
- batch_size: 12288
+ batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_DSP"
@@ -141,8 +89,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -158,8 +106,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_DSP"
@@ -169,8 +117,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -194,8 +142,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_DSP"
@@ -216,14 +164,18 @@ benchmark_setting {
id: "output_buffer_type"
value: "int_32"
}
+ custom_setting {
+ id: "perf_profile"
+ value: "burst"
+ }
delegate_choice: {
priority: 1
delegate_name: "SNPE_DSP"
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -248,14 +200,18 @@ benchmark_setting {
id: "perf_profile"
value: "burst"
}
+ custom_setting {
+ id: "use_ion_buffer"
+ value: "false"
+ }
delegate_choice: {
priority: 1
delegate_name: "SNPE_DSP"
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_DSP"
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt
index be7e2931b..61dbf92cd 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm4450.pbtxt
@@ -19,34 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- custom_setting {
- id: "cpu_int8"
- value: "true"
- }
- delegate_name: "SNPE_CPU"
- accelerator_name: "snpe_cpu"
- accelerator_desc: "CPU"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- delegate_selected: "SNPE_CPU"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -66,43 +38,10 @@ benchmark_setting {
priority: 1
delegate_name: "SNPE_CPU"
accelerator_name: "snpe_cpu"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
- }
- }
- delegate_selected: "SNPE_CPU"
-}
-
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- custom_setting {
- id: "cpu_int8"
- value: "true"
- }
- delegate_name: "SNPE_CPU"
- accelerator_name: "psnpe_cpu"
accelerator_desc: "CPU"
- batch_size: 12288
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_3.dlc"
- model_checksum: "550f807bc7ef40f77018a64a47507d09"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
delegate_selected: "SNPE_CPU"
@@ -136,10 +75,10 @@ benchmark_setting {
delegate_name: "SNPE_CPU"
accelerator_name: "psnpe_cpu"
accelerator_desc: "CPU"
- batch_size: 12288
+ batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_CPU"
@@ -149,8 +88,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -170,8 +109,8 @@ benchmark_setting {
accelerator_name: "snpe_cpu"
accelerator_desc: "CPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_CPU"
@@ -181,8 +120,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -206,8 +145,8 @@ benchmark_setting {
accelerator_name: "snpe_cpu"
accelerator_desc: "CPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_CPU"
@@ -242,8 +181,8 @@ benchmark_setting {
value: "true"
}
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_CPU"
@@ -286,8 +225,8 @@ benchmark_setting {
accelerator_name: "snpe_cpu"
accelerator_desc: "CPU"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_CPU"
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt
index e6711820a..57e24295f 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm7550.pbtxt
@@ -19,35 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- custom_setting {
- id: "perf_profile"
- value: "burst"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp.dlc"
- model_checksum: "cdf1fe622b309f692e05781661248a2b"
- }
- }
- single_stream_expected_latency_ns: 500000
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -69,51 +40,14 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
single_stream_expected_latency_ns: 50000
delegate_selected: "SNPE_DSP"
}
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "perf_profile"
- value: "burst"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc"
- model_checksum: "6523060565b8d3f326f3f323c531fc1c"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_offline_v2"
framework: "SNPE"
@@ -142,10 +76,10 @@ benchmark_setting {
delegate_name: "SNPE_DSP"
accelerator_name: "psnpe_dsp"
accelerator_desc: "HTP"
- batch_size: 12288
+ batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4_O2.dlc"
- model_checksum: "d349e3fb8a74a5037ecc3b2770dbd188"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4_O2.dlc"
+ model_checksum: "80ba82f2a628ab712d812d06524d2bd8"
}
}
delegate_selected: "SNPE_DSP"
@@ -155,8 +89,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -176,8 +110,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp_O2.dlc"
- model_checksum: "5802abfad10a7fc5c5849b13943d6d44"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_DSP"
@@ -187,8 +121,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -212,8 +146,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_DSP"
@@ -242,10 +176,10 @@ benchmark_setting {
priority: 1
delegate_name: "SNPE_DSP"
accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
+ accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -270,14 +204,18 @@ benchmark_setting {
id: "perf_profile"
value: "burst"
}
+ custom_setting {
+ id: "use_ion_buffer"
+ value: "false"
+ }
delegate_choice: {
priority: 1
delegate_name: "SNPE_DSP"
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp_O2.dlc"
- model_checksum: "18fa274659e14c57b4f6bedb6871c83f"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm//snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_DSP"
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt
index 83725a5de..7571b8942 100644
--- a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_sm8635.pbtxt
@@ -19,35 +19,6 @@ common_setting {
}
}
-benchmark_setting {
- benchmark_id: "image_classification"
- framework: "SNPE"
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- custom_setting {
- id: "perf_profile"
- value: "burst"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_O2.dlc"
- model_checksum: "25977982896e607bceb55340c8d76223"
- }
- }
- single_stream_expected_latency_ns: 500000
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_v2"
framework: "SNPE"
@@ -69,51 +40,14 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp.dlc"
- model_checksum: "dbab3e231e5f83aabc80d5b69e6dad32"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp.dlc"
+ model_checksum: "56e5039260e20e5c2a0b54cc0fac8098"
}
}
single_stream_expected_latency_ns: 50000
delegate_selected: "SNPE_DSP"
}
-benchmark_setting {
- benchmark_id: "image_classification_offline"
- framework: "SNPE"
- custom_setting {
- id: "scenario"
- value: "Offline"
- }
- custom_setting {
- id: "bg_load"
- value: "false"
- }
- custom_setting {
- id: "perf_profile"
- value: "burst"
- }
- custom_setting {
- id: "output_buffer_type"
- value: "uint_8"
- }
- custom_setting {
- id: "use_ion_buffer"
- value: "false"
- }
- delegate_choice: {
- priority: 1
- delegate_name: "SNPE_DSP"
- accelerator_name: "psnpe_dsp"
- accelerator_desc: "HTP"
- batch_size: 12288
- model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc"
- model_checksum: "6523060565b8d3f326f3f323c531fc1c"
- }
- }
- delegate_selected: "SNPE_DSP"
-}
-
benchmark_setting {
benchmark_id: "image_classification_offline_v2"
framework: "SNPE"
@@ -142,10 +76,10 @@ benchmark_setting {
delegate_name: "SNPE_DSP"
accelerator_name: "psnpe_dsp"
accelerator_desc: "HTP"
- batch_size: 12288
+ batch_size: 12360
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilenet_v4_htp_batched_4.dlc"
- model_checksum: "0de3b75022ce5c27d5902a080ec1cea0"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilenet_v4_htp_batched_4.dlc"
+ model_checksum: "7863deea588936fe6e09565ed47dde95"
}
}
delegate_selected: "SNPE_DSP"
@@ -155,8 +89,8 @@ benchmark_setting {
benchmark_id: "object_detection"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "Postprocessor/BatchMultiClassNonMaxSuppression"
+ id: "snpe_output_tensors"
+ value: "Postprocessor/BatchMultiClassNonMaxSuppression_classes,Postprocessor/BatchMultiClassNonMaxSuppression_num_detections,detection_scores:0,detection_boxes:0"
}
custom_setting {
id: "bg_load"
@@ -176,8 +110,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "HTP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/ssd_mobiledet_qat_htp.dlc"
- model_checksum: "c333fc135a8c474679d716fe391a9e2a"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/ssd_mobiledet_qat_htp.dlc"
+ model_checksum: "49c6afbfefffb78269fe73a6ee1b4a85"
}
}
delegate_selected: "SNPE_DSP"
@@ -187,8 +121,8 @@ benchmark_setting {
benchmark_id: "natural_language_processing"
framework: "SNPE"
custom_setting {
- id: "snpe_output_layers"
- value: "transpose"
+ id: "snpe_output_tensors"
+ value: "transpose:0"
}
custom_setting {
id: "bg_load"
@@ -212,8 +146,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobilebert_quantized_htp.dlc"
- model_checksum: "7a641e4df84fc06a1237b7fe1b1c5b08"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobilebert_quantized_htp.dlc"
+ model_checksum: "96d947175f04950898a372890907dda1"
}
}
delegate_selected: "SNPE_DSP"
@@ -242,10 +176,10 @@ benchmark_setting {
priority: 1
delegate_name: "SNPE_DSP"
accelerator_name: "snpe_dsp"
- accelerator_desc: "HTP"
+ accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/mobile_mosaic_htp.dlc"
- model_checksum: "e870526444c1e48df4f0505e530ecfdf"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/mobile_mosaic_htp.dlc"
+ model_checksum: "3c0dfbacda053773d6afb34503d9991a"
}
}
delegate_selected: "SNPE_DSP"
@@ -276,8 +210,8 @@ benchmark_setting {
accelerator_name: "snpe_dsp"
accelerator_desc: "DSP"
model_file: {
- model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_0/qualcomm/snusr_htp.dlc"
- model_checksum: "84ef0d9c2e7b710381cea962a22a0b41"
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/snusr_htp.dlc"
+ model_checksum: "668da9816073d67972704e237137a50f"
}
}
delegate_selected: "SNPE_DSP"
diff --git a/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt
new file mode 100644
index 000000000..9430e5cd1
--- /dev/null
+++ b/mobile_back_qti/cpp/backend_qti/settings/qti_settings_stablediffusion.pbtxt
@@ -0,0 +1,46 @@
+# proto-file: flutter/cpp/proto/backend_setting.proto
+# proto-message: BackendSetting
+
+benchmark_setting {
+ benchmark_id: "stable_diffusion"
+ framework: "QNN"
+ custom_setting {
+ id: "pipeline"
+ value: "StableDiffusionPipeline"
+ }
+ custom_setting {
+ id: "bg_load"
+ value: "false"
+ }
+ delegate_choice: {
+ priority: 1
+ delegate_name: "QNN_DSP"
+ accelerator_name: "snpe_dsp"
+ accelerator_desc: "DSP"
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/betas.bin"
+ model_checksum: "09d2e4306d319caf1b34e6afb5c63c22"
+ }
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/lambdas.bin"
+ model_checksum: "c7179725ec31a6e2c7daf008a5e1ff23"
+ }
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/sd_precompute_data.tar"
+ model_checksum: "beb7fe2da40042fb585bb8cb95d86b4d"
+ }
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/text_encoder.serialized.bin"
+ model_checksum: "6da7b95fa467e99af2b9f80c7afe3734"
+ }
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/unet.serialized.bin"
+ model_checksum: "3b504b92cbd788d713ca9cfc5b19d596"
+ }
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/qualcomm/vae_decoder.serialized.bin"
+ model_checksum: "c7762e64c2596abe7f16614709cc5482"
+ }
+ }
+ delegate_selected: "QNN_DSP"
+}
\ No newline at end of file
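Unlike the single-model benchmarks above, this new Stable Diffusion setting carries six `model_file` entries: scheduler constants (`betas.bin`, `lambdas.bin`), precomputed data, and three serialized QNN graphs. A consumer has to fetch and verify each entry against its MD5; a minimal sketch under the assumption of a hypothetical `ModelFile` struct and `Md5Of` helper:

```cpp
#include <string>
#include <vector>

// Hypothetical stand-ins: ModelFile mirrors the proto fields above; Md5Of is
// an assumed checksum helper whose implementation is not shown.
struct ModelFile {
  std::string model_path;      // download URL
  std::string model_checksum;  // expected MD5
};

std::string Md5Of(const std::string& local_path);  // assumed helper

// Verifies every downloaded file in cache_dir against its expected checksum.
bool VerifyAll(const std::vector<ModelFile>& files,
               const std::string& cache_dir) {
  for (const auto& f : files) {
    // Local name is taken from the last path component of the URL.
    const std::string local =
        cache_dir + "/" +
        f.model_path.substr(f.model_path.find_last_of('/') + 1);
    if (Md5Of(local) != f.model_checksum) return false;
  }
  return true;
}
```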
diff --git a/mobile_back_qti/cpp/backend_qti/soc_utility.cc b/mobile_back_qti/cpp/backend_qti/soc_utility.cc
index c03e41336..c99a3dfe0 100644
--- a/mobile_back_qti/cpp/backend_qti/soc_utility.cc
+++ b/mobile_back_qti/cpp/backend_qti/soc_utility.cc
@@ -110,7 +110,8 @@ std::map<uint32_t, SocInfo> socDetails =
std::vector<int>({4, 5, 6, 7}), 8, false)},
{557, SocInfo(2, 0, 0, 0, true, qti_settings_sd8g3, "SD8G3", 1,
std::vector<int>({0, 1, 2, 3}),
- std::vector<int>({4, 5, 6, 7}), 8, true)},
+ std::vector<int>({4, 5, 6, 7}), 8, true,
+ /* stable_diffusion */ true)},
{614, SocInfo(2, 0, 0, 0, true, qti_settings_sm8635, "SM8635", 1,
std::vector<int>({0, 1, 2, 3}),
std::vector<int>({4, 5, 6, 7}), 8, true)},
@@ -314,29 +315,22 @@ void Socs::soc_info_init() {
#endif
LOG(INFO) << "Soc ID: " << soc_id;
- if (soc_id != UNSUPPORTED_SOC_ID) {
- if (socDetails.find(soc_id) == socDetails.end()) {
- soc_id = UNSUPPORTED_SOC_ID;
- }
- m_soc_info = socDetails.find(soc_id)->second;
+ m_soc_info = socDetails.find(soc_id)->second;
- if (external_config) {
- LOG(INFO) << "Config settings derived externally from "
- "//data/local/tmp/external/qti_settings.pbtxt";
- m_soc_info.m_settings = get_external_config_string();
- }
- if (soc_id == UNSUPPORTED_SOC_ID) {
- if (QTIBackendHelper::IsRuntimeAvailable(SNPE_DSP)) {
- m_soc_info.m_settings = qti_settings_default_dsp;
- } else if (QTIBackendHelper::IsRuntimeAvailable(SNPE_GPU)) {
- m_soc_info.m_settings = qti_settings_default_gpu;
- } else {
- m_soc_info.m_settings = qti_settings_default_cpu;
- }
+ if (external_config) {
+ LOG(INFO) << "Config settings derived externally from "
+ "//data/local/tmp/external/qti_settings.pbtxt";
+ m_soc_info.m_settings = get_external_config_string();
+ }
+ if (soc_id == UNSUPPORTED_SOC_ID) {
+ if (QTIBackendHelper::IsRuntimeAvailable(SNPE_DSP)) {
+ m_soc_info.m_settings = qti_settings_default_dsp;
+ } else if (QTIBackendHelper::IsRuntimeAvailable(SNPE_GPU)) {
+ m_soc_info.m_settings = qti_settings_default_gpu;
+ } else {
+ m_soc_info.m_settings = qti_settings_default_cpu;
}
- } else {
- m_soc_info = unsupportedSoc;
}
}
@@ -366,7 +360,6 @@ int Socs::soc_num_inits() {
}
bool Socs::isSnapDragon(const char *manufacturer) {
- soc_info_init();
#ifdef __ANDROID__
bool is_qcom = false;
if (strncmp("QUALCOMM", manufacturer, 7) == 0) {
@@ -429,20 +422,10 @@ bool Socs::soc_settings(const char **settings,
const char **not_allowed_message) {
soc_info_init();
- if (m_soc_info.m_soc_name == UNSUPPORTED_SOC_STR) {
- // it's a QTI SOC, but can't access soc_id
- *not_allowed_message = "Unsupported app";
- *settings = empty_settings.c_str();
- return true;
- }
-
// Check if this SoC is supported
*not_allowed_message = nullptr;
*settings = m_soc_info.m_settings.c_str();
- if (m_soc_info.m_soc_name == DEFAULT_SOC_STR) {
- // it's a QTI SOC, but the chipset is not yet supported
- *not_allowed_message = "Unsupported QTI SoC";
- }
+
return true;
}
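The refactor above drops the pre-lookup guard and takes `socDetails.find(soc_id)->second` directly, relying on the default-settings fallback only after the fact when `soc_id == UNSUPPORTED_SOC_ID`; that pattern is safe only while `socDetails` contains an entry for `UNSUPPORTED_SOC_ID` itself. A defensive variant of the lookup as a sketch (the types and sentinel value are stand-ins, not the patch's exact code):

```cpp
#include <cstdint>
#include <map>
#include <string>

// Sketch of the lookup contract after the refactor: if soc_id is missing,
// fall back to the UNSUPPORTED_SOC_ID sentinel entry instead of blindly
// dereferencing find()->second.
constexpr uint32_t UNSUPPORTED_SOC_ID = 0;  // stand-in value

struct SocInfoLite {
  std::string settings;
};

SocInfoLite LookupSoc(const std::map<uint32_t, SocInfoLite>& details,
                      uint32_t soc_id) {
  auto it = details.find(soc_id);
  if (it == details.end()) it = details.find(UNSUPPORTED_SOC_ID);
  return it->second;  // safe only while the sentinel entry exists
}
```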
diff --git a/mobile_back_qti/cpp/backend_qti/soc_utility.h b/mobile_back_qti/cpp/backend_qti/soc_utility.h
index cd6cf3a15..4980ccd89 100644
--- a/mobile_back_qti/cpp/backend_qti/soc_utility.h
+++ b/mobile_back_qti/cpp/backend_qti/soc_utility.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2020-2022 Qualcomm Innovation Center, Inc. All rights reserved.
+/* Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
@@ -43,12 +43,13 @@ class SocInfo {
m_soc_name(soc_name),
m_num_inits(0),
m_max_cores(0),
- m_needs_rpcmem(false) {}
+ m_needs_rpcmem(false),
+ m_needs_stablediffusion(false) {}
SocInfo(int num_dsp, int num_gpu, int num_cpu, int num_gpu_fp16,
bool useDspFeatures, const std::string settings, std::string soc_name,
int num_inits, std::vector<int> hlc, std::vector<int> llc,
- int max_cores, bool needs_rpcmem)
+ int max_cores, bool needs_rpcmem, bool needs_stablediffusion = false)
: m_num_dsp(num_dsp),
m_num_gpu(num_gpu),
m_num_cpu(num_cpu),
@@ -60,10 +61,16 @@ class SocInfo {
m_high_latency_cores(hlc),
m_low_latency_cores(llc),
m_max_cores(max_cores),
- m_needs_rpcmem(needs_rpcmem) {
+ m_needs_rpcmem(needs_rpcmem),
+ m_needs_stablediffusion(needs_stablediffusion) {
if (m_useDspFeatures == false) {
m_num_inits = 1;
}
+ if (m_needs_stablediffusion) {
+#ifdef STABLEDIFFUSION_FLAG
+ m_settings += qti_settings_stablediffusion;
+#endif
+ }
}
int m_num_dsp;
@@ -78,6 +85,7 @@ class SocInfo {
std::vector m_low_latency_cores;
int m_max_cores;
bool m_needs_rpcmem;
+ bool m_needs_stablediffusion;
};
class SocProperties {
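Because `needs_stablediffusion` is a defaulted parameter, the many existing `SocInfo(...)` initializers in `socDetails` compile unchanged; only the SD8G3 entry passes the extra `true`, and the Stable Diffusion settings are appended only when `STABLEDIFFUSION_FLAG` is defined. A condensed sketch of that constructor-tail pattern (class name and members abbreviated here):

```cpp
#include <string>
#include <utility>

extern const std::string qti_settings_stablediffusion;  // embedded settings

// Condensed sketch: a defaulted flag keeps all existing call sites valid,
// and the extra settings are appended only in stable-diffusion builds.
class SocInfoSketch {
 public:
  explicit SocInfoSketch(std::string settings,
                         bool needs_stablediffusion = false)
      : m_settings(std::move(settings)),
        m_needs_stablediffusion(needs_stablediffusion) {
#ifdef STABLEDIFFUSION_FLAG
    if (m_needs_stablediffusion) m_settings += qti_settings_stablediffusion;
#endif
  }

  std::string m_settings;
  bool m_needs_stablediffusion;
};
```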
diff --git a/mobile_back_qti/docker/mlperf_dlc_prepare/Dockerfile b/mobile_back_qti/docker/mlperf_dlc_prepare/Dockerfile
index 14b82d36c..1a71fff64 100644
--- a/mobile_back_qti/docker/mlperf_dlc_prepare/Dockerfile
+++ b/mobile_back_qti/docker/mlperf_dlc_prepare/Dockerfile
@@ -14,7 +14,7 @@
##########################################################################
# Docker image name: mlcommons/mlperf_dlc_prepare
-FROM ubuntu:focal
+FROM ubuntu:jammy
RUN groupadd -r mlperfuser \
&& useradd -r mlperfuser -g mlperfuser
LABEL maintainer="quic_mmundhra@quicinc.com"
@@ -26,20 +26,42 @@ RUN apt-get update && apt-get upgrade -y && apt-get autoremove -y && \
RUN apt-get update && apt-get upgrade -y && apt-get autoremove -y && \
apt-get install -y --no-install-recommends \
- python3 python3-pip libpython3.8-dev python3.8-venv libgl1-mesa-glx libglib2.0-0 cython3 gcc make curl unzip libc++1-8 \
+ python3 python3-pip libpython3.10-dev python3.10-venv libgl1-mesa-glx libglib2.0-0 cython3 gcc make curl unzip libc++1-14 \
git locales openssh-client ca-certificates tar gzip parallel \
- zip bzip2 gnupg wget python3-six python3-pip libncurses5 openjdk-17-jdk-headless clang-format-10 golang-1.13-go build-essential
+ zip bzip2 gnupg wget python3-six python3-pip libncurses5 openjdk-11-jdk-headless clang-format golang-1.13-go build-essential
-RUN pip3 install pip==21.3.1 setuptools==31.0.1
-RUN pip3 install tensorflow-cpu==2.13.1
-RUN pip3 install protobuf==3.20.3
-RUN pip3 install Pillow opencv-python==4.3.0.38 setuptools matplotlib tensorflow_hub tf-slim \
- absl-py numpy pyyaml decorator scipy attrs pytest tflite psutil
+RUN pip3 install pip==24.1.1 setuptools==45.0.0
+RUN pip3 install numpy==1.23.1 opencv-python tensorflow-cpu==2.13.1
+RUN pip3 install protobuf==3.6.0
+RUN pip3 install Pillow tensorflow_hub tf-slim \
+ absl-py pyyaml
+RUN pip3 install pandas matplotlib
+RUN pip3 install onnx==1.12.0 onnxruntime packaging
+
+RUN curl -SL http://releases.llvm.org/9.0.0/clang+llvm-9.0.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz | tar -xJC /usr/local/. && \
+ mv /usr/local/clang+llvm-9.0.0-x86_64-linux-gnu-ubuntu-16.04 /usr/local/clang-9.0.0
+
+RUN apt-get update && apt-get install -y bash coreutils
+RUN which readlink
+
+ARG ndk_version=android-ndk-r25c
+ARG android_ndk_home=/opt/android/${ndk_version}
+# Install the NDK
+# Use wget instead of curl to avoid "Error in the HTTP2 framing layer"
+RUN cd /tmp && wget -nv https://dl.google.com/android/repository/${ndk_version}-linux.zip && \
+ unzip -q /tmp/${ndk_version}-linux.zip -d /opt/android && \
+ rm /tmp/${ndk_version}-linux.zip
+
+ENV ANDROID_NDK_HOME ${android_ndk_home}
+ENV ANDROID_NDK_ROOT ${android_ndk_home}
+
+ENV PATH=${ANDROID_NDK_ROOT}:/usr/local/clang-9.0.0/bin:${PATH}
RUN ln -s /usr/bin/python3 /usr/bin/python
RUN apt-get clean
-RUN mkdir -p /home/mlperf && chmod 754 /home/mlperf
+RUN mkdir -p /home/mlperf && chmod 777 /home/mlperf
ENV HOME /home/mlperf
-USER mlperfuser
+USER mlperfuser
\ No newline at end of file
diff --git a/mobile_back_qti/make/qti_backend.mk b/mobile_back_qti/make/qti_backend.mk
index bde9675f4..bb6cbd270 100644
--- a/mobile_back_qti/make/qti_backend.mk
+++ b/mobile_back_qti/make/qti_backend.mk
@@ -19,10 +19,9 @@ else ifeq ($(WITH_QTI),$(filter $(WITH_QTI),1 2))
backend_qti_flutter_docker_args=-v "${SNPE_SDK}:/mnt/project/mobile_back_qti/$(shell basename ${SNPE_SDK})"
endif
$(info WITH_QTI=$(WITH_QTI))
- local_snpe_sdk_root=$(shell echo mobile_back_qti/qaisw-* | awk '{print $$NF}')
+ local_snpe_sdk_root=$(shell echo mobile_back_qti/qairt/* | awk '{print $$NF}')
$(info detected SNPE SDK: ${local_snpe_sdk_root})
backend_qti_android_files=${BAZEL_LINKS_PREFIX}bin/mobile_back_qti/cpp/backend_qti/libqtibackend.so \
- ${BAZEL_LINKS_PREFIX}bin/flutter/android/commonlibs/lib_arm64/libc++_shared.so \
${local_snpe_sdk_root}/lib/aarch64-android/libSNPE.so \
${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV75Stub.so \
${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV73Stub.so \
@@ -32,9 +31,14 @@ else ifeq ($(WITH_QTI),$(filter $(WITH_QTI),1 2))
${local_snpe_sdk_root}/lib/hexagon-v75/unsigned/libSnpeHtpV75Skel.so \
${local_snpe_sdk_root}/lib/hexagon-v73/unsigned/libSnpeHtpV73Skel.so \
${local_snpe_sdk_root}/lib/hexagon-v69/unsigned/libSnpeHtpV69Skel.so \
- ${local_snpe_sdk_root}/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so
+ ${local_snpe_sdk_root}/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so \
+ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtp.so \
+ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpNetRunExtensions.so \
+ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpV75Stub.so \
+ ${local_snpe_sdk_root}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so \
+ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnSystem.so \
+ ${BAZEL_LINKS_PREFIX}bin/flutter/android/commonlibs/lib_arm64/libc++_shared.so
backend_qti_cmdline_files=${BAZEL_LINKS_PREFIX}bin/mobile_back_qti/cpp/backend_qti/libqtibackend.so \
- ${BAZEL_LINKS_PREFIX}bin/flutter/android/commonlibs/lib_arm64/libc++_shared.so \
${local_snpe_sdk_root}/lib/aarch64-android/libSNPE.so \
${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV75Stub.so \
${local_snpe_sdk_root}/lib/aarch64-android/libSnpeHtpV73Stub.so \
@@ -45,7 +49,15 @@ else ifeq ($(WITH_QTI),$(filter $(WITH_QTI),1 2))
${local_snpe_sdk_root}/lib/hexagon-v73/unsigned/libSnpeHtpV73Skel.so \
${local_snpe_sdk_root}/lib/hexagon-v69/unsigned/libSnpeHtpV69Skel.so \
${local_snpe_sdk_root}/lib/hexagon-v68/unsigned/libSnpeHtpV68Skel.so \
- mobile_back_qti/run_mlperf_tests.sh
+ mobile_back_qti/run_mlperf_tests.sh \
+ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtp.so \
+ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpNetRunExtensions.so \
+ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnHtpV75Stub.so \
+ ${local_snpe_sdk_root}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so \
+ ${local_snpe_sdk_root}/lib/aarch64-android/libQnnSystem.so \
+ ${BAZEL_LINKS_PREFIX}bin/flutter/android/commonlibs/lib_arm64/libc++_shared.so
+
+ backend_qti_android_target_sd=//mobile_back_qti/cpp/backend_qti/StableDiffusion:stableDiffusion
backend_qti_android_target=//mobile_back_qti/cpp/backend_qti:libqtibackend.so \
//flutter/android/commonlibs:commonlibs
@@ -54,6 +66,21 @@ else ifeq ($(WITH_QTI),$(filter $(WITH_QTI),1 2))
backend_qti_android_target+=--//mobile_back_qti/cpp/backend_qti:external_config=${EXTERNAL_CONFIG}
endif
+ ifeq ($(WITH_STABLEDIFFUSION),1)
+ backend_qti_libs_deps = rm -f ./mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv && \
+ ln -s /opt/opencv-3.4.7_android/sdk/native mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv
+ backend_qti_flutter_docker_args = --env WITH_STABLEDIFFUSION=${WITH_STABLEDIFFUSION}
+ backend_qti_android_target+=--//mobile_back_qti/cpp/backend_qti:with_stablediffusion=${WITH_STABLEDIFFUSION}
+ backend_qti_cmdline_files+=mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_core.so \
+ mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_imgcodecs.so \
+ mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_imgproc.so \
+ mobile_back_qti/cpp/backend_qti/StableDiffusionShared/libStableDiffusion.so
+ backend_qti_android_files+=mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_core.so \
+ mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_imgcodecs.so \
+ mobile_back_qti/cpp/backend_qti/StableDiffusionShared/include/opencv/libs/arm64-v8a/libopencv_imgproc.so \
+ mobile_back_qti/cpp/backend_qti/StableDiffusionShared/libStableDiffusion.so
+ endif
+
backend_qti_windows_files=${BAZEL_LINKS_PREFIX}bin/mobile_back_qti/cpp/backend_qti/libqtibackend.dll \
${BAZEL_LINKS_PREFIX}bin/mobile_back_qti/cpp/backend_qti/libqtibackend.pdb \
${local_snpe_sdk_root}/lib/aarch64-windows-msvc/SNPE.dll \
diff --git a/mobile_back_qti/run_mlperf_tests.bat b/mobile_back_qti/run_mlperf_tests.bat
index 5b7a155cc..95caab306 100644
--- a/mobile_back_qti/run_mlperf_tests.bat
+++ b/mobile_back_qti/run_mlperf_tests.bat
@@ -34,7 +34,7 @@ rem # use --models argument to pass models path as value
rem # use --mode argument to run in performance or accuracy mode. Defaults to performance mode.
rem # valid values for --mode argument: performance, accuracy.
rem # use --usecase argument to pass name of usecase to run as value (if not mentioned, by default runs all 8 usecases)
-rem # valid values for --usecase argument: image_classification_v2, image_classification, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2, image_classification_offline
+rem # valid values for --usecase argument: image_classification_v2, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2
:loop
IF NOT "%1"=="" (
@@ -115,14 +115,6 @@ IF "%usecase_name%"=="image_classification_offline_v2" (
call :image_classification_offline_v2_performance
goto :eof
)
-IF "%usecase_name%"=="image_classification" (
- call :image_classification_performance
- goto :eof
-)
-IF "%usecase_name%"=="image_classification_offline" (
- call :image_classification_offline_performance
- goto :eof
-)
IF %usecase_name%=="" (
call :image_classification_v2_performance
echo ## cooldown initiated ##
@@ -140,12 +132,6 @@ IF %usecase_name%=="" (
echo ## cooldown initiated ##
timeout /t %cooldown_period% /nobreak
call :image_classification_offline_v2_performance
- echo ## cooldown initiated ##
- timeout /t %cooldown_period% /nobreak
- call :image_classification_performance
- echo ## cooldown initiated ##
- timeout /t %cooldown_period% /nobreak
- call :image_classification_offline_performance
goto :eof
)
)
@@ -176,14 +162,6 @@ IF "%usecase_name%"=="image_classification_offline_v2" (
call :image_classification_offline_v2_accuracy
goto :eof
)
-IF "%usecase_name%"=="image_classification" (
- call :image_classification_accuracy
- goto :eof
-)
-IF "%usecase_name%"=="image_classification_offline" (
- call :image_classification_offline_accuracy
- goto :eof
-)
IF %usecase_name%=="" (
call :image_classification_v2_accuracy
echo ## cooldown initiated ##
@@ -201,12 +179,6 @@ IF %usecase_name%=="" (
echo ## cooldown initiated ##
timeout /t %cooldown_period% /nobreak
call :image_classification_offline_v2_accuracy
- echo ## cooldown initiated ##
- timeout /t %cooldown_period% /nobreak
- call :image_classification_accuracy
- echo ## cooldown initiated ##
- timeout /t %cooldown_period% /nobreak
- call :image_classification_offline_accuracy
goto :eof
)
)
@@ -293,31 +265,6 @@ findstr /C:"Samples per second" %use_case_results_file% >> %results_file%
echo ####### Image classification offline V2 is complete #######
EXIT /B 0
-:image_classification_performance
-echo ####### Performance:: Image classification in progress #######
-set test_case=image_classification
-mkdir %test_case%%test_case_suffix%
-set use_case_results_file=%results_prefix%%test_case%%results_suffix%
-.\main.exe EXTERNAL %test_case% --mode=PerformanceOnly --images_directory=%dataset_path%\imagenet\img --offset=1 --output_dir=%test_case%%test_case_suffix% --min_query_count=%min_query% --min_duration_ms=%min_duration_ms% --single_stream_expected_latency_ns=600000 --groundtruth_file=%dataset_path% --model_file=%models_path%\mobilenet_edgetpu_224_1.0_htp.dlc --lib_path=libqtibackend.dll --native_lib_path=. > %use_case_results_file% 2>&1
-echo #######%test_case%###### >> %results_file%
-findstr /C:"90th percentile latency (ns)" %use_case_results_file% >> %results_file%
-findstr /C:"Result is" %use_case_results_file% >> %results_file%
-findstr /C:"QPS w/o loadgen overhead" %use_case_results_file% >> %results_file%
-echo ####### Image classification is complete #######
-EXIT /B 0
-
-:image_classification_offline_performance
-echo ####### Performance:: Image classification offline in progress #######
-set test_case=image_classification_offline
-mkdir %test_case%%test_case_suffix%
-set use_case_results_file=%results_prefix%%test_case%%results_suffix%
-.\main.exe EXTERNAL %test_case% --mode=PerformanceOnly --scenario=Offline --batch_size=12288 --images_directory=%dataset_path%\imagenet\img --offset=1 --output_dir=%test_case%%test_case_suffix% --min_query_count=24576 --min_duration_ms=0 --single_stream_expected_latency_ns=1000000 --groundtruth_file=%dataset_path% --model_file=%models_path%\mobilenet_edgetpu_224_1.0_htp_batched_8.dlc --lib_path=libqtibackend.dll --native_lib_path=. > %use_case_results_file% 2>&1
-echo #######%test_case%###### >> %results_file%
-findstr /C:"Result is" %use_case_results_file% >> %results_file%
-findstr /C:"Samples per second" %use_case_results_file% >> %results_file%
-echo ####### Image classification offline is complete #######
-EXIT /B 0
-
rem ####### Accuracy usecase functions #######
:image_classification_v2_accuracy
@@ -386,28 +333,6 @@ findstr "Accuracy" %use_case_results_file% >> %results_file%
echo ####### Image classification offline V2 is complete #######
EXIT /B 0
-:image_classification_accuracy
-echo ####### Accuracy:: Image classification in progress #######
-set test_case=image_classification
-mkdir %test_case%%test_case_suffix%
-set use_case_results_file=%results_prefix%%test_case%%results_suffix%
-.\main.exe EXTERNAL %test_case% --mode=AccuracyOnly --images_directory=%dataset_path%\imagenet\img --offset=1 --output_dir=%test_case%%test_case_suffix% --min_query_count=%min_query% --min_duration_ms=%min_duration_ms% --single_stream_expected_latency_ns=600000 --groundtruth_file=%dataset_path%\imagenet\imagenet_val_full.txt --model_file=%models_path%\mobilenet_edgetpu_224_1.0_htp.dlc --lib_path=libqtibackend.dll --native_lib_path=. > %use_case_results_file% 2>&1
-echo #######%test_case%###### >> %results_file%
-findstr "Accuracy" %use_case_results_file% >> %results_file%
-echo ####### Image classification is complete #######
-EXIT /B 0
-
-:image_classification_offline_accuracy
-echo ####### Accuracy:: Image classification offline in progress #######
-set test_case=image_classification_offline
-mkdir %test_case%%test_case_suffix%
-set use_case_results_file=%results_prefix%%test_case%%results_suffix%
-.\main.exe EXTERNAL %test_case% --mode=AccuracyOnly --scenario=Offline --batch_size=12288 --images_directory=%dataset_path%\imagenet\img --offset=1 --output_dir=%test_case%%test_case_suffix% --min_query_count=24576 --min_duration_ms=0 --single_stream_expected_latency_ns=1000000 --groundtruth_file=%dataset_path%\imagenet\imagenet_val_full.txt --model_file=%models_path%\mobilenet_edgetpu_224_1.0_htp_batched_8.dlc --lib_path=libqtibackend.dll --native_lib_path=. > %use_case_results_file% 2>&1
-echo #######%test_case%###### >> %results_file%
-findstr "Accuracy" %use_case_results_file% >> %results_file%
-echo ####### Image classification offline is complete #######
-EXIT /B 0
-
:dataset_end
echo "set dataset path using --dataset"
diff --git a/mobile_back_qti/run_mlperf_tests.sh b/mobile_back_qti/run_mlperf_tests.sh
index 354870677..2f21cecd9 100644
--- a/mobile_back_qti/run_mlperf_tests.sh
+++ b/mobile_back_qti/run_mlperf_tests.sh
@@ -32,7 +32,7 @@ export LD_LIBRARY_PATH=.
# use --mode argument to run in performance or accuracy mode. Defaults to performance mode.
# valid values for --mode argument: performance, accuracy.
-# use --usecase argument to pass name of usecase to run as value (if not mentioned, by default runs all 8 usecases)
-# valid values for --usecase argument: image_classification_v2, image_classification, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2, image_classification_offline
+# use --usecase argument to pass name of usecase to run as value (if not mentioned, by default runs all 7 usecases)
+# valid values for --usecase argument: image_classification_v2, object_detection, image_segmentation, language_understanding, super_resolution, image_classification_offline_v2, stable_diffusion
while [[ $# -gt 0 ]]
do
@@ -170,32 +170,19 @@ grep "Samples per second" $use_case_results_file >> $results_file
echo "####### Image classification offline V2 is complete #######"
}
-image_classification_performance(){
-echo "####### Performance:: Image classification in progress #######"
-export test_case=image_classification
+stable_diffusion_performance(){
+echo "####### Performance:: Stable diffusion in progress #######"
+export test_case=stable_diffusion
mkdir -p $test_case$test_case_suffix
export use_case_results_file=$results_prefix$test_case$results_suffix
-./main EXTERNAL $test_case --mode=PerformanceOnly --images_directory=$dataset_path/imagenet/img --offset=1 --output_dir=$test_case$test_case_suffix --min_query_count=$min_query --min_duration_ms=$min_duration_ms --single_stream_expected_latency_ns=500000 --groundtruth_file="" --model_file=$models_path/mobilenet_edgetpu_224_1.0_htp.dlc --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1
+./main EXTERNAL $test_case --mode=PerformanceOnly --input_tfrecord=$dataset_path/stable_diffusion/coco_gen_full.tfrecord --output_dir=$test_case$test_case_suffix --min_query_count=1024 --min_duration_ms=60000 --max_duration_ms=300000 --single_stream_expected_latency_ns=1000000 --model_file=$models_path/stable_diffusion --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1
echo "#######$test_case######" >> $results_file
grep "90th percentile latency (ns)" $use_case_results_file >> $results_file
grep "Result is" $use_case_results_file >> $results_file
grep "QPS w/o loadgen overhead" $use_case_results_file >> $results_file
-echo "####### Image classification is complete #######"
+echo "####### Stable Diffusion is complete #######"
}
-image_classification_offline_performance(){
-echo "####### Performance:: Image classification offline in progress #######"
-export test_case=image_classification_offline
-mkdir -p $test_case$test_case_suffix
-export use_case_results_file=$results_prefix$test_case$results_suffix
-./main EXTERNAL $test_case --mode=PerformanceOnly --scenario=Offline --batch_size=12288 --images_directory=$dataset_path/imagenet/img --offset=1 --output_dir=$test_case$test_case_suffix --min_query_count=24576 --min_duration_ms=0 --single_stream_expected_latency_ns=1000000 --groundtruth_file= --model_file=$models_path/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1
-echo "#######$test_case######" >> $results_file
-grep "Result is" $use_case_results_file >> $results_file
-grep "Samples per second" $use_case_results_file >> $results_file
-echo "####### Image classification offline is complete #######"
-}
-
-
####### Accuracy usecase functions #######
image_classification_v2_accuracy(){
@@ -236,7 +223,7 @@ echo "####### Accuracy:: Natural language processing in progress #######"
export test_case=natural_language_processing
mkdir -p $test_case$test_case_suffix
export use_case_results_file=$results_prefix$test_case$results_suffix
-./main EXTERNAL $test_case --mode=AccuracyOnly --input_file=$dataset_path/squad/squad_eval.tfrecord --output_dir=$test_case$test_case_suffix --min_query_count=$min_query --min_duration_ms=$min_duration_ms --single_stream_expected_latency_ns=1000000 --groundtruth_file=$dataset_path/squad/squad_groundtruth.tfrecord --model_file=$models_path/mobilebert_quantized_htp.dlc --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1
+./main EXTERNAL $test_case --mode=AccuracyOnly --input_file=$dataset_path/squad/squad_eval_mini.tfrecord --output_dir=$test_case$test_case_suffix --min_query_count=$min_query --min_duration_ms=$min_duration_ms --single_stream_expected_latency_ns=1000000 --groundtruth_file=$dataset_path/squad/squad_groundtruth.tfrecord --model_file=$models_path/mobilebert_quantized_htp.dlc --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1
echo "#######$test_case######" >> $results_file
grep "Accuracy" $use_case_results_file >> $results_file
echo "####### Natural language processing is complete #######"
@@ -264,29 +251,18 @@ grep "Accuracy" $use_case_results_file >> $results_file
echo "####### Image classification offline V2 is complete #######"
}
-image_classification_accuracy(){
-echo "####### Accuracy:: Image classification in progress #######"
-export test_case=image_classification
+stable_diffusion_accuracy(){
+echo "####### Accuracy:: Stable diffusion in progress #######"
+export test_case=stable_diffusion
mkdir -p $test_case$test_case_suffix
export use_case_results_file=$results_prefix$test_case$results_suffix
-./main EXTERNAL $test_case --mode=AccuracyOnly --images_directory=$dataset_path/imagenet/img --offset=1 --output_dir=$test_case$test_case_suffix --min_query_count=$min_query --min_duration_ms=$min_duration_ms --single_stream_expected_latency_ns=1000000 --groundtruth_file=$dataset_path/imagenet/imagenet_val_full.txt --model_file=$models_path/mobilenet_edgetpu_224_1.0_htp.dlc --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1
+./main EXTERNAL $test_case --mode=AccuracyOnly --input_tfrecord=$dataset_path/stable_diffusion/coco_gen_test.tfrecord --input_clip_model=$models_path/stable_diffusion/clip_model_512x512.tflite --output_dir=$test_case$test_case_suffix --min_query_count=100 --min_duration_ms=0 --single_stream_expected_latency_ns=1000000 --model_file=$models_path/stable_diffusion --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1
echo "#######$test_case######" >> $results_file
grep "Accuracy" $use_case_results_file >> $results_file
-echo "####### Image classification is complete #######"
+echo "####### Stable Diffusion is complete #######"
}
-image_classification_offline_accuracy(){
-echo "####### Accuracy:: Image classification offline in progress #######"
-export test_case=image_classification_offline
-mkdir -p $test_case$test_case_suffix
-export use_case_results_file=$results_prefix$test_case$results_suffix
-./main EXTERNAL $test_case --mode=AccuracyOnly --scenario=Offline --batch_size=12288 --images_directory=$dataset_path/imagenet/img --offset=1 --output_dir=$test_case$test_case_suffix --min_query_count=24576 --min_duration_ms=0 --single_stream_expected_latency_ns=1000000 --groundtruth_file=$dataset_path/imagenet/imagenet_val_full.txt --model_file=$models_path/mobilenet_edgetpu_224_1.0_htp_batched_4.dlc --lib_path=libqtibackend.so --native_lib_path=. > $use_case_results_file 2>&1
-echo "#######$test_case######" >> $results_file
-grep "Accuracy" $use_case_results_file >> $results_file
-echo "####### Image classification offline is complete #######"
-}
-
-if [[ "$mode" == "performance" || "$mode" == "" ]]
+if [[ "$mode" == "performance" || "$mode" == "" ]]
then
case $usecase_name in
"image_classification_v2")
@@ -307,11 +283,8 @@ case $usecase_name in
"image_classification_offline_v2")
image_classification_offline_v2_performance
;;
- "image_classification")
- image_classification_performance
- ;;
- "image_classification_offline")
- image_classification_offline_performance
+ "stable_diffusion")
+ stable_diffusion_performance
;;
*)
image_classification_v2_performance
@@ -332,10 +305,7 @@ case $usecase_name in
image_classification_offline_v2_performance
echo "## cooldown intitated ##"
sleep $cooldown_period
- image_classification_performance
- echo "## cooldown intitated ##"
- sleep $cooldown_period
- image_classification_offline_performance
+ stable_diffusion_performance
;;
esac
fi
@@ -361,11 +331,8 @@ case $usecase_name in
"image_classification_offline_v2")
image_classification_offline_v2_accuracy
;;
- "image_classification")
- image_classification_accuracy
- ;;
- "image_classification_offline")
- image_classification_offline_accuracy
+ "stable_diffusion")
+ stable_diffusion_accuracy
;;
*)
image_classification_v2_accuracy
@@ -386,10 +353,7 @@ case $usecase_name in
image_classification_offline_v2_accuracy
echo "## cooldown intitated ##"
sleep $cooldown_period
- image_classification_accuracy
- echo "## cooldown intitated ##"
- sleep $cooldown_period
- image_classification_offline_accuracy
+ stable_diffusion_accuracy
;;
esac
fi
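Note: the shell script now accepts stable_diffusion as a --usecase value. A minimal device-side run, assuming the same --models/--dataset flags as the Windows script and illustrative paths:

    ./run_mlperf_tests.sh --models /data/local/tmp/mlperf_models \
        --dataset /data/local/tmp/mlperf_datasets \
        --usecase stable_diffusion --mode accuracy

Accuracy mode expects coco_gen_test.tfrecord plus the CLIP scoring model (clip_model_512x512.tflite) under the paths wired into stable_diffusion_accuracy above; performance mode only needs coco_gen_full.tfrecord.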
diff --git a/mobile_back_qti/variables.bzl b/mobile_back_qti/variables.bzl
index ef0704d14..b5919bfc5 100644
--- a/mobile_back_qti/variables.bzl
+++ b/mobile_back_qti/variables.bzl
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+# Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -17,26 +17,26 @@
def _impl(repository_ctx):
if "windows" in repository_ctx.os.name:
# print(repository_ctx.attr.workspace_dir + "/mobile_back_qti/")
- found = repository_ctx.execute(["ls", repository_ctx.attr.workspace_dir + "/mobile_back_qti"])
+ found = repository_ctx.execute(["ls", repository_ctx.attr.workspace_dir + "/mobile_back_qti/qairt/"])
if found.return_code != 0 or found.stdout == "" or found.stdout == "\n":
- fail("qaisw folder is not found in the repo: " + found.stderr)
+ fail("qairt folder is not found in the repo: " + found.stderr)
filelist = found.stdout.split("\n")
filepath = ""
for x in filelist:
- if x.find("qaisw-") == 0:
+ if x.startswith("2"):
filepath = x
break
if filepath == "":
- fail("qaisw folder is not found in the repo")
+ fail("qairt folder is not found in the repo")
else:
- found = repository_ctx.execute(["find", repository_ctx.attr.workspace_dir + "/mobile_back_qti/", "-maxdepth", "1", "-name", "qaisw-*", "-type", "d", "-print", "-quit"])
+ found = repository_ctx.execute(["find", repository_ctx.attr.workspace_dir + "/mobile_back_qti/qairt/", "-maxdepth", "1", "-name", "2.*", "-type", "d", "-print", "-quit"])
if found.return_code != 0 or found.stdout == "" or found.stdout == "\n":
- fail("qaisw folder is not found in the repo")
+ fail("qairt folder is not found in the repo")
filepath = found.stdout[:-1]
-
sdk_version = filepath[found.stdout.rfind("/") + 1:]
+
print("Update SNPE version: " + sdk_version) # buildifier: disable=print
- repository_ctx.read(Label("@//:mobile_back_qti/" + sdk_version + "/ReleaseNotes.txt"))
+ repository_ctx.read(Label("@//:mobile_back_qti/qairt/" + sdk_version + "/ReleaseNotes.txt"))
repository_ctx.file("BUILD", "")
repository_ctx.file(
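Note: the repository rule now locates the SDK under mobile_back_qti/qairt/ by matching a version directory whose name starts with "2" (previously a qaisw-* folder at the backend root). The expected layout after unpacking is roughly the following, with the version number illustrative:

    mobile_back_qti/
      qairt/
        2.25.0.240728/
          ReleaseNotes.txt
          ...

The ReleaseNotes.txt read at the end also makes the rule fail fast when the matched directory is not actually an SDK root.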
diff --git a/mobile_back_tflite/cpp/backend_tflite/BUILD b/mobile_back_tflite/cpp/backend_tflite/BUILD
index 80f46059a..651e34eba 100644
--- a/mobile_back_tflite/cpp/backend_tflite/BUILD
+++ b/mobile_back_tflite/cpp/backend_tflite/BUILD
@@ -36,9 +36,20 @@ pbtxt2header(
],
)
+cc_library(
+ name = "embedding_utils",
+ srcs = ["embedding_utils.cc"],
+ hdrs = ["embedding_utils.h"],
+ visibility = ["//visibility:public"],
+ deps = [
+ "@org_tensorflow//tensorflow/core:tflite_portable_logging",
+ ],
+)
+
cc_library(
name = "tflite_c",
srcs = [
+ "embedding_utils.cc",
"sd_utils.cc",
"single_model_pipeline.cc",
"stable_diffusion_invoker.cc",
@@ -46,6 +57,7 @@ cc_library(
"tflite_c.cc",
],
hdrs = [
+ "embedding_utils.h",
"pipeline.h",
"sd_utils.h",
"single_model_pipeline.h",
@@ -67,6 +79,7 @@ cc_library(
"//conditions:default": [],
}),
deps = [
+ ":embedding_utils",
":tflite_settings",
"//flutter/cpp:utils",
"//flutter/cpp/c:headers",
diff --git a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt
index dcb58f191..a94aa8749 100644
--- a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt
+++ b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_android.pbtxt
@@ -207,34 +207,33 @@ benchmark_setting {
delegate_selected: "NNAPI"
}
-# TODO (anhappdev) uncomment when stable_diffusion is ready
-#benchmark_setting {
-# benchmark_id: "stable_diffusion"
-# framework: "TFLite"
-# delegate_choice: {
-# delegate_name: "NNAPI"
-# accelerator_name: "npu"
-# accelerator_desc: "NPU"
-# model_file: {
-# model_path: "local:///mlperf_models/stable-diffusion/decoder.tflite"
-# model_checksum: "491385ad873880ba1876e1d097fcc0e3"
-# }
-# model_file: {
-# model_path: "local:///mlperf_models/stable-diffusion/text_encoder.tflite"
-# model_checksum: "8985768b09fe31b805e66b6048da9125"
-# }
-# model_file: {
-# model_path: "local:///mlperf_models/stable-diffusion/first_model.tflite"
-# model_checksum: "f0d6f45a2d702456a234c0a9b192816a"
-# }
-# model_file: {
-# model_path: "local:///mlperf_models/stable-diffusion/second_model.tflite"
-# model_checksum: "cea07208776347a8a5334106a09444fe"
-# }
-# }
-# delegate_selected: "NNAPI"
-# custom_setting {
-# id: "pipeline"
-# value: "StableDiffusionPipeline"
-# }
-#}
+benchmark_setting {
+ benchmark_id: "stable_diffusion"
+ framework: "TFLite"
+ delegate_choice: {
+ delegate_name: "NNAPI"
+ accelerator_name: "npu"
+ accelerator_desc: "NPU"
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_decoder_dynamic.tflite"
+ model_checksum: "68acdb62f99e1dc2c7f5db8cdd0e007c"
+ }
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_diffusion_model_dynamic.tflite"
+ model_checksum: "309e95f76ac8de01130942037a28aa8f"
+ }
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_text_encoder_dynamic.tflite"
+ model_checksum: "b64effb0360f9ea49a117cdaf8a2fbdc"
+ }
+ model_file: {
+ model_path: "https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/timestep_embeddings_data.bin.ts"
+ model_checksum: "798b772155a69de5df44b304327bb3cc"
+ }
+ }
+ delegate_selected: "NNAPI"
+ custom_setting {
+ id: "pipeline"
+ value: "StableDiffusionPipeline"
+ }
+}
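Note: the model_checksum values are 32 hex digits, i.e. MD5-sized; a downloaded artifact can be verified against its entry before being pushed to a device (sketch, assuming curl and md5sum are available):

    curl -LO https://mobile.mlcommons-storage.org/app-resources/models/v4_1/tflite/sd_decoder_dynamic.tflite
    md5sum sd_decoder_dynamic.tflite   # expect 68acdb62f99e1dc2c7f5db8cdd0e007c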
diff --git a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_apple_main.pbtxt b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_apple_main.pbtxt
index f02a69a02..4f825d894 100644
--- a/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_apple_main.pbtxt
+++ b/mobile_back_tflite/cpp/backend_tflite/backend_settings/tflite_settings_apple_main.pbtxt
@@ -146,7 +146,6 @@ benchmark_setting {
delegate_selected: "Core ML"
}
-# TODO (anhappdev) uncomment when stable_diffusion is ready
#benchmark_setting {
# benchmark_id: "stable_diffusion"
# framework: "TFLite"
diff --git a/mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc
new file mode 100644
index 000000000..9f25eb4e3
--- /dev/null
+++ b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.cc
@@ -0,0 +1,70 @@
+#include "embedding_utils.h"
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+
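+// Note: despite the name, parse_pickle reads a flat binary layout rather than
+// a Python pickle. Judging from the reads below, the .bin.ts file is assumed
+// to contain:
+//   uint32  num_timesteps
+//   int32   timesteps[num_timesteps]
+//   float   embeddings[num_timesteps][EMBEDDING_DIM]
+// Both arrays are reversed after reading and keyed by num_timesteps in the
+// lookup maps.
+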
+bool TsEmbeddingParser::parse_pickle(const std::string& filename) {
+ std::ifstream file(filename, std::ios::binary);
+ if (!file) {
+ std::cerr << "Failed to open file: " << filename << std::endl;
+ return false;
+ }
+
+ // Read timesteps array
+ std::vector<int32_t> timesteps;
+ uint32_t num_timesteps;
+ file.read(reinterpret_cast<char*>(&num_timesteps), sizeof(uint32_t));
+ timesteps.resize(num_timesteps);
+ file.read(reinterpret_cast<char*>(timesteps.data()),
+ num_timesteps * sizeof(int32_t));
+
+ // Read embeddings array
+ std::vector<std::vector<float>> embeddings(num_timesteps);
+ for (auto& emb : embeddings) {
+ emb.resize(EMBEDDING_DIM);
+ file.read(reinterpret_cast<char*>(emb.data()),
+ EMBEDDING_DIM * sizeof(float));
+ }
+
+ // Reverse both timesteps and embeddings before storing
+ std::reverse(timesteps.begin(), timesteps.end());
+ std::reverse(embeddings.begin(), embeddings.end());
+
+ // Store in maps
+ timesteps_[num_timesteps] = std::move(timesteps);
+ embeddings_[num_timesteps] = std::move(embeddings);
+
+ return true;
+}
+
+std::vector<float> TsEmbeddingParser::get_timestep_embedding(
+ int32_t steps, int32_t step_index) const {
+ auto emb_it = embeddings_.find(steps);
+ if (emb_it == embeddings_.end() || step_index >= emb_it->second.size()) {
+ return {};
+ }
+ return emb_it->second[step_index];
+}
+
+std::vector<int32_t> TsEmbeddingParser::get_timesteps(int32_t steps) const {
+ auto ts_it = timesteps_.find(steps);
+ if (ts_it == timesteps_.end()) {
+ return {};
+ }
+ return ts_it->second;
+}
+
+bool EmbeddingManager::load_timestep_embeddings(const std::string& filename) {
+ ts_parser_ = std::make_unique<TsEmbeddingParser>();
+ return ts_parser_->parse_pickle(filename);
+}
+
+std::vector<float> EmbeddingManager::get_timestep_embedding(
+ int32_t timestep, int num_steps) const {
+ if (!ts_parser_) return {};
+ return ts_parser_->get_timestep_embedding(num_steps, timestep);
+}
+
+std::vector<int32_t> EmbeddingManager::get_timesteps(int num_steps) const {
+ if (!ts_parser_) return {};
+ return ts_parser_->get_timesteps(num_steps);
+}
\ No newline at end of file
diff --git a/mobile_back_tflite/cpp/backend_tflite/embedding_utils.h b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.h
new file mode 100644
index 000000000..f543c6332
--- /dev/null
+++ b/mobile_back_tflite/cpp/backend_tflite/embedding_utils.h
@@ -0,0 +1,40 @@
+#ifndef EMBEDDING_UTILS_H_
+#define EMBEDDING_UTILS_H_
+
+#include <cstdint>
+#include <map>
+#include <memory>