diff --git a/.github/workflows/stackhpc-container-image-build.yml b/.github/workflows/stackhpc-container-image-build.yml
index 65428c039..f69626d27 100644
--- a/.github/workflows/stackhpc-container-image-build.yml
+++ b/.github/workflows/stackhpc-container-image-build.yml
@@ -34,6 +34,7 @@ jobs:
     name: Build Kolla container images
     if: github.repository == 'stackhpc/stackhpc-kayobe-config'
     runs-on: [self-hosted, stackhpc-kayobe-config-kolla-builder]
+    timeout-minutes: 720
     steps:
       - uses: actions/checkout@v3
         with:
diff --git a/doc/source/configuration/monitoring.rst b/doc/source/configuration/monitoring.rst
index 7e53629f1..f358ea084 100644
--- a/doc/source/configuration/monitoring.rst
+++ b/doc/source/configuration/monitoring.rst
@@ -1,4 +1,7 @@
-========================
+===========
+Monitoring
+===========
+
 Monitoring Configuration
 ========================
 
@@ -10,4 +13,43 @@ The configuration options can be found in
 ``etc/kayobe/stackhpc-monitoring.yml``:
 
 .. literalinclude:: ../../../etc/kayobe/stackhpc-monitoring.yml
-   :language: yaml
\ No newline at end of file
+   :language: yaml
+
+SMART Drive Monitoring
+=======================
+
+StackHPC kayobe config also includes drive monitoring for spinning disks and
+NVMe drives.
+
+By default, node exporter doesn't provide SMART metrics, so we use two
+scripts (one for NVMe drives and one for spinning drives), run by cron jobs,
+to write the metrics to files. Node exporter's textfile collector then
+reports the metrics output by the scripts to Prometheus. These metrics can
+then be visualised in Grafana with the bundled dashboard.
+
+After pulling in the latest changes into your local kayobe config, reconfigure
+Prometheus and Grafana:
+
+.. code-block:: console
+
+    kayobe overcloud service reconfigure -kt grafana,prometheus
+
+(Note: If you run into an error when reconfiguring Grafana, it could be due to
+`this <https://bugs.launchpad.net/kolla-ansible/+bug/1997984>`__ bug and at
+present, the workaround is to go into each node running Grafana and manually
+restart the process with ``docker restart grafana`` and then try the reconfigure
+command again.) 
+
+Once the reconfigure has completed you can now run the custom playbook which
+copies over the scripts and sets up the cron jobs to start SMART monitoring
+on the overcloud hosts:
+
+.. code-block:: console
+
+    (kayobe) [stack@node ~]$ cd etc/kayobe
+    (kayobe) [stack@node kayobe]$ kayobe playbook run ansible/smartmon-tools.yml
+
+SMART reporting should now be enabled along with a Prometheus alert for
+unhealthy disks and a Grafana dashboard called ``Hardware Overview``. 
+
+
diff --git a/etc/kayobe/ansible/scripts/nvmemon.sh b/etc/kayobe/ansible/scripts/nvmemon.sh
new file mode 100644
index 000000000..9ab727b0b
--- /dev/null
+++ b/etc/kayobe/ansible/scripts/nvmemon.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+set -eu
+
+# Dependencies: nvme-cli, jq (packages)
+# Based on code from
+# - https://github.com/prometheus/node_exporter/blob/master/text_collector_examples/smartmon.sh
+# - https://github.com/prometheus/node_exporter/blob/master/text_collector_examples/mellanox_hca_temp
+# - https://github.com/vorlon/check_nvme/blob/master/check_nvme.sh
+#
+# Author: Henk <henk@wearespindle.com>
+
+# Check if we are root
+if [ "$EUID" -ne 0 ]; then
+  echo "${0##*/}: Please run as root!" >&2
+  exit 1
+fi
+
+# Check if programs are installed
+if ! command -v nvme >/dev/null 2>&1; then
+  echo "${0##*/}: nvme is not installed. Aborting." >&2
+  exit 1
+fi
+
+output_format_awk="$(
+  cat <<'OUTPUTAWK'
+BEGIN { v = "" }
+v != $1 {
+  print "# HELP nvme_" $1 " SMART metric " $1;
+  if ($1 ~ /_total$/)
+    print "# TYPE nvme_" $1 " counter";
+  else
+    print "# TYPE nvme_" $1 " gauge";
+  v = $1
+}
+{print "nvme_" $0}
+OUTPUTAWK
+)"
+
+format_output() {
+  sort | awk -F'{' "${output_format_awk}"
+}
+
+# Get the nvme-cli version
+nvme_version="$(nvme version | awk '$1 == "nvme" {print $3}')"
+echo "nvmecli{version=\"${nvme_version}\"} 1" | format_output
+
+# Get devices
+device_list="$(nvme list -o json | jq -r '.Devices | .[].DevicePath')"
+
+# Loop through the NVMe devices
+for device in ${device_list}; do
+  json_check="$(nvme smart-log -o json "${device}")"
+  disk="${device##*/}"
+
+  # The temperature value in JSON is in Kelvin, we want Celsius
+  value_temperature="$(echo "$json_check" | jq '.temperature - 273')"
+  echo "temperature_celsius{device=\"${disk}\"} ${value_temperature}"
+
+  value_available_spare="$(echo "$json_check" | jq '.avail_spare / 100')"
+  echo "available_spare_ratio{device=\"${disk}\"} ${value_available_spare}"
+
+  value_available_spare_threshold="$(echo "$json_check" | jq '.spare_thresh / 100')"
+  echo "available_spare_threshold_ratio{device=\"${disk}\"} ${value_available_spare_threshold}"
+
+  value_percentage_used="$(echo "$json_check" | jq '.percent_used / 100')"
+  echo "percentage_used_ratio{device=\"${disk}\"} ${value_percentage_used}"
+
+  value_critical_warning="$(echo "$json_check" | jq '.critical_warning')"
+  echo "critical_warning_total{device=\"${disk}\"} ${value_critical_warning}"
+
+  value_media_errors="$(echo "$json_check" | jq '.media_errors')"
+  echo "media_errors_total{device=\"${disk}\"} ${value_media_errors}"
+
+  value_num_err_log_entries="$(echo "$json_check" | jq '.num_err_log_entries')"
+  echo "num_err_log_entries_total{device=\"${disk}\"} ${value_num_err_log_entries}"
+
+  value_power_cycles="$(echo "$json_check" | jq '.power_cycles')"
+  echo "power_cycles_total{device=\"${disk}\"} ${value_power_cycles}"
+
+  value_power_on_hours="$(echo "$json_check" | jq '.power_on_hours')"
+  echo "power_on_hours_total{device=\"${disk}\"} ${value_power_on_hours}"
+
+  value_controller_busy_time="$(echo "$json_check" | jq '.controller_busy_time')"
+  echo "controller_busy_time_seconds{device=\"${disk}\"} ${value_controller_busy_time}"
+
+  value_data_units_written="$(echo "$json_check" | jq '.data_units_written')"
+  echo "data_units_written_total{device=\"${disk}\"} ${value_data_units_written}"
+
+  value_data_units_read="$(echo "$json_check" | jq '.data_units_read')"
+  echo "data_units_read_total{device=\"${disk}\"} ${value_data_units_read}"
+
+  value_host_read_commands="$(echo "$json_check" | jq '.host_read_commands')"
+  echo "host_read_commands_total{device=\"${disk}\"} ${value_host_read_commands}"
+
+  value_host_write_commands="$(echo "$json_check" | jq '.host_write_commands')"
+  echo "host_write_commands_total{device=\"${disk}\"} ${value_host_write_commands}"
+done | format_output
diff --git a/etc/kayobe/ansible/scripts/smartmon.sh b/etc/kayobe/ansible/scripts/smartmon.sh
new file mode 100644
index 000000000..bcac8b8b3
--- /dev/null
+++ b/etc/kayobe/ansible/scripts/smartmon.sh
@@ -0,0 +1,238 @@
+#!/bin/bash
+# Script informed by the collectd monitoring script for smartmontools (using smartctl)
+# by Samuel B. <samuel_._behan_(at)_dob_._sk> (c) 2012
+# source at: http://devel.dob.sk/collectd-scripts/
+
+# TODO: This probably needs to be a little more complex.  The raw numbers can have more
+#       data in them than you'd think.
+#       http://arstechnica.com/civis/viewtopic.php?p=22062211
+
+# Formatting done via shfmt -i 2
+# https://github.com/mvdan/sh
+
+# awk program for the attribute table printed by `smartctl -A`. Rows whose first
+# field is a numeric ID and whose second field is an attribute name are turned
+# into four metrics (value, worst, threshold and raw value, taken from fields 4,
+# 5, 6 and 10). Dashes are replaced by underscores; the label set is passed in
+# through the awk variable `labels`.
+parse_smartctl_attributes_awk="$(
+  cat <<'SMARTCTLAWK'
+$1 ~ /^ *[0-9]+$/ && $2 ~ /^[a-zA-Z0-9_-]+$/ {
+  gsub(/-/, "_");
+  printf "%s_value{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $4
+  printf "%s_worst{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $5
+  printf "%s_threshold{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $6
+  printf "%s_raw_value{%s,smart_id=\"%s\"} %e\n", $2, labels, $1, $10
+}
+SMARTCTLAWK
+)"
+
+# Lower-case names of the attributes to export. The list is joined into a single
+# "a|b|c" alternation below and used as a grep -E filter.
+smartmon_attrs="$(
+  cat <<'SMARTMONATTRS'
+airflow_temperature_cel
+command_timeout
+current_pending_sector
+end_to_end_error
+erase_fail_count
+g_sense_error_rate
+hardware_ecc_recovered
+host_reads_32mib
+host_reads_mib
+host_writes_32mib
+host_writes_mib
+load_cycle_count
+media_wearout_indicator
+nand_writes_1gib
+offline_uncorrectable
+power_cycle_count
+power_on_hours
+program_fail_cnt_total
+program_fail_count
+raw_read_error_rate
+reallocated_event_count
+reallocated_sector_ct
+reported_uncorrect
+runtime_bad_block
+sata_downshift_count
+seek_error_rate
+spin_retry_count
+spin_up_time
+start_stop_count
+temperature_case
+temperature_celsius
+temperature_internal
+total_lbas_read
+total_lbas_written
+udma_crc_error_count
+unsafe_shutdown_count
+unused_rsvd_blk_cnt_tot
+wear_leveling_count
+workld_host_reads_perc
+workld_media_wear_indic
+workload_minutes
+SMARTMONATTRS
+)"
+smartmon_attrs="$(echo "${smartmon_attrs}" | xargs | tr ' ' '|')"
+
+# Convert `smartctl -A` output (used for the sat device types), read from stdin,
+# into metric lines for the attributes listed in smartmon_attrs.
+# Arguments: $1 disk device, $2 smartctl device type, $3 serial number.
+parse_smartctl_attributes() {
+  local disk="$1"
+  local disk_type="$2"
+  local serial="$3"
+  local labels="disk=\"${disk}\",type=\"${disk_type}\",serial_number=\"${serial}\""
+  sed 's/^ \+//g' |
+    awk -v labels="${labels}" "${parse_smartctl_attributes_awk}" 2>/dev/null |
+    tr '[:upper:]' '[:lower:]' |
+    grep -E "(${smartmon_attrs})"
+}
+
+# Convert `smartctl -A` output (used for the scsi and megaraid device types),
+# read from stdin, into metric lines. Only the fields matched in the case
+# statement below are exported.
+# Arguments: $1 disk device, $2 smartctl device type, $3 serial number.
+parse_smartctl_scsi_attributes() {
+  local disk="$1"
+  local disk_type="$2"
+  local serial="$3"
+  local labels="disk=\"${disk}\",type=\"${disk_type}\",serial_number=\"${serial}\""
+  # Keep the parsing state local so that values cannot be inherited from the
+  # caller's environment or carried over between invocations.
+  local line attr_type attr_value
+  local power_on='' temp_cel='' lbas_read='' lbas_written='' power_cycle='' grown_defects=''
+  while read -r line; do
+    attr_type="$(echo "${line}" | tr '=' ':' | cut -f1 -d: | sed 's/^ \+//g' | tr ' ' '_')"
+    attr_value="$(echo "${line}" | tr '=' ':' | cut -f2 -d: | sed 's/^ \+//g')"
+    case "${attr_type}" in
+    number_of_hours_powered_up_) power_on="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
+    Current_Drive_Temperature) temp_cel="$(echo "${attr_value}" | cut -f1 -d' ' | awk '{ printf "%e\n", $1 }')" ;;
+    Blocks_sent_to_initiator_) lbas_read="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
+    Blocks_received_from_initiator_) lbas_written="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
+    Accumulated_start-stop_cycles) power_cycle="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
+    Elements_in_grown_defect_list) grown_defects="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
+    esac
+  done
+  [ -n "$power_on" ] && echo "power_on_hours_raw_value{${labels},smart_id=\"9\"} ${power_on}"
+  [ -n "$temp_cel" ] && echo "temperature_celsius_raw_value{${labels},smart_id=\"194\"} ${temp_cel}"
+  [ -n "$lbas_read" ] && echo "total_lbas_read_raw_value{${labels},smart_id=\"242\"} ${lbas_read}"
+  [ -n "$lbas_written" ] && echo "total_lbas_written_raw_value{${labels},smart_id=\"241\"} ${lbas_written}"
+  [ -n "$power_cycle" ] && echo "power_cycle_count_raw_value{${labels},smart_id=\"12\"} ${power_cycle}"
+  [ -n "$grown_defects" ] && echo "grown_defects_count_raw_value{${labels},smart_id=\"-1\"} ${grown_defects}"
+}
+
+# Parse `smartctl -i -H` output, read from stdin, and print the device_info,
+# device_smart_available, device_smart_enabled and device_smart_healthy metrics.
+# Arguments: $1 disk device, $2 smartctl device type.
+parse_smartctl_info() {
+  local -i smart_available=0 smart_enabled=0
+  # Not declared as an integer: an integer variable assigned an empty value
+  # evaluates to 0, which would report a missing health status as unhealthy.
+  local smart_healthy=''
+  local disk="$1" disk_type="$2"
+  local model_family='' device_model='' serial_number='' fw_version='' vendor='' product='' revision='' lun_id=''
+  local line info_type info_value
+  while read -r line; do
+    info_type="$(echo "${line}" | cut -f1 -d: | tr ' ' '_')"
+    # Escape every double quote so the value can be embedded in a label.
+    info_value="$(echo "${line}" | cut -f2- -d: | sed 's/^ \+//g' | sed 's/"/\\"/g')"
+    case "${info_type}" in
+    Model_Family) model_family="${info_value}" ;;
+    Device_Model) device_model="${info_value}" ;;
+    Serial_Number) serial_number="${info_value}" ;;
+    Firmware_Version) fw_version="${info_value}" ;;
+    Vendor) vendor="${info_value}" ;;
+    Product) product="${info_value}" ;;
+    Revision) revision="${info_value}" ;;
+    Logical_Unit_id) lun_id="${info_value}" ;;
+    esac
+    if [[ "${info_type}" == 'SMART_support_is' ]]; then
+      case "${info_value:0:7}" in
+      Enabled) smart_available=1; smart_enabled=1 ;;
+      Availab) smart_available=1; smart_enabled=0 ;;
+      Unavail) smart_available=0; smart_enabled=0 ;;
+      esac
+    fi
+    if [[ "${info_type}" == 'SMART_overall-health_self-assessment_test_result' ]]; then
+      case "${info_value:0:6}" in
+      PASSED) smart_healthy=1 ;;
+      *) smart_healthy=0 ;;
+      esac
+    elif [[ "${info_type}" == 'SMART_Health_Status' ]]; then
+      case "${info_value:0:2}" in
+      OK) smart_healthy=1 ;;
+      *) smart_healthy=0 ;;
+      esac
+    fi
+  done
+  echo "device_info{disk=\"${disk}\",type=\"${disk_type}\",vendor=\"${vendor}\",product=\"${product}\",revision=\"${revision}\",lun_id=\"${lun_id}\",model_family=\"${model_family}\",device_model=\"${device_model}\",serial_number=\"${serial_number}\",firmware_version=\"${fw_version}\"} 1"
+  echo "device_smart_available{disk=\"${disk}\",type=\"${disk_type}\",serial_number=\"${serial_number}\"} ${smart_available}"
+  [[ "${smart_available}" == "1" ]] && echo "device_smart_enabled{disk=\"${disk}\",type=\"${disk_type}\",serial_number=\"${serial_number}\"} ${smart_enabled}"
+  [[ "${smart_available}" == "1" ]] && [[ "${smart_healthy}" != "" ]] && echo "device_smart_healthy{disk=\"${disk}\",type=\"${disk_type}\",serial_number=\"${serial_number}\"} ${smart_healthy}"
+}
+
+# awk program used by format_output. Input is split on '{', so $1 is the metric
+# name. Each time the name changes, HELP and TYPE (gauge) headers are printed
+# first. Every line is emitted with a "smartmon_" prefix.
+output_format_awk="$(
+  cat <<'OUTPUTAWK'
+BEGIN { v = "" }
+v != $1 {
+  print "# HELP smartmon_" $1 " SMART metric " $1;
+  print "# TYPE smartmon_" $1 " gauge";
+  v = $1
+}
+{print "smartmon_" $0}
+OUTPUTAWK
+)"
+
+# Sort the metric lines read from stdin so that lines with the same name are
+# adjacent, then add the headers and prefix described above.
+format_output() {
+  sort |
+    awk -F'{' "${output_format_awk}"
+}
+
+smartctl_version="$(/usr/sbin/smartctl -V | head -n1 | awk '$1 == "smartctl" {print $2}')"
+
+echo "smartctl_version{version=\"${smartctl_version}\"} 1" | format_output
+
+# Stop here when the major version of smartctl is lower than 6.
+if [[ "$(expr "${smartctl_version}" : '\([0-9]*\)\..*')" -lt 6 ]]; then
+  exit
+fi
+
+# One "device|type" entry per /dev line printed by smartctl --scan-open.
+device_list="$(/usr/sbin/smartctl --scan-open | awk '/^\/dev/{print $1 "|" $3}')"
+
+for device in ${device_list}; do
+  disk="$(echo "${device}" | cut -f1 -d'|')"
+  type="$(echo "${device}" | cut -f2 -d'|')"
+  active=1
+  echo "smartctl_run{disk=\"${disk}\",type=\"${type}\"}" "$(TZ=UTC date '+%s')"
+  # Check if the device is in a low-power mode
+  /usr/sbin/smartctl -n standby -d "${type}" "${disk}" > /dev/null || active=0
+  echo "device_active{disk=\"${disk}\",type=\"${type}\"}" "${active}"
+  # Skip further metrics to prevent the disk from spinning up
+  test ${active} -eq 0 && continue
+  # Get the SMART information and health. smartctl is queried once, and only
+  # after the standby check above, so an inactive disk is not queried at all.
+  smart_info="$(/usr/sbin/smartctl -i -H -d "${type}" "${disk}" | parse_smartctl_info "${disk}" "${type}")"
+  printf '%s\n' "${smart_info}"
+  # Extract the serial number from the device_info line for use as a label.
+  serial_number="$(printf '%s\n' "${smart_info}" | sed -n 's/^device_info{.*serial_number="\([^"]*\)".*/\1/p' | head -n1)"
+  # Get the SMART attributes
+  case ${type} in
+  sat) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" "${serial_number}" ;;
+  sat+megaraid*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" "${serial_number}" ;;
+  scsi) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" "${serial_number}" ;;
+  megaraid*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" "${serial_number}" ;;
+  *)
+    # Unsupported type: report it and carry on with the remaining devices
+    # instead of ending the loop and losing their metrics.
+    (>&2 echo "disk type is not sat, scsi or megaraid but ${type}")
+    continue
+    ;;
+  esac
+done | format_output
diff --git a/etc/kayobe/ansible/smartmon-tools.yml b/etc/kayobe/ansible/smartmon-tools.yml
new file mode 100644
index 000000000..6b275c264
--- /dev/null
+++ b/etc/kayobe/ansible/smartmon-tools.yml
@@ -0,0 +1,48 @@
+---
+# Installs the SMART monitoring tools on the overcloud hosts, copies the metric
+# collection scripts to /usr/local/bin and schedules them with cron. Each
+# script's output is saved under /var/lib/docker/volumes/textfile/_data.
+- hosts: overcloud
+
+  tasks:
+    - name: Ensure smartmontools, nvme-cli and jq are installed
+      package:
+        name:
+          - smartmontools
+          - nvme-cli
+          - jq
+        state: present
+      become: true
+
+    - name: Copy smartmon.sh and nvmemon.sh from scripts folder
+      copy:
+        src: "scripts/{{ item }}"
+        dest: /usr/local/bin/
+        owner: 'root'
+        group: 'root'
+        mode: '0700'
+      loop:
+        - smartmon.sh
+        - nvmemon.sh
+      become: true
+
+    - name: Set PATH Variable for cron
+      cron:
+        name: PATH
+        user: root
+        env: true
+        job: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+      become: true
+
+    # The output is written to a .prom.temp file first and only renamed to
+    # .prom when the script succeeds, so a failed run leaves the old file.
+    - name: Schedule cronjob to run both scripts every 5 minutes and save output to file
+      cron:
+        name: "SMART metrics for drive monitoring using {{ item }}"
+        user: root
+        minute: "*/5"
+        job: "/usr/local/bin/{{ item }}.sh > /var/lib/docker/volumes/textfile/_data/{{ item }}.prom.temp && mv /var/lib/docker/volumes/textfile/_data/{{ item }}.prom.temp /var/lib/docker/volumes/textfile/_data/{{ item }}.prom"
+      loop:
+        - smartmon
+        - nvmemon
+      become: true
diff --git a/etc/kayobe/kolla/config/grafana/dashboards/openstack/hardware_overview.json b/etc/kayobe/kolla/config/grafana/dashboards/openstack/hardware_overview.json
new file mode 100644
index 000000000..e4f78aee3
--- /dev/null
+++ b/etc/kayobe/kolla/config/grafana/dashboards/openstack/hardware_overview.json
@@ -0,0 +1,543 @@
+{% raw %}
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "target": {
+          "limit": 100,
+          "matchAny": false,
+          "tags": [],
+          "type": "dashboard"
+        },
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "description": "Number of healthy drives",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 6,
+        "x": 0,
+        "y": 0
+      },
+      "hideTimeOverride": false,
+      "id": 4,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "9.1.2",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": false,
+          "expr": "count(smartmon_device_smart_healthy > 0)",
+          "format": "time_series",
+          "instant": false,
+          "interval": "",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Healthy Drives",
+      "transformations": [],
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "description": "Number of unhealthy drives",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "noValue": "0",
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 1
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 6,
+        "x": 6,
+        "y": 0
+      },
+      "hideTimeOverride": false,
+      "id": 5,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "9.1.2",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": false,
+          "expr": "count(smartmon_device_smart_healthy < 1) ",
+          "format": "time_series",
+          "instant": false,
+          "interval": "",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Unhealthy Drives",
+      "transformations": [],
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "description": "Total number of drives reporting a SMART health status",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 6,
+        "x": 12,
+        "y": 0
+      },
+      "hideTimeOverride": false,
+      "id": 6,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "pluginVersion": "9.1.2",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "editorMode": "code",
+          "exemplar": false,
+          "expr": "count(smartmon_device_smart_healthy)",
+          "format": "time_series",
+          "instant": false,
+          "interval": "",
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A"
+        }
+      ],
+      "title": "Total Drives",
+      "transformations": [],
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${datasource}"
+      },
+      "description": "",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {
+            "align": "center",
+            "displayMode": "auto",
+            "filterable": false,
+            "inspect": false
+          },
+          "mappings": [
+            {
+              "options": {
+                "0": {
+                  "color": "red",
+                  "index": 1,
+                  "text": "Failed"
+                },
+                "1": {
+                  "color": "dark-green",
+                  "index": 0,
+                  "text": "Ok"
+                }
+              },
+              "type": "value"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          }
+        },
+        "overrides": [
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "job"
+            },
+            "properties": [
+              {
+                "id": "custom.hidden",
+                "value": true
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "__name__"
+            },
+            "properties": [
+              {
+                "id": "custom.hidden",
+                "value": true
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Value"
+            },
+            "properties": [
+              {
+                "id": "custom.displayMode",
+                "value": "color-background-solid"
+              },
+              {
+                "id": "custom.width"
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "type"
+            },
+            "properties": [
+              {
+                "id": "custom.width",
+                "value": 153
+              },
+              {
+                "id": "displayName",
+                "value": "Type"
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "serial_number"
+            },
+            "properties": [
+              {
+                "id": "custom.width",
+                "value": 208
+              },
+              {
+                "id": "displayName",
+                "value": "Serial Number"
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Time"
+            },
+            "properties": [
+              {
+                "id": "custom.hidden",
+                "value": true
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "disk"
+            },
+            "properties": [
+              {
+                "id": "custom.width",
+                "value": 146
+              },
+              {
+                "id": "displayName",
+                "value": "Disk"
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "instance"
+            },
+            "properties": [
+              {
+                "id": "custom.width",
+                "value": 203
+              },
+              {
+                "id": "displayName",
+                "value": "Hostname"
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Type"
+            },
+            "properties": [
+              {
+                "id": "custom.width"
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Serial Number"
+            },
+            "properties": [
+              {
+                "id": "custom.width"
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Hostname"
+            },
+            "properties": [
+              {
+                "id": "custom.width"
+              }
+            ]
+          },
+          {
+            "matcher": {
+              "id": "byName",
+              "options": "Disk"
+            },
+            "properties": [
+              {
+                "id": "custom.width"
+              }
+            ]
+          }
+        ]
+      },
+      "gridPos": {
+        "h": 13,
+        "w": 18,
+        "x": 0,
+        "y": 7
+      },
+      "id": 2,
+      "options": {
+        "footer": {
+          "fields": "",
+          "reducer": [
+            "sum"
+          ],
+          "show": false
+        },
+        "showHeader": true,
+        "sortBy": []
+      },
+      "pluginVersion": "9.1.2",
+      "targets": [
+        {
+          "$$hashKey": "object:40",
+          "aggregation": "Last",
+          "alias": "Healthy",
+          "crit": 0,
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${datasource}"
+          },
+          "decimals": 0,
+          "displayAliasType": "Warning / Critical",
+          "displayType": "Regular",
+          "displayValueWithAlias": "Never",
+          "editorMode": "code",
+          "exemplar": false,
+          "expr": "smartmon_device_smart_healthy",
+          "format": "table",
+          "instant": true,
+          "interval": "",
+          "legendFormat": "",
+          "range": false,
+          "refId": "A",
+          "units": "none",
+          "valueHandler": "Number Threshold",
+          "warn": 0
+        }
+      ],
+      "title": "Drive Health Status",
+      "transparent": true,
+      "type": "table"
+    }
+  ],
+  "refresh": false,
+  "schemaVersion": 37,
+  "style": "dark",
+  "tags": [],
+  "templating": {
+    "list": [
+      {
+        "datasource": {
+          "type": "prometheus",
+          "uid": "PBFA97CFB590B2093"
+        },
+        "filters": [],
+        "hide": 0,
+        "name": "Filters",
+        "skipUrlSync": false,
+        "type": "adhoc"
+      },
+      {
+        "current": {
+          "selected": true,
+          "text": "Prometheus",
+          "value": "Prometheus"
+        },
+        "hide": 0,
+        "includeAll": false,
+        "multi": false,
+        "name": "datasource",
+        "options": [],
+        "query": "prometheus",
+        "queryValue": "",
+        "refresh": 1,
+        "regex": "",
+        "skipUrlSync": false,
+        "type": "datasource"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "",
+  "title": "Hardware Overview",
+  "uid": "TCN51Y25P",
+  "version": 1,
+  "weekStart": ""
+}
+{% endraw %}
\ No newline at end of file
diff --git a/etc/kayobe/kolla/config/grafana/dashboards/openstack/node_exporter_full.json b/etc/kayobe/kolla/config/grafana/dashboards/openstack/node_exporter_full.json
index 08078c31d..66d630b8d 100644
--- a/etc/kayobe/kolla/config/grafana/dashboards/openstack/node_exporter_full.json
+++ b/etc/kayobe/kolla/config/grafana/dashboards/openstack/node_exporter_full.json
@@ -4886,7 +4886,7 @@
         "Total Swap": "#614D93",
         "VmallocUsed": "#EA6460"
       },
-      "bars": false,
+      "bars": true,
       "dashLength": 10,
       "dashes": false,
       "datasource": {
@@ -4921,7 +4921,7 @@
         "total": false,
         "values": true
       },
-      "lines": true,
+      "lines": false,
       "linewidth": 1,
       "links": [],
       "maxPerRow": 6,
@@ -4940,9 +4940,9 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "irate(node_vmstat_oom_kill{instance=\"$node\",job=\"$job\"}[5m])",
+          "expr": "max_over_time(node_vmstat_oom_kill{instance=\"$node\",job=\"$job\"}[$__rate_interval:]) - (min_over_time(node_vmstat_oom_kill{instance=\"$node\",job=\"$job\"}[$__rate_interval:]))",
           "format": "time_series",
-          "interval": "",
+          "interval": "30s",
           "intervalFactor": 2,
           "legendFormat": "oom killer invocations ",
           "refId": "A",
diff --git a/etc/kayobe/kolla/config/prometheus/ceph.rules b/etc/kayobe/kolla/config/prometheus/ceph.rules
index 7df171501..52b9841a9 100644
--- a/etc/kayobe/kolla/config/prometheus/ceph.rules
+++ b/etc/kayobe/kolla/config/prometheus/ceph.rules
@@ -154,7 +154,7 @@ groups:
 
   # alert on nic packet errors and drops rates > 1 packet/s
   - alert: NetworkPacketsDropped
-    expr: irate(node_network_receive_drop_total{device=~"en.*|eth.*"}[5m]) + irate(node_network_transmit_drop_total{device=~"en.*|eth.*"}[5m]) > 1
+    expr: irate(node_network_receive_drop_total{device!~"lo|br.*|.*-ovs|tap.*"}[5m]) + irate(node_network_transmit_drop_total{device!~"lo|br.*|.*-ovs|tap.*"}[5m]) > 1
     labels:
       severity: warning
     annotations:
diff --git a/etc/kayobe/kolla/config/prometheus/smart.rules b/etc/kayobe/kolla/config/prometheus/smart.rules
new file mode 100644
index 000000000..0b6552598
--- /dev/null
+++ b/etc/kayobe/kolla/config/prometheus/smart.rules
@@ -0,0 +1,17 @@
+{% raw %}
+
+# Alerting rules for the smartmon_* SMART metrics.
+groups:
+- name: smart
+  rules:
+  # Fires once a disk has reported a SMART health value below 1 for 10 minutes.
+  - alert: DiskSmartStatusUnhealthy
+    expr: smartmon_device_smart_healthy < 1
+    for: 10m
+    labels:
+      severity: alert
+    annotations:
+      summary: "SMART monitor reports bad disk on (instance {{ $labels.instance }})"
+      description: "{{ $labels.instance }} is reporting unhealthy for the disk at {{ $labels.disk }}. Disk serial number is: {{ $labels.serial_number }}"
+
+{% endraw %}
diff --git a/etc/kayobe/kolla/globals.yml b/etc/kayobe/kolla/globals.yml
index b30ddd013..3081a2e5e 100644
--- a/etc/kayobe/kolla/globals.yml
+++ b/etc/kayobe/kolla/globals.yml
@@ -17,4 +17,11 @@ bifrost_tag: xena-20221128T101757
 es_heap_size: 8g
 prometheus_cmdline_extras: "--storage.tsdb.retention.time=30d"
 
+# Additional command line flags for node exporter to enable the textfile collector for disk metrics, and the textfile Docker volume it reads from
+prometheus_node_exporter_extra_volumes:
+  - "textfile:/var/lib/node_exporter/textfile_collector"
+prometheus_node_exporter_cmdline_extras: "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector"
+
+
 #############################################################################
+
diff --git a/releasenotes/notes/smart-mon-db8fa642c3af74b1.yaml b/releasenotes/notes/smart-mon-db8fa642c3af74b1.yaml
new file mode 100644
index 000000000..feaec4dbe
--- /dev/null
+++ b/releasenotes/notes/smart-mon-db8fa642c3af74b1.yaml
@@ -0,0 +1,4 @@
+---
+
+features:
+  - Enables SMART drive monitoring. Manual action is required; see the monitoring documentation for the procedure.