Skip to content

Commit

Permalink
[DellEMC] S6100 Last Reboot Reason Thermal Support (sonic-net#3767)
Browse files Browse the repository at this point in the history
  • Loading branch information
santhosh-kt authored and Stepan Blyschak committed Feb 26, 2020
1 parent d590784 commit 0f7fe89
Show file tree
Hide file tree
Showing 11 changed files with 337 additions and 44 deletions.
6 changes: 6 additions & 0 deletions files/image_config/platform/rc.local
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,12 @@ if [ -f $FIRST_BOOT_FILE ]; then
# Notify firstboot to Platform, to use it for reboot-cause
touch /tmp/notify_firstboot_to_platform

# Create /host/reboot-cause/platform/ directory
# can be used to track last reboot reason by some platforms
if [ ! -d /host/reboot-cause/platform ]; then
mkdir -p /host/reboot-cause/platform
fi

if [ -d /host/image-$SONIC_VERSION/platform/$platform ]; then
dpkg -i /host/image-$SONIC_VERSION/platform/$platform/*.deb
fi
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/python
#Script to read/write the nvram

import sys
import os
import getopt
import struct

nvram_resource='/dev/nvram'

def usage():
    """Print the command-line synopsis and terminate with exit status 1."""
    help_lines = (
        'Utility for NVRAM read/write',
        '\t\t nvram_rd_wr.py --get --offset <offset>',
        '\t\t nvram_rd_wr.py --set --val <val> --offset <offset>',
    )
    # A single parenthesised argument prints identically on Python 2 and 3.
    print('\n'.join(help_lines))
    sys.exit(1)

def nvram_reg_read(nvram_resource, offset):
    """Read one byte from the NVRAM device file and print it in hex.

    Args:
        nvram_resource: Path of the device file to read.
        offset: Byte position inside the device file.

    On success a single line 'value <hex>' is written to stdout. On
    failure a diagnostic line is written instead. Nothing is returned.
    """
    # os.open() reports failure by raising OSError, never by returning a
    # negative descriptor, so the error has to be caught here.
    try:
        fd = os.open(nvram_resource, os.O_RDONLY)
    except OSError:
        print('file open failed %s' % nvram_resource)
        return

    # try/finally guarantees the descriptor is released on every path.
    try:
        if os.lseek(fd, offset, os.SEEK_SET) != offset:
            print('lseek failed on %s' % nvram_resource)
            return
        buf = os.read(fd, 1)
        # A read at or past end-of-file returns an empty buffer, which
        # ord() cannot convert.
        if len(buf) != 1:
            print('read failed on %s' % nvram_resource)
            return
        print('value %x' % ord(buf))
    finally:
        os.close(fd)

def nvram_reg_write(nvram_resource, offset, val):
    """Write one byte to the NVRAM device file.

    Args:
        nvram_resource: Path of the device file to write.
        offset: Byte position inside the device file.
        val: Integer in the range 0..255 to store at that position.

    A diagnostic line is printed to stdout when the file cannot be
    opened, positioned, or written. Nothing is returned.

    Raises:
        struct.error: If val does not fit in an unsigned byte.
    """
    # os.open() reports failure by raising OSError, never by returning a
    # negative descriptor, so the error has to be caught here.
    try:
        fd = os.open(nvram_resource, os.O_RDWR)
    except OSError:
        print('file open failed %s' % nvram_resource)
        return

    # try/finally guarantees the descriptor is released on every path.
    try:
        if os.lseek(fd, offset, os.SEEK_SET) != offset:
            print('lseek failed on %s' % nvram_resource)
            return
        ret = os.write(fd, struct.pack('B', val))
        if ret != 1:
            print('write failed %d' % ret)
    finally:
        os.close(fd)

def main(argv):
    """Parse the command line and perform the requested NVRAM access.

    Supported invocations:
        --get --offset <offset>
        --set --val <val> --offset <offset>

    <offset> and <val> are parsed as hexadecimal numbers. 0xE is
    subtracted from <offset> before it is used as a position in the
    NVRAM device file.
    NOTE(review): presumably because /dev/nvram does not expose the first
    14 CMOS bytes -- confirm against the kernel nvram driver.

    Any other option combination, a malformed number, an offset below
    0xE, or a value that does not fit in one byte prints the usage text
    and exits with status 1.

    Args:
        argv: Command-line arguments without the program name.
    """
    try:
        # NOTE(review): "s:" makes the short option -s consume an argument
        # while the long option --set takes none. Left unchanged so that
        # existing invocations keep parsing the same way.
        opts, _ = getopt.getopt(argv, "hgs:",
                                ["val=", "offset=", "help", "get", "set"])
    except getopt.GetoptError:
        usage()

    if not os.path.exists(nvram_resource):
        print('NVRAM is not initialized')
        sys.exit(1)

    choice = None
    offset = None
    val = None

    try:
        for opt, arg in opts:
            if opt in ('-h', '--help'):
                choice = 'help'
            elif opt in ('-g', '--get'):
                choice = 'get'
            elif opt in ('-s', '--set'):
                choice = 'set'
            elif opt == '--offset':
                offset = int(arg, 16) - 0xE
            elif opt == '--val':
                val = int(arg, 16)
    except ValueError:
        # Non-hexadecimal input: show usage instead of a traceback.
        usage()

    # Offset 0 is valid, so compare against None rather than truthiness.
    offset_ok = offset is not None and offset >= 0
    # struct.pack('B', ...) in nvram_reg_write only accepts 0..255.
    val_ok = val is not None and 0 <= val <= 0xFF

    if choice == 'get' and offset_ok:
        nvram_reg_read(nvram_resource, offset)
    elif choice == 'set' and offset_ok and val_ok:
        nvram_reg_write(nvram_resource, offset, val)
    else:
        usage()


# Calling the main method
if __name__ == "__main__":
    main(sys.argv[1:])

Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@ s6100/scripts/iom_power_*.sh usr/local/bin
s6100/scripts/s6100_platform.sh usr/local/bin
common/dell_i2c_utils.sh usr/local/bin
common/io_rd_wr.py usr/local/bin
common/nvram_rd_wr.py usr/local/bin
s6100/scripts/platform_reboot_override usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
s6100/scripts/fast-reboot_plugin usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
s6100/scripts/track_reboot_reason.sh usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
s6100/scripts/warm-reboot_plugin usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
s6100/scripts/override.conf /etc/systemd/system/systemd-reboot.service.d
common/dell_lpc_mon.sh usr/local/bin
s6100/scripts/platform_sensors.py usr/local/bin
Expand All @@ -12,6 +16,7 @@ s6100/scripts/platform_watchdog_disable.sh usr/local/bin
s6100/scripts/sensors usr/bin
s6100/systemd/platform-modules-s6100.service etc/systemd/system
s6100/systemd/s6100-lpc-monitor.service etc/systemd/system
s6100/systemd/s6100-reboot-cause.service etc/systemd/system
tools/flashrom/flashrom usr/local/bin/
common/fw-updater usr/local/bin
common/onie_mode_set usr/local/bin
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

# Write 0xcc into the SMF mb_poweron_reason attribute before a reboot.
# track_reboot_reason.sh looks for the value "cc" in that attribute on
# the next boot. Nothing is done when the SMF hwmon directory is absent.
SMF_HWMON_DIR=/sys/devices/platform/SMF.512/hwmon

if [[ -d ${SMF_HWMON_DIR}/ ]]; then
    # The hwmon instance directory name is not fixed, hence the glob.
    cd ${SMF_HWMON_DIR}/*
    echo 0xcc > mb_poweron_reason
fi
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
#!/usr/bin/python
import sys
import os
import subprocess
import struct

PORT_RES = '/dev/port'


def log_software_reboot():
    """Run the platform fast-reboot plugin and wait for it to finish.

    The plugin script tracks CLI-triggered reboot, fastboot and warmboot.
    Its output is captured and discarded.

    Raises:
        subprocess.CalledProcessError: If the plugin exits non-zero.
        OSError: If the plugin cannot be executed.
    """
    plugin_path = ('/usr/share/sonic/device/x86_64-dell_s6100_c2538-r0/'
                   'fast-reboot_plugin')
    subprocess.check_output([plugin_path])

def portio_reg_write(resource, offset, val):
fd = os.open(resource, os.O_RDWR)
if(fd < 0):
Expand All @@ -21,5 +27,6 @@ def portio_reg_write(resource, offset, val):
os.close(fd)

# Script entry point: record the software reboot, then trigger the reset.
if __name__ == "__main__":
    # Run the tracking plugin first so it completes before the port write.
    log_software_reboot()
    # Write 0xe to I/O port 0xcf9 through PORT_RES ('/dev/port').
    # NOTE(review): presumably the chipset reset-control register, with
    # 0xe requesting a full reset -- confirm against the platform docs.
    portio_reg_write(PORT_RES, 0xcf9, 0xe)

Original file line number Diff line number Diff line change
Expand Up @@ -237,24 +237,6 @@ reset_muxes() {
io_rd_wr.py --set --val 0xff --offset 0x20b
}

# Advance the value stored in the SMF mb_poweron_reason attribute:
#   ff      -> 0x01 when /tmp/notify_firstboot_to_platform exists,
#              otherwise 0xbb
#   bb or 1 -> 0xaa
# Any other value is left untouched. Nothing is done when the SMF hwmon
# directory is absent. The function changes the caller's working
# directory when it writes the attribute.
track_reboot_reason() {
    if [[ -d /sys/devices/platform/SMF.512/hwmon/ ]]; then
        # '&&' keeps cat from reading an unrelated file if cd fails.
        rv=$(cd /sys/devices/platform/SMF.512/hwmon/* && cat mb_poweron_reason)
        # Keep only the part after the 'x' of the 0x prefix.
        reason=$(echo $rv | cut -d 'x' -f2)
        # "$reason" is quoted so an empty read compares as an empty string
        # instead of making '[' fail with "unary operator expected".
        if [ "$reason" == "ff" ]; then
            cd /sys/devices/platform/SMF.512/hwmon/*
            if [[ -e /tmp/notify_firstboot_to_platform ]]; then
                echo 0x01 > mb_poweron_reason
            else
                echo 0xbb > mb_poweron_reason
            fi
        elif [ "$reason" == "bb" ] || [ "$reason" == "1" ]; then
            cd /sys/devices/platform/SMF.512/hwmon/*
            echo 0xaa > mb_poweron_reason
        fi
    fi
}

install_python_api_package() {
device="/usr/share/sonic/device"
platform=$(/usr/local/bin/sonic-cfggen -H -v DEVICE_METADATA.localhost.platform)
Expand All @@ -277,7 +259,8 @@ if [[ "$1" == "init" ]]; then
modprobe dell_ich
modprobe dell_s6100_iom_cpld
modprobe dell_s6100_lpc
track_reboot_reason
modprobe nvram
systemctl start s6100-reboot-cause.service

# Disable Watchdog Timer
if [[ -e /usr/local/bin/platform_watchdog_disable.sh ]]; then
Expand Down Expand Up @@ -313,6 +296,7 @@ elif [[ "$1" == "deinit" ]]; then
modprobe -r i2c-mux-pca954x
modprobe -r i2c-dev
modprobe -r dell_ich
modprobe -r nvram
remove_python_api_package
else
echo "s6100_platform : Invalid option !"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#!/bin/bash

# Global state shared by the functions below.
reboot_dir_found=false      # true once /host/reboot-cause/platform exists
reboot_file_found=false     # true once $REBOOT_REASON_FILE exists
smf_dir_missing=0           # poll counter for the SMF hwmon directory
nvram_missing=0             # poll counter for the NVRAM device file

# Files kept under /host/reboot-cause.
REBOOT_CAUSE_FILE=/host/reboot-cause/reboot-cause.txt
REBOOT_REASON_FILE=/host/reboot-cause/platform/reboot_reason
BIOS_VERSION_FILE=/host/reboot-cause/platform/bios_minor_version
RESET_REASON_FILE=/host/reboot-cause/platform/reset_reason

# SMF attributes; the '*' is expanded at the point of use because the
# variables are referenced unquoted.
SMF_DIR=/sys/devices/platform/SMF.512/hwmon/
SMF_POWERON_REASON=/sys/devices/platform/SMF.512/hwmon/*/smf_poweron_reason
SMF_RESET_REASON=/sys/devices/platform/SMF.512/hwmon/*/smf_reset_reason
MAILBOX_POWERON_REASON=/sys/devices/platform/SMF.512/hwmon/*/mb_poweron_reason

NVRAM_DEVICE_FILE=/dev/nvram

# Block until the SMF hwmon directory shows up, logging a message on
# every fifth poll.
until [[ -d $SMF_DIR ]]; do
    sleep 0.5
    smf_dir_missing=$((smf_dir_missing + 1))
    if (( smf_dir_missing == 5 )); then
        echo "SMF is not initialized"
        smf_dir_missing=0
    fi
done

SMF_RESET=$(cat $SMF_RESET_REASON)

# Note which of the persistent directory and reason file already exist.
if [[ -d /host/reboot-cause/platform ]]; then
    reboot_dir_found=true
    if [[ -f $REBOOT_REASON_FILE ]]; then
        reboot_file_found=true
    fi
fi

# Third space-separated field of the io_rd_wr.py output, read as hex;
# bit 0 of it is kept as the secondary-BIOS indicator.
SMF_BIOS_REG=$(io_rd_wr.py --get --offset 0x203 | cut -d " " -f 3)
SMF_BIOS_REG=$((16#$SMF_BIOS_REG))
bios_secondary_boot=$((SMF_BIOS_REG & 1))

# Refresh $BIOS_VERSION_FILE from the BIOS version and, when that file
# exists, take the reset reason from NVRAM:
#   - SMF_RESET is replaced by the byte at offset 0x5c unless it is "ee"
#   - the four bytes at 0x5c..0x5f are saved to $RESET_REASON_FILE when
#     the platform directory exists
#   - offsets 0x58 and 0x5c..0x5f are then overwritten with 0xee
_get_smf_reset_register() {
    local nvram_offset

    BIOS_VERSION=$(/usr/sbin/dmidecode -s system-version)
    BIOS_VERSION_MINOR=$(echo $BIOS_VERSION | cut -d'-' -f 2)

    if [[ $BIOS_VERSION_MINOR -gt 7 ]]; then
        echo $BIOS_VERSION > $BIOS_VERSION_FILE
    elif [[ "$bios_secondary_boot" = "0" && -e $BIOS_VERSION_FILE ]]; then
        # For Primary BIOS with older version
        rm $BIOS_VERSION_FILE
    fi

    # Without the version marker file there is nothing to read from NVRAM.
    [[ -e $BIOS_VERSION_FILE ]] || return 0

    # Block until the NVRAM device node exists, logging every fifth poll.
    until [[ -e $NVRAM_DEVICE_FILE ]]; do
        sleep 1
        nvram_missing=$((nvram_missing + 1))
        if (( nvram_missing == 5 )); then
            echo "NVRAM is not initialized"
            nvram_missing=0
        fi
    done

    # nvram_rd_wr.py prints "value <hex>"; keep the second field.
    first_reset=$(nvram_rd_wr.py --get --offset 0x5c | cut -d " " -f 2)
    second_reset=$(nvram_rd_wr.py --get --offset 0x5d | cut -d " " -f 2)
    third_reset=$(nvram_rd_wr.py --get --offset 0x5e | cut -d " " -f 2)
    fourth_reset=$(nvram_rd_wr.py --get --offset 0x5f | cut -d " " -f 2)

    if [[ "$first_reset" != "ee" ]]; then
        SMF_RESET=$first_reset
    fi

    # Saving NVRAM values for future debugging
    if [[ $reboot_dir_found = true ]]; then
        {
            echo "First reset - $first_reset"
            echo "Second reset - $second_reset"
            echo "Third reset - $third_reset"
            echo "Fourth reset - $fourth_reset"
        } > $RESET_REASON_FILE
    fi

    # Clear the NVRAM bytes so they can hold the next reset values
    for nvram_offset in 0x58 0x5c 0x5d 0x5e 0x5f; do
        nvram_rd_wr.py --set --val 0xee --offset $nvram_offset
    done
}

# Compare the current SMF power-on reason with the one saved in
# $REBOOT_REASON_FILE and print a single digit on stdout:
#   0 - the current power-on reason is "11"
#   2 - the current power-on reason equals the saved one
#   1 - the reason changed; the new value is saved to $REBOOT_REASON_FILE
# The caller treats 1 as a thermal reboot.
_is_thermal_reset() {
    prev_thermal=$(cat $REBOOT_REASON_FILE)
    curr_poweron_reason=$(cat $SMF_POWERON_REASON)

    # Both sides are quoted: an unquoted right-hand side inside [[ ]] is
    # matched as a glob pattern rather than compared literally.
    if [[ "$curr_poweron_reason" = "11" ]]; then
        echo 0
    elif [[ "$prev_thermal" = "$curr_poweron_reason" ]]; then
        echo 2
    else
        echo "$curr_poweron_reason" > $REBOOT_REASON_FILE
        echo 1
    fi
}

# Print 1 when the recorded reset reason ($SMF_RESET) is "33",
# otherwise print 0. The caller treats 1 as a watchdog reboot.
_is_watchdog_reset() {
    curr_reset_reason=$SMF_RESET
    case "$curr_reset_reason" in
        33) echo 1 ;;
        *)  echo 0 ;;
    esac
}

# Overwrite $REBOOT_CAUSE_FILE with an "unknown" reboot cause.
#   $1 - result of _is_thermal_reset; 0 selects the software-reboot text,
#        anything else records the raw POR / RST / MBR register values.
# Does nothing when $REBOOT_CAUSE_FILE does not already exist.
_is_unknown_reset() {
    [[ -f $REBOOT_CAUSE_FILE ]] || return 0

    if [[ $1 = 0 ]]; then
        echo "Unknown software reboot" > $REBOOT_CAUSE_FILE
        return
    fi

    # Capture the three registers before composing the message.
    curr_poweron_reason=$(cat $SMF_POWERON_REASON)
    curr_reset_reason=$SMF_RESET
    mb_poweron_reason=$(cat $MAILBOX_POWERON_REASON)
    echo "Unknown POR: $curr_poweron_reason RST: $curr_reset_reason MBR: $mb_poweron_reason" > $REBOOT_CAUSE_FILE
}

# Classify the last reboot and write the matching code to the SMF
# mailbox power-on-reason attribute:
#   mailbox reads ff               -> reason file reset to "None", 0xbb
#   thermal check returned 1       -> 0xee
#   watchdog reset, mailbox != cc  -> 0xdd
#   mailbox reads cc               -> 0xaa
#   anything else                  -> reboot cause marked unknown, 0x99
update_mailbox_register() {
    local new_mailbox_value

    if [[ "$bios_secondary_boot" = "1" ]]; then
        echo "Secondary BIOS booted"
    fi

    # Seed the reason file on the first run so later reads find it.
    if [[ $reboot_file_found = false ]]; then
        echo "None" > $REBOOT_REASON_FILE
    fi

    _get_smf_reset_register

    [[ -d /sys/devices/platform/SMF.512/hwmon/ ]] || return 0

    is_thermal_reboot=$(_is_thermal_reset)
    is_wd_reboot=$(_is_watchdog_reset)

    # Keep only the part after the 'x' of the 0x prefix.
    mbr=$(cat $MAILBOX_POWERON_REASON)
    reason=$(echo $mbr | cut -d 'x' -f2)

    if [[ $reason = "ff" ]]; then
        echo "None" > $REBOOT_REASON_FILE
        new_mailbox_value=0xbb
    elif [[ $is_thermal_reboot = 1 ]]; then
        new_mailbox_value=0xee
    elif [[ $is_wd_reboot = 1 && $reason != "cc" ]]; then
        new_mailbox_value=0xdd
    elif [[ $reason = "cc" ]]; then
        new_mailbox_value=0xaa
    else
        # Must run before the mailbox is overwritten: it records the
        # current mailbox value in the reboot cause file.
        _is_unknown_reset $is_thermal_reboot
        new_mailbox_value=0x99
    fi

    echo "$new_mailbox_value" > $MAILBOX_POWERON_REASON
}

# Script entry point.
update_mailbox_register
Loading

0 comments on commit 0f7fe89

Please sign in to comment.