Skip to content

Commit

Permalink
BACKPORT: FROMGIT: nvme-pci: Use host managed power state for suspend
Browse files Browse the repository at this point in the history
The nvme pci driver prepares its devices for power loss during suspend
by shutting down the controllers. The power setting is deferred to
pci driver's power management before the platform removes power. The
suspend-to-idle mode, however, does not remove power.

NVMe devices that implement host managed power settings can achieve
lower power and better transition latencies than using generic PCI power
settings. Try to use this feature if the platform is not involved with
the suspend. If successful, restore the previous power state on resume.

Cc: Mario Limonciello <Mario.Limonciello@dell.com>
Cc: Kai Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
(cherry picked from commit c5c4d7afac85501148c402bdc29397b77993cc36
 git://git.kernel.org/pub/scm/linux/kernel/git/kbusch/linux.git
 nvme-power)

BUG=b:127593309,b:134195632,b:134781711
TEST=Tested with LiteOn CL1-3D256-Q11 NVMe.  DP/N 0TN2CC.  F/W 22301100

Change-Id: Ic59f66ed916b8174b9ae50c29c0867b9e36f1124
Signed-off-by: Rajat Jain <rajatja@google.com>
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/1634069
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
  • Loading branch information
Keith Busch authored and Commit Bot committed Jul 19, 2019
1 parent 5ddf440 commit bc73048
Showing 1 changed file with 96 additions and 5 deletions.
101 changes: 96 additions & 5 deletions drivers/nvme/host/pci.c
Expand Up @@ -26,6 +26,7 @@
#include <linux/mutex.h>
#include <linux/once.h>
#include <linux/pci.h>
#include <linux/suspend.h>
#include <linux/t10-pi.h>
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
Expand Down Expand Up @@ -106,6 +107,7 @@ struct nvme_dev {
u32 cmbloc;
struct nvme_ctrl ctrl;
struct completion ioq_wait;
u32 last_ps;

mempool_t *iod_mempool;

Expand Down Expand Up @@ -2603,12 +2605,68 @@ static void nvme_remove(struct pci_dev *pdev)
}

#ifdef CONFIG_PM_SLEEP
static int nvme_suspend(struct device *dev)
static int nvme_deep_state(struct nvme_dev *dev)
{
struct pci_dev *pdev = to_pci_dev(dev->dev);
struct nvme_ctrl *ctrl = &dev->ctrl;
int ret = -EBUSY;;

nvme_start_freeze(ctrl);
nvme_wait_freeze(ctrl);
nvme_sync_queues(ctrl);

if (ctrl->state != NVME_CTRL_LIVE &&
ctrl->state != NVME_CTRL_ADMIN_ONLY)
goto unfreeze;

dev->last_ps = 0;
ret = nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0,
&dev->last_ps);
if (ret < 0)
goto unfreeze;

ret = nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, dev->ctrl.npss,
NULL, 0, NULL);
if (ret < 0)
goto unfreeze;
if (ret) {
/*
* Clearing npss forces a controller reset on resume. The
* correct value will be resdicovered then.
*/
ctrl->npss = 0;
nvme_dev_disable(dev, true);
ret = 0;
} else {
/*
* A saved state prevents pci pm from generically controlling
* the device's power. If we're using protocol specific
* settings, we don't want pci interfering.
*/
pci_save_state(pdev);
}
unfreeze:
nvme_unfreeze(ctrl);
return ret;
}

static int nvme_make_operational(struct nvme_dev *dev)
{
struct nvme_ctrl *ctrl = &dev->ctrl;

if (nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, dev->last_ps,
NULL, 0, NULL) == 0)
return 0;
nvme_reset_ctrl(ctrl);
return 0;
}

static int nvme_simple_resume(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);

nvme_dev_disable(ndev, true);
nvme_reset_ctrl(&ndev->ctrl);
return 0;
}

Expand All @@ -2617,12 +2675,45 @@ static int nvme_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);

nvme_reset_ctrl(&ndev->ctrl);
return pm_resume_via_firmware() || !ndev->ctrl.npss ?
nvme_simple_resume(dev) : nvme_make_operational(ndev);
}

static int nvme_simple_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);

nvme_dev_disable(ndev, true);
return 0;
}
#endif

static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
static int nvme_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);

/*
* The platform does not remove power for a kernel managed suspend so
* use host managed nvme power settings for lowest idle power. This
* should have quicker resume latency than a full device shutdown.
*/
return pm_suspend_via_firmware() || !ndev->ctrl.npss ?
nvme_simple_suspend(dev) : nvme_deep_state(ndev);
}

const struct dev_pm_ops nvme_dev_pm_ops = {
.suspend = nvme_suspend,
.resume = nvme_resume,
.freeze = nvme_simple_suspend,
.thaw = nvme_simple_resume,
.poweroff = nvme_simple_suspend,
.restore = nvme_simple_resume,
};

#else
const struct dev_pm_ops nvme_dev_pm_ops = {};
#endif

static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
pci_channel_state_t state)
Expand Down

0 comments on commit bc73048

Please sign in to comment.