# Introduction

This notebook illustrates the relationship between power consumption, CPU load, <br />
and CPU frequency of a laptop computer. The subsequent analysis is based on a  <br />
dataset collected from a 13-inch Macbook Pro running on a Core i5-5287U processor <br />
with macOS Sierra installed.

The dataset consists of two data frames populated with the following readings:


| Column                        | Description |
| ----------------------------  | ---- | 
|Time                           |   Sample time stamp|
|Voltage    |  Voltage reading of the battery (in mV)|
|Current      |  Current reading of the battery (in mA)|
|Remaining charge |   Remaining charge of the battery (in mAh)|
|CPU load |   CPU load (in %)|
|Total power consumption |  Power consumption (in Watts) <br /> computed using $P = V*I$ <br />where V and I are obtained from the Voltage and Current columns|
|Processor power consumption$^*$ |  Power consumption of the processor alone (in Watts), so that <br /> *Total power - processor power = power consumption of the other components* |
|Processor speed (freq)$^*$ |  Processor clock frequency (in MHz) at the sampling instant|


$^*$ Data collected using [Intel Power Gadget](https://software.intel.com/en-us/articles/intel-power-gadget-20).

----
**Tools used:**

*MacOS SystemProfiler*: to collect battery discharge info., i.e., 
                          voltage and current readings.
                          
*Intel Power Gadget*: CPU frequency and power consumption monitoring tool

**Python Libraries**

*psutil*: to gather CPU load at user-defined sampling intervals

*pandas and numpy*: to process the resulting data

*bokeh*: for plotting

Additionally, I wrote the mini python module  *power_profile* to automate the data collection. <br />
Finally, for anyone interested, the data and the associated code live [here](https://github.com/mEyob/CPU_load_VS_Power).

In [1]:
from power_profile import cpuLoadPower


In [2]:
# Run the data-collection routine from the local power_profile module.
# NOTE(review): presumably 1000 samples at a 1-second interval -- confirm
# against cpuLoadPower's signature; the log below reports progress up to 900.
cpuLoadPower(1000, 1) 


2017-07-11 12:38:09.140322 > 0 samples processed
2017-07-11 12:40:51.109189 > 100 samples processed
2017-07-11 12:43:35.216932 > 200 samples processed
2017-07-11 12:46:17.819553 > 300 samples processed
2017-07-11 12:49:04.339438 > 400 samples processed
2017-07-11 12:52:10.507287 > 500 samples processed
2017-07-11 12:54:55.536163 > 600 samples processed
2017-07-11 12:57:37.747262 > 700 samples processed
2017-07-11 13:00:28.483069 > 800 samples processed
2017-07-11 13:03:11.515793 > 900 samples processed


In [2]:
import pandas as pd


In [3]:
# Battery readings (mV, mA, mAh) and CPU load, one row per sample.
sys_data = pd.read_csv('data/volt-amp-chrg-cpuLoad.csv')

# P = V * I.  mV * mA is in microwatts, hence the 1e-6; the minus sign turns
# the negative (discharge) current readings into a positive power in Watts.
millivolts, milliamps = sys_data['mV'], sys_data['mA']
sys_data['Power'] = millivolts * milliamps * -1e-6


In [4]:
# Preview the first five rows (five is head()'s default).
sys_data.head()


Unnamed: 0,Time,mV,mA,mAh,cpu_load,Power
0,12:38:09:139982,11819,-787,3669,5.7,9.301553
1,12:38:10:759085,11819,-787,3669,23.5,9.301553
2,12:38:12:390611,11819,-787,3669,21.4,9.301553
3,12:38:13:993952,11819,-787,3669,14.5,9.301553
4,12:38:15:628270,11819,-787,3669,13.6,9.301553


In [5]:
# Load only the timestamp, core-0 frequency and processor power columns of the
# Intel Power Gadget log, then give them shorter names.
cols = 'System Time,CPU Frequency_0(MHz),Processor Power_0(Watt)'
wanted_columns = cols.split(',')
short_names = {
    'System Time': 'Time',
    'CPU Frequency_0(MHz)': 'CPU_Freq_(MHz)',
    'Processor Power_0(Watt)': 'Processor_Power',
}

cpu_data = pd.read_csv("data/PwrData_2017-7-11_12-38-14.csv", usecols=wanted_columns)
cpu_data.rename(columns=short_names, inplace=True)
cpu_data.head()


Unnamed: 0,Time,CPU_Freq_(MHz),Processor_Power
0,12:38:14:240,1300.0,1.772
1,12:38:14:338,1300.0,1.747
2,12:38:14:442,1300.0,2.284
3,12:38:14:537,1300.0,2.038
4,12:38:14:637,1700.0,2.741


In [6]:
# Drop every row that contains a missing value, modifying cpu_data in place
# (the index is not reset, so row labels may have gaps afterwards).
# NOTE(review): presumably this removes non-sample lines of the Power Gadget
# log -- confirm against the raw CSV.
cpu_data.dropna(inplace=True)


In [7]:
import datetime


In [8]:
# Both logs hold a time of day followed by a ':<fraction>' field (six digits
# in sys_data, three in cpu_data).  Strip that field and prepend the recording
# date so every stamp becomes a whole-second datetime.
def _to_datetime(stamp, fraction_digits):
    clock = stamp[:-(fraction_digits + 1)]  # +1 also removes the ':' separator
    return datetime.datetime.strptime('2017-07-11 ' + clock, '%Y-%m-%d %H:%M:%S')

sys_data['Time'] = sys_data.Time.apply(lambda t: _to_datetime(t, 6))
cpu_data['Time'] = cpu_data.Time.apply(lambda t: _to_datetime(t, 3))

In [9]:
# Convert the datetimes of both frames to numeric epoch seconds.
for frame in (sys_data, cpu_data):
    frame['Time'] = frame['Time'].apply(lambda stamp: stamp.timestamp())


In [11]:
# Common time origin: the earlier of the two logs' first samples.
# Positional .iloc is used instead of the label-based .loc[0, ...] because
# rows were dropped from cpu_data without resetting its index, so the label 0
# is not guaranteed to exist any more.
start_time = min(cpu_data['Time'].iloc[0], sys_data['Time'].iloc[0])


In [13]:
# Express both time columns as seconds elapsed since the common origin.
sys_data['Time'] = sys_data['Time'].sub(start_time)
cpu_data['Time'] = cpu_data['Time'].sub(start_time)


In [14]:
# Preview the first five rows after rebasing the time column.
cpu_data.head(5)


Unnamed: 0,Time,CPU_Freq_(MHz),Processor_Power
0,5.0,1300.0,1.772
1,5.0,1300.0,1.747
2,5.0,1300.0,2.284
3,5.0,1300.0,2.038
4,5.0,1700.0,2.741


In [15]:
import numpy as np


In [16]:
# Collapse the sub-second Power Gadget samples to one row per whole second by
# averaging frequency and power.  GroupBy.mean() is used rather than
# agg(np.mean): passing the NumPy callable is deprecated in recent pandas and
# yields the same result.
avg_cpu_data = cpu_data.groupby(by='Time', as_index=False).mean()


In [17]:
# Preview the first five per-second averages.
avg_cpu_data.head(5)


Unnamed: 0,Time,CPU_Freq_(MHz),Processor_Power
0,5.0,1450.0,2.3365
1,6.0,2020.0,5.7563
2,7.0,1800.0,4.59
3,8.0,1900.0,4.1122
4,9.0,2340.0,6.1323


In [18]:
# Join the battery/load samples with the per-second CPU averages on the shared
# 'Time' column; the default inner join keeps only seconds present in both.
merged_data = sys_data.merge(avg_cpu_data, on='Time')


In [19]:
# Preview the first five merged rows.
merged_data.head(5)


Unnamed: 0,Time,mV,mA,mAh,cpu_load,Power,CPU_Freq_(MHz),Processor_Power
0,6.0,11819,-787,3669,13.6,9.301553,2020.0,5.7563
1,8.0,11819,-787,3669,17.4,9.301553,1900.0,4.1122
2,9.0,11819,-787,3669,17.5,9.301553,2340.0,6.1323
3,11.0,11819,-787,3669,12.4,9.301553,1730.0,5.5018
4,13.0,11819,-787,3669,15.7,9.301553,1300.0,2.8642


In [20]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure



In [21]:
# Configure Bokeh so that show() renders plots inline in the notebook.
output_notebook()


In [22]:
from bokeh.models import LinearAxis, Range1d
from bokeh.io import export_svgs, export_png
import os


In [117]:
# Dual-axis time series: CPU load on the left axis, battery power on the right.
p = figure(plot_width=500, plot_height=300, toolbar_location=None)

elapsed = sys_data['Time']

p.line(elapsed, sys_data['cpu_load'], color="firebrick", legend='CPU load')

# The power trace is drawn against its own named range so that the two series
# can use different vertical scales.
p.extra_y_ranges = {"pwr": Range1d(start=10, end=35)}
p.line(elapsed, sys_data['Power'], color="navy", y_range_name="pwr", legend='Power consumed')

p.add_layout(LinearAxis(y_range_name="pwr"), 'right')

p.xaxis.axis_label = "Time (sec)"

# Colour each y-axis like the trace it belongs to.
axis_styles = (
    (p.yaxis[0], "CPU load (%)", "firebrick"),
    (p.yaxis[1], "Power (Watt)", "navy"),
)
for y_axis, title, shade in axis_styles:
    y_axis.axis_label = title
    y_axis.axis_label_text_color = shade
    y_axis.major_label_text_color = shade
    y_axis.major_tick_line_color = shade
    y_axis.minor_tick_line_color = shade
    y_axis.axis_line_color = shade

p.legend.location = "top_left"

p.grid.grid_line_dash = [2,2]
p.grid.grid_line_alpha = 0.6
p.background_fill_color = "whitesmoke"

# 'path' is reused by the later plotting cells when exporting their figures.
path = os.getcwd()

export_png(p, os.path.join(path,"figures/TimePlot.png"))

show(p)


The above plot shows that whenever the CPU load rises, so does the power consumption, and vice versa.
Interestingly, there seems to be a consistent time lag between a change in CPU load and 
the corresponding change in power consumption. 


In [116]:
# ===============================================
# Fitting from System Profiler data
# Scatter of power against CPU load with a first-degree least-squares fit, for
# (a) total power from the System Profiler data and (b) processor power from
# the merged data.  The legend equations are generated from the fitted
# coefficients so that they cannot drift out of sync with the data.
def _linear_fit_label(target, coefficients):
    """Return 'target = <slope> * CPU_Load +/- <intercept>' with two decimals.

    ``coefficients`` is the (slope, intercept) pair returned by
    ``np.polyfit(..., 1)``, which lists the highest power first.
    """
    slope, intercept = coefficients
    sign = '-' if intercept < 0 else '+'
    return "{} = {:.2f} * CPU_Load {} {:.2f}".format(target, slope, sign, abs(intercept))


# ===============================================
# Fitting from System Profiler data
fit_from_sys_data = np.polyfit(sys_data['cpu_load'], sys_data['Power'], 1)
fn_from_sys_data = np.poly1d(fit_from_sys_data)

p = figure(plot_width=600, plot_height=400, toolbar_location=None)

p.circle(
    sys_data.cpu_load, 
    sys_data.Power, 
    color="firebrick",
    line_color="firebrick",
    fill_alpha = 0.4,
    size = 8
)

p.line(
    sys_data.cpu_load, 
    fn_from_sys_data(sys_data.cpu_load), 
    color="firebrick", 
    legend=_linear_fit_label("Sys_Power", fit_from_sys_data)
)


# ===============================================
# Fitting from merged data: System Profiler +  Intel Power Gadget
fit_from_merged_data = np.polyfit(merged_data['cpu_load'], merged_data['Processor_Power'], 1)
fn_from_merged_data = np.poly1d(fit_from_merged_data)

p.circle(
    merged_data.cpu_load, 
    merged_data.Processor_Power, 
    color="navy",
    line_color="navy",
    fill_alpha = 0.4,
    size = 8
)
p.line(
    merged_data.cpu_load, 
    fn_from_merged_data(merged_data.cpu_load), 
    color="navy", 
    legend=_linear_fit_label("CPU_Power", fit_from_merged_data)
)

p.grid.grid_line_dash = [2,2]
p.grid.grid_line_alpha = 0.6

p.xaxis.axis_label = "CPU LOAD (%)"
p.yaxis.axis_label = "POWER (Watt)"
p.legend.location = "bottom_right"
p.background_fill_color = "whitesmoke"

# 'path' (the working directory) is set in the time-plot cell.
export_png(p, os.path.join(path,"figures/Load-vs-Power.png"))

show(p)


The fitted line for the total power in the above plot shows that the power consumption is about $12.6$ Watts <br /> 
even when the CPU is idle. Of course, the CPU is not the only component that consumes power.<br />
According to the fit, total power consumption ranges between roughly $12.6$ and $35$ Watts as the CPU load goes from $0$ to $100\%$.

In [25]:
from bokeh.palettes import inferno


In [26]:
def color_mapper(x, palette=None):
    """Map a CPU frequency in MHz to a colour taken from *palette*.

    The linear scale sends 1300 MHz to index 0 and 3300 MHz to index 255
    (0.1275 * 1300 == 165.75 and 0.1275 * 3300 - 165.75 == 255).  The index
    is clamped to the palette bounds: without clamping, a frequency below
    1300 MHz would produce a negative index that silently wraps around to the
    far end of the palette, and one above 3300 MHz would raise IndexError.

    Args:
        x: Frequency in MHz.
        palette: Sequence of colours to pick from.  Defaults to
            ``inferno(256)``, built on demand rather than at definition time.

    Returns:
        The palette entry selected for ``x``.
    """
    if palette is None:
        palette = inferno(256)
    index = int((0.1275 * x) - 165.75)
    index = max(0, min(index, len(palette) - 1))
    return palette[index]
# One colour per merged sample, chosen from its CPU frequency.
colors = list(map(color_mapper, merged_data['CPU_Freq_(MHz)']))


In [115]:
# CPU frequency (GHz) against CPU load, each point coloured by its frequency,
# with a dashed reference line at the 2.9 GHz stock speed.
p = figure(plot_width=500, plot_height=300, toolbar_location=None)

load_pct = merged_data.cpu_load
freq_ghz = merged_data['CPU_Freq_(MHz)']/1000

p.circle(load_pct, freq_ghz, color=colors, line_color=colors, fill_alpha=0.4, size=8)

# Horizontal reference line spanning load values 0..99.
reference_x = list(range(100))
reference_y = [2.900] * 100
p.line(
    x=reference_x,
    y=reference_y,
    color='red',
    line_width=1,
    line_dash=[4, 2],
    legend='Stock speed (2.9 GHz)'
)

p.xaxis.axis_label = "CPU LOAD (%)"
p.yaxis.axis_label = "CPU Freq (GHz)"
p.legend.location = "bottom_right"

p.grid.grid_line_dash = [2,2]
p.grid.grid_line_alpha = 0.6
p.background_fill_color = "whitesmoke"

# 'path' (the working directory) is set in the time-plot cell.
export_png(p, os.path.join(path,"figures/Load-vs-Freq.png"))

show(p)


In [114]:
# Processor power against CPU load, split by whether the averaged core
# frequency was above the 2900 MHz stock speed (turbo boost) or not.
p = figure(plot_width=500, plot_height=300, toolbar_location=None)

stock_speed_mhz = 2900
frequency = merged_data['CPU_Freq_(MHz)']

turbo_samples = merged_data[frequency > stock_speed_mhz]
# '<=' rather than '<': a sample at exactly the stock speed is not boosted,
# and a strict comparison would leave it out of both series.
scaled_samples = merged_data[frequency <= stock_speed_mhz]

p.circle(
    turbo_samples.cpu_load, 
    turbo_samples.Processor_Power, 
    color='red',
    line_color='red',
    fill_alpha = 0.4,
    size = 8,
    legend = 'Turbo boost'
)

p.circle(
    scaled_samples.cpu_load, 
    scaled_samples.Processor_Power, 
    color='green',
    line_color='green',
    fill_alpha = 0.4,
    size = 8,
    legend = 'Speed scaling'
)

p.xaxis.axis_label = "CPU LOAD (%)"
p.yaxis.axis_label = "Processor power (Watt)"
p.legend.location = "bottom_right"

p.grid.grid_line_dash = [2,2]
p.grid.grid_line_alpha = 0.6
p.background_fill_color = "whitesmoke"

# 'path' (the working directory) is set in the time-plot cell.
export_png(p, os.path.join(path,"figures/Load-vs-ProcPower.png"))

show(p)


In [112]:
# Processor power against CPU frequency (GHz) with a second-degree
# least-squares fit.  The legend equation is generated from the fitted
# coefficients so that it always matches the plotted curve.
def _signed_term(value):
    """Format ``value`` as '+ 1.23' or '- 1.23' for use inside an equation."""
    return "{} {:.2f}".format('-' if value < 0 else '+', abs(value))


p = figure(plot_width=500, plot_height=300, toolbar_location=None)

freq_ghz = merged_data['CPU_Freq_(MHz)']/1000

# Hollow markers: no fill colour, outline taken from the colour list.
p.circle(
    freq_ghz, 
    merged_data.Processor_Power, 
    color=None,
    line_color=colors[100],
    fill_alpha = 0.4,
    size = 8
)

# np.polyfit returns the coefficients with the highest power first.
fit_freq_power = np.polyfit(freq_ghz, merged_data['Processor_Power'], 2)
fn_freq_power = np.poly1d(fit_freq_power)
quadratic, linear, constant = fit_freq_power

# Sort once so the curve is drawn left to right instead of zig-zagging.
sorted_freq = sorted(freq_ghz)
p.line(
    sorted_freq, 
    fn_freq_power(sorted_freq), 
    color=colors[25], 
    legend="P(f) = {:.2f}f^2 {}f {}".format(
        quadratic, _signed_term(linear), _signed_term(constant)
    ),
    line_dash = [4, 2],
    line_width = 2
)

p.xaxis.axis_label = "CPU frequency f (GHz)"
p.yaxis.axis_label = "Processor power P (Watt)"
p.legend.location = "top_center"

p.grid.grid_line_dash = [2,2]
p.grid.grid_line_alpha = 0.6
p.background_fill_color = "whitesmoke"

# 'path' (the working directory) is set in the time-plot cell.
export_png(p, os.path.join(path,"figures/Freq-vs-ProcPower.png"))
show(p)


# Notes

Estimating how long the laptop will last on battery power

- Remaining charge from mAh column
- CPU load as a function of time (or use average CPU load instead)
- Use the linear regression to calculate Power as a function of CPU load
- Estimate how long it takes to drain the remaining charge (mAh)   <br />
  with power draw estimated in the above bullet point

In [86]:
# Share of the total power that is attributed to the processor, summed over
# all merged samples.
merged_data['Processor_Power'].sum() / merged_data['Power'].sum()


0.575603807837539

In [129]:
# Battery voltage (mV) over time, on a fixed 0-12000 vertical range.
p = figure(plot_width=500, plot_height=300, y_range=(0,12000), toolbar_location=None)

# Hollow markers: no fill colour, outline taken from the colour list.
marker_style = dict(color=None, line_color=colors[100], fill_alpha=0.4, size=8)
p.circle(merged_data['Time'], merged_data['mV'], **marker_style)

show(p)


In [121]:
# Scratch check: build the absolute path of data/test.txt under the current
# working directory.
os.path.join(os.getcwd(),'data','test.txt')

'/Users/misikir/Google Drive/PowerProfile/data/test.txt'