Now iAWE matches v 0.1
1. Data resampled to 1-minute resolution
2. Water motor (meter 12) removed
3. Power data prepended and appended with zeros to cover the full date range

Closes #406
nipunbatra committed Jun 10, 2015
1 parent 4ae125a commit e36b01d
Showing 5 changed files with 78 additions and 64 deletions.
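
For orientation before the per-file diffs, the updated converter can be exercised end-to-end roughly as below. This is a minimal sketch, not part of the commit; the local paths are hypothetical and format='HDF' simply restates the default shown in the diff.

# Minimal usage sketch for the updated iAWE converter (hypothetical paths).
from nilmtk.dataset_converters.iawe.convert_iawe import convert_iawe
from nilmtk import DataSet

# Convert the raw iAWE CSVs into an HDF5 datastore at 1-minute resolution.
convert_iawe('/data/iawe', '/data/iawe.h5', format='HDF')

# Load the result and inspect building 1's meters.
iawe = DataSet('/data/iawe.h5')
print(iawe.buildings[1].elec)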
27 changes: 26 additions & 1 deletion nilmtk/dataset_converters/iawe/convert_iawe.py
@@ -9,6 +9,23 @@
from nilmtk.utils import check_directory_exists, get_datastore
from nilm_metadata import convert_yaml_to_hdf5
from inspect import currentframe, getfile, getsourcefile
from copy import deepcopy

def reindex_fill_na(df, idx):
df_copy = deepcopy(df)
df_copy = df_copy.reindex(idx)

power_columns = [
x for x in df.columns if x[0] in ['power']]
non_power_columns = [x for x in df.columns if x not in power_columns]

for power in power_columns:
df_copy[power].fillna(0, inplace=True)
for measurement in non_power_columns:
df_copy[measurement].fillna(
df[measurement].median(), inplace=True)

return df_copy


column_mapping = {
@@ -31,6 +48,9 @@

TIMESTAMP_COLUMN_NAME = "timestamp"
TIMEZONE = "Asia/Kolkata"
START_DATETIME, END_DATETIME = '7-13-2013', '8-4-2013'
FREQ = "1T"


def convert_iawe(iawe_path, output_filename, format="HDF"):
"""
@@ -43,13 +63,15 @@ def convert_iawe(iawe_path, output_filename, format="HDF"):
"""

check_directory_exists(iawe_path)
idx = pd.DatetimeIndex(start=START_DATETIME, end=END_DATETIME, freq=FREQ)
idx = idx.tz_localize('GMT').tz_convert(TIMEZONE)

# Open data store
store = get_datastore(output_filename, format, mode='w')
electricity_path = join(iawe_path, "electricity")

# Mains data
for chan in range(1, 13):
for chan in range(1, 12):
key = Key(building=1, meter=chan)
filename = join(electricity_path, "%d.csv" % chan)
print('Loading ', chan)
@@ -64,6 +86,9 @@ def convert_iawe(iawe_path, output_filename, format="HDF"):
df = df.dropna()
df = df.astype(np.float32)
df = df.sort_index()
df = df.resample("1T")
df = reindex_fill_na(df, idx)
assert df.isnull().sum().sum() == 0
store.put(str(key), df)
store.close()
convert_yaml_to_hdf5(join(_get_module_directory(), 'metadata'),
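
To see what the new reindex_fill_na helper does in isolation, here is a small sketch: the column tuples mirror the (physical_quantity, type) convention used by the converter, but the index range and values are invented, and a naive (non-localised) index is used for brevity.

# Illustration of the zero-padding helper added above (invented data).
import pandas as pd
from nilmtk.dataset_converters.iawe.convert_iawe import reindex_fill_na

# One hour of 1-minute readings inside a wider three-hour target index.
recorded = pd.date_range('2013-07-20 10:00', periods=60, freq='1T')
target = pd.date_range('2013-07-20 09:00', '2013-07-20 12:00', freq='1T')

df = pd.DataFrame({('power', 'active'): 150.0,
                   ('voltage', ''): 231.0}, index=recorded)

padded = reindex_fill_na(df, target)
# Gaps in power columns become 0; gaps in non-power columns take the column median.
print(padded.loc['2013-07-20 09:30'])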
8 changes: 0 additions & 8 deletions nilmtk/dataset_converters/iawe/metadata/building1.yaml
@@ -16,9 +16,6 @@ elec_meters:
9: *jplug
10: *jplug
11: *jplug
12: &current_cost
submeter_of: 0
device_model: current_cost

appliances:
- original_name: fridge
@@ -72,8 +69,3 @@ appliances:
type: wet appliance
instance: 1
meters: [11]

- original_name: water motor
type: motor
instance: 1
meters: [12]
11 changes: 1 addition & 10 deletions nilmtk/dataset_converters/iawe/metadata/meter_devices.yaml
@@ -3,7 +3,7 @@ EM6400:
manufacturer: Schneider Electric
manufacturer_url: http://www.schneider-electric.com/
description: Multifunction meter for feeders
sample_period: 1 # the interval between samples. In seconds.
sample_period: 60 # the interval between samples. In seconds.
max_sample_period: 300
measurements:
- physical_quantity: power # power, voltage, energy, current?
@@ -71,13 +71,4 @@ jplug:
lower_limit: 0
wireless: true

current_cost:
description:
sample_period: 6
max_sample_period: 300
measurements:
- physical_quantity: power # power, voltage, energy, current?
type: active # active (real power), reactive or apparent?
upper_limit: 5000
lower_limit: 0
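
A quick way to confirm the metadata now agrees with the 1-minute data is to load the YAML directly. This is only a sanity-check sketch using PyYAML, with the path given relative to a nilmtk checkout.

# Sanity check: EM6400 sample period and removal of the current_cost device.
import yaml

with open('nilmtk/dataset_converters/iawe/metadata/meter_devices.yaml') as f:
    devices = yaml.safe_load(f)

print(devices['EM6400']['sample_period'])   # expected: 60
print('current_cost' in devices)            # expected: False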

89 changes: 44 additions & 45 deletions nilmtk/feature_detectors/steady_states.py
@@ -55,17 +55,17 @@ def find_steady_states(dataframe, min_n_samples=2, stateThreshold=15,
"""
# Tells whether we have both real and reactive power or only real power
num_measurements = len(dataframe.columns)
estimatedSteadyPower = np.array([0] * num_measurements)
lastSteadyPower = np.array([0] * num_measurements)
previousMeasurement = np.array([0] * num_measurements)
estimated_steady_power = np.array([0] * num_measurements)
last_steady_power = np.array([0] * num_measurements)
previous_measurement = np.array([0] * num_measurements)

# These flags store state of power

instantaneousChange = False # power changing this second
ongoingChange = False # power change in progress over multiple seconds
instantaneous_change = False # power changing this second
ongoing_change = False # power change in progress over multiple seconds

index_transitions = [] # Indices to use in returned Dataframe
index_steadystates = []
index_steady_states = []
transitions = [] # holds information on transitions
steady_states = [] # steadyStates to store in returned Dataframe
N = 0 # N stores the number of samples in state
@@ -83,89 +83,87 @@ def find_steady_states(dataframe, min_n_samples=2, stateThreshold=15,

# Step 2: this does the threshold test and then we sum the boolean
# array.
thisMeasurement = row[1:3]
this_measurement = row[1:3]
# logging.debug('The current measurement is: %s' % (thisMeasurement,))
# logging.debug('The previous measurement is: %s' %
# (previousMeasurement,))

stateChange = np.fabs(
np.subtract(thisMeasurement, previousMeasurement))
state_change = np.fabs(
np.subtract(this_measurement, previous_measurement))
# logging.debug('The State Change is: %s' % (stateChange,))

if np.sum(stateChange > stateThreshold):
instantaneousChange = True
if np.sum(state_change > stateThreshold):
instantaneous_change = True
else:
instantaneousChange = False
instantaneous_change = False

# Step 3: Identify if transition is just starting, if so, process it
if (instantaneousChange and (not ongoingChange)):
if instantaneous_change and (not ongoing_change):

# Calculate transition size
lastTransition = np.subtract(estimatedSteadyPower, lastSteadyPower)
last_transition = np.subtract(estimated_steady_power, last_steady_power)
# logging.debug('The steady state transition is: %s' %
# (lastTransition,))

# Sum Boolean array to verify if transition is above noise level
if np.sum(np.fabs(lastTransition) > noise_level):
if np.sum(np.fabs(last_transition) > noise_level):
# 3A, C: if so add the index of the transition start and the
# power information

# Avoid outputting first transition from zero
index_transitions.append(time)
# logging.debug('The current row time is: %s' % (time))
transitions.append(lastTransition)
transitions.append(last_transition)

# I think we want this, though not specifically in Hart's algo notes
# We don't want to append a steady state if it's less than min samples in length.
# if N > min_n_samples:
index_steadystates.append(time)
index_steady_states.append(time)
# logging.debug('The ''time'' stored is: %s' % (time))
# last states steady power
steady_states.append(estimatedSteadyPower)
steady_states.append(estimated_steady_power)

# 3B
lastSteadyPower = estimatedSteadyPower
last_steady_power = estimated_steady_power
# 3C
time = row[0]

# Step 4: if a new steady state is starting, zero counter
if instantaneousChange:
if instantaneous_change:
N = 0

# Hart step 5: update our estimate for steady state's energy
estimatedSteadyPower = np.divide(
np.add(np.multiply(N, estimatedSteadyPower),
thisMeasurement), (N + 1))
estimated_steady_power = np.divide(
np.add(np.multiply(N, estimated_steady_power),
this_measurement), (N + 1))
# logging.debug('The steady power estimate is: %s' %
# (estimatedSteadyPower,))
# Step 6: increment counter
N = N + 1
N += 1

# Step 7
ongoingChange = instantaneousChange
ongoing_change = instantaneous_change

# Step 8
previousMeasurement = thisMeasurement
previous_measurement = this_measurement



#Appending last edge
lastTransition = np.subtract(estimatedSteadyPower, lastSteadyPower)
if np.sum(np.fabs(lastTransition) > noise_level):
# Appending last edge
last_transition = np.subtract(estimated_steady_power, last_steady_power)
if np.sum(np.fabs(last_transition) > noise_level):
index_transitions.append(time)
transitions.append(lastTransition)
index_steadystates.append(time)
steady_states.append(estimatedSteadyPower)
transitions.append(last_transition)
index_steady_states.append(time)
steady_states.append(estimated_steady_power)

#Removing first edge if the starting steady state power is more
# Removing first edge if the starting steady state power is more
# than the noise threshold
# https://github.com/nilmtk/nilmtk/issues/400

if np.sum(steady_states[0] > noise_level) and index_transitions[0] == index_steadystates[0] == dataframe.iloc[0].name:
if np.sum(steady_states[0] > noise_level) and index_transitions[0] == index_steady_states[0] == dataframe.iloc[0].name:
transitions = transitions[1:]
index_transitions = index_transitions[1:]
steady_states = steady_states[1:]
index_steadystates = index_steadystates[1:]
index_steady_states = index_steady_states[1:]

print("Edge detection complete.")

Expand All @@ -189,15 +187,15 @@ def find_steady_states(dataframe, min_n_samples=2, stateThreshold=15,

print("Creating states frame ...")
sys.stdout.flush()
steady_states = pd.DataFrame(data=steady_states, index=index_steadystates,
columns=cols_steady[num_measurements])
steady_states = pd.DataFrame(data=steady_states, index=index_steady_states,
columns=cols_steady[num_measurements])
print("States frame created.")
print("Finished.")
return steady_states, transitions
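
For reference, the renamed edge detector can be driven with a synthetic mains frame as below. This is only a sketch: the single power column and the noise_level keyword (visible in the function body, though the signature is truncated above) are assumptions.

# Rough usage sketch for find_steady_states on a synthetic step signal.
import numpy as np
import pandas as pd
from nilmtk.feature_detectors.steady_states import find_steady_states

idx = pd.date_range('2013-07-13', periods=600, freq='1S')
active = np.concatenate([np.full(200, 2.0),     # near-zero baseline
                         np.full(200, 120.0),   # appliance switches on
                         np.full(200, 2.0)])    # appliance switches off
mains = pd.DataFrame({('power', 'active'): active}, index=idx)

steady_states, transitions = find_steady_states(mains, noise_level=70)
# Expect roughly one +118 W and one -118 W transition.
print(transitions)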


def cluster(X, max_num_clusters=3):
'''Applies clustering on reduced data,
def cluster(x, max_num_clusters=3):
"""Applies clustering on reduced data,
i.e. data where power is greater than threshold.
Parameters
@@ -209,9 +207,9 @@ def cluster(X, max_num_clusters=3):
-------
centroids : ndarray of int32s
Power in different states of an appliance, sorted
'''
"""
# Find where power consumption is greater than 10
data = _transform_data(X)
data = _transform_data(x)

# Find clusters
centroids = _apply_clustering(data, max_num_clusters)
@@ -223,7 +221,8 @@


def _transform_data(data):
'''Subsamples if needed and converts to column vector (which is what
"""
Subsamples if needed and converts to column vector (which is what
scikit-learn requires).
Parameters
@@ -234,7 +233,7 @@ def _transform_data(data):
-------
data_above_thresh : ndarray
column vector
'''
"""

MAX_NUMBER_OF_SAMPLES = 2000
MIN_NUMBER_OF_SAMPLES = 20
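
The clustering helpers cleaned up above can likewise be exercised on synthetic readings. A sketch, assuming a pandas Series of power values as input (the docstrings only promise filtering above a threshold, reshaping to a column vector, and sorted centroids).

# Sketch: cluster synthetic power readings into appliance state centroids.
import numpy as np
import pandas as pd
from nilmtk.feature_detectors.steady_states import cluster

readings = pd.Series(np.concatenate([np.random.normal(100, 5, 500),    # ~100 W state
                                     np.random.normal(2000, 20, 500),  # ~2000 W state
                                     np.random.normal(5, 1, 500)]))    # below threshold

centroids = cluster(readings, max_num_clusters=3)
# Centroids should land near 100 W and 2000 W; sub-threshold samples are dropped.
print(centroids)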
7 changes: 7 additions & 0 deletions tests_on_large_datasets/github_issues/405.py
@@ -0,0 +1,7 @@
from nilmtk import *

ds = DataSet("/Users/nipunbatra/Downloads/nilm_gjw_data.hdf5")

elec = ds.buildings[1].elec

elec.plot()
