In [2]:
# imports
import pickle

In [13]:
# loading features
def load_features(path="results/features.pkl"):
    """Load the pickled feature-selection results.

    Args:
        path: Location of the pickle file. Defaults to
            "results/features.pkl", the file this notebook reads.

    Returns:
        The object stored in the pickle file, unchanged.

    Raises:
        OSError: If the file cannot be opened.
    """
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only point this at files produced by a trusted pipeline.
    with open(path, "rb") as f:
        return pickle.load(f)

In [14]:
# `features` contains a list of feature-set candidates, grouped by sensor and horizon.
# We want to:
#   (1) extract the best feature set per horizon (for direct modelling)
#   (2) extract a common feature set per sensor (for structured modelling)

In [19]:
def get_candidates(s, h):
    """Return the first candidate group stored for sensor `s` and horizon `h`.

    The sensor and horizon of a group are read from its first candidate.

    Args:
        s: Sensor identifier to match against the "sensor" entry.
        h: Horizon to match against the "horizon" entry.

    Returns:
        The first matching group from `load_features()`, or None when no
        group matches.
    """
    groups = load_features()
    matching = (
        group
        for group in groups
        if group[0]["sensor"] == s and group[0]["horizon"] == h
    )
    return next(matching, None)
    
    
def get_features_sh(s, h):
    """Return the feature list of the most accurate candidate for a sensor/horizon.

    Args:
        s: Sensor identifier passed on to `get_candidates`.
        h: Horizon passed on to `get_candidates`.

    Returns:
        The "features" entry of the candidate with the highest "accuracy".
        When several candidates tie, the first one in the group wins.

    Raises:
        ValueError: If there is no (non-empty) candidate group for `s`/`h`.
    """
    candidates = get_candidates(s, h)
    if not candidates:
        raise ValueError(
            "no feature candidates for sensor {!r}, horizon {!r}".format(s, h)
        )

    # max() tracks the winning element itself, so there is no separate
    # position counter that could drift out of sync with the loop.
    best = max(candidates, key=lambda candidate: candidate["accuracy"])
    return best["features"]

# Example call: sensor 1, horizon 3 (the cell output below is the returned feature list).
get_features_sh(1, 3)

['timeOfDay', 'monthOfYear', 'weekEnd']

In [48]:
def normalize_accuracies(horizon, min_range=0.2):
    """Linearly rescale the "accuracy" of every candidate in `horizon`, in place.

    The best accuracy maps to 1 (up to floating-point rounding). The bottom
    of the scale is the worst accuracy, unless best and worst are less than
    `min_range` apart; in that case the bottom is `best - min_range`, so
    near-identical candidates are not stretched across the whole scale.

    Args:
        horizon: List of candidate dicts, each with a numeric "accuracy"
            entry. The "accuracy" values are overwritten in place.
        min_range: Minimum width of the accuracy window that is mapped onto
            [0, 1]. Must be positive. Defaults to 0.2.

    Returns:
        The same `horizon` list that was passed in. An empty list is
        returned unchanged.

    Raises:
        ValueError: If `min_range` is not positive.
    """
    if min_range <= 0:
        # A zero-width window would divide by zero when all accuracies match.
        raise ValueError("min_range must be positive")
    if not horizon:
        return horizon

    accuracies = [candidate["accuracy"] for candidate in horizon]
    a_max = max(accuracies)
    a_min = min(accuracies)

    # Enforce the minimum window width by lowering the bottom of the scale.
    if a_min > a_max - min_range:
        a_min = a_max - min_range
    k = 1 / (a_max - a_min)

    for candidate, accuracy in zip(horizon, accuracies):
        candidate["accuracy"] = (accuracy - a_min) * k

    return horizon
    
def get_all_candidates(s, min_weight=1.0):
    """Return the features whose accumulated weight for sensor `s` is high enough.

    Every candidate group of sensor `s` is passed through
    `normalize_accuracies`. Each feature then collects the normalized
    accuracy of every candidate it appears in, summed over all of the
    sensor's groups. Features whose total reaches `min_weight` are kept.

    Args:
        s: Sensor identifier, matched against the "sensor" entry of the
            first candidate of each group.
        min_weight: Minimum accumulated weight a feature needs in order to
            be returned. Defaults to 1.0.

    Returns:
        List of feature names, in the order they were first encountered.
    """
    # Empty groups have no first candidate to read the sensor from; skip them.
    groups = [
        normalize_accuracies(group)
        for group in load_features()
        if group and group[0]["sensor"] == s
    ]

    # Sum the normalized accuracy of every candidate that uses a feature.
    fweights = {}
    for group in groups:
        for candidate in group:
            weight = candidate["accuracy"]
            for f in candidate["features"]:
                fweights[f] = fweights.get(f, 0) + weight

    return [f for f, total in fweights.items() if total >= min_weight]



# Example call for sensor 1 (the cell output below is the returned feature list).
get_all_candidates(1)

['pc',
 'temperature25',
 'timeOfDay',
 'weekEnd',
 'temperature33',
 'pc_ma_1H_3H',
 'dayOfWeek',
 'temperature4',
 'dewPoint12',
 'visibility31',
 'humidity14',
 'monthOfYear',
 'holiday',
 'dewPoint37',
 'i3',
 'pc_ma_7D_2D',
 'dayOfYear',
 'dayBeforeHoliday']