#Bayesian Net with spatial and temporal correlations among sensors

In [272]:
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import pandas as pd
import os as os
from sklearn.linear_model import Lasso
%matplotlib inline
import warnings
import helpers
import inspect
warnings.filterwarnings('ignore')

In [273]:
def get_data_matrix(file_path):    
    '''Strip out the first row and first column and return data matrix'''
    data = np.loadtxt(file_path, delimiter=',', skiprows=1)
    return data[:,1:]

In [274]:
class sensor(object):
    """ A sensor.
    Parameters after training are:
    1. Mu_i,Sigma_i
    2. 51 Betas
    3. Conditional Variance
    """

    def __init__(self,total_sensors):        
        #For compute_initial_mu_vars
        self.mu_i = -1
        self.var_i = -1    
        
        #For compute_betas
        self.betas = [-1 for i in range(total_sensors+1)]
        
        #For compute_cond_var
        self.cond_var = -1
    
    def compute_initial_mu_vars(self,sensors_readings):
        self.mu_i = np.mean(sensors_readings)        
        self.var_i = np.var(sensors_readings)
    
    def compute_betas(self,X,y):
        self.regr = Lasso()
        self.regr.fit(X, y)
        self.betas[0] = self.regr.intercept_        
        
        for id,value in enumerate(self.regr.coef_):
            self.betas[id+1] = value
                
        
    def compute_cond_var(self,X,y):     
        predictions = np.array([self.regr.predict(row)[0] for row in X])        
        error = y - predictions        
        self.cond_var = np.var(error)

In [275]:
def learn_train_params(train_data_path):
    '''Train a lasso regressor on the train data and return the parameters of sensors'''
    
    data_matrix = get_data_matrix(train_data_path)
    sensors_count = data_matrix.shape[0]
    timestamps_count = data_matrix.shape[1]

    sensor_obj_lst = []
    X = data_matrix[:,:timestamps_count-1].T

    for s_id in range(sensors_count):
        sensor_obj = sensor(sensors_count)    
        sensor_obj.compute_initial_mu_vars(data_matrix[s_id])    
        y = data_matrix[s_id][1:].T
        sensor_obj.compute_betas(X,y)
        sensor_obj.compute_cond_var(X,y)
        sensor_obj_lst.append(sensor_obj)
        
    return sensor_obj_lst

##Window Sliding Active Inference

In [276]:
def build_pred_mat_using_sliding(old_test_mat,budget,sens_params_lst):
    '''Predict the ones that are not in window. For the rest, use existing data'''
    
    test_matrix = old_test_mat.copy()
    total_cols = test_matrix.shape[1]
    total_rows = test_matrix.shape[0]

    windows = helpers.get_windows(total_rows,total_cols,budget)

    for col in range(0,total_cols):
        
        #Row Ids to be predicted
        pred_row_ids = set(range(0,total_rows)) - set(windows[col])
                
        for r_id in pred_row_ids:
            current_sensor = sens_params_lst[r_id]
            if col == 0:
                test_matrix[r_id,col] = current_sensor.mu_i
            else:
                betas = current_sensor.betas
                prev_col = test_matrix[:,col-1:col]
                test_matrix[r_id,col] = betas[0]+ np.dot(betas[1:],prev_col)
                    
    return test_matrix

In [277]:
def train_and_predict(train_data_path,test_data_path,budget,Inf_type='W'):
    
    #Train to get params
    sens_params_lst = learn_train_params(train_data_path)
    
    test_matrix = get_data_matrix(test_data_path)
    
    #Return prediction values obtained using train params
    if Inf_type == 'W':
        return build_pred_mat_using_sliding(test_matrix,budget,sens_params_lst)
    elif Inf_type == 'V':
        return build_pred_mat_using_var(test_matrix,sens_params_lst,budget)

##Temperature's MAE using Window

In [270]:
for budget in [0,5,10,20,25]:
    pred_mat = train_and_predict('Dataset\intelTemperatureTrain.csv','Dataset\intelTemperatureTest.csv',budget,'W')
    
    'Write to disc [START]'
    file_name = 'w'+str(budget)+'.csv'
    row_ids = [x for x in range(0,50)] #50 sensors
    col_ids = list(np.arange(0.5,24.5,0.5))
    col_ids += col_ids
    col_ids = [x if x!= 24 else 0 for x in col_ids]
    df = pd.DataFrame(pred_mat, index=row_ids, columns=col_ids)
    df.to_csv(file_name, index=True, header=True, sep=',')
    'END'
    
    test_matrix = get_data_matrix('Dataset\intelTemperatureTest.csv')
    print helpers.mean_abs_error(test_matrix,pred_mat)

2.3580283488
1.40551284707
0.963139344009
0.507008480598
0.388757264106


##Humidity's MAE using Window

In [271]:
for budget in [0,5,10,20,25]:
    pred_mat = train_and_predict('Dataset\intelHumidityTrain.csv','Dataset\intelHumidityTest.csv',budget,'W')
    
    'Write to disc [START]'
    file_name = 'w'+str(budget)+'.csv'
    row_ids = [x for x in range(0,50)] #50 sensors
    col_ids = list(np.arange(0.5,24.5,0.5))
    col_ids += col_ids
    col_ids = [x if x!= 24 else 0 for x in col_ids]
    df = pd.DataFrame(pred_mat, index=row_ids, columns=col_ids)
    df.to_csv(file_name, index=True, header=True, sep=',')
    'END'
    
    test_matrix = get_data_matrix('Dataset\intelHumidityTest.csv')
    print helpers.mean_abs_error(test_matrix,pred_mat)

5.20400735684
2.91873540345
1.71199837154
0.823603812128
0.563036486245


##Variance Based Active Inference

$\sigma_{ij}=\sigma_i + \sum_{q=1}^{50} \beta_{q}^{(i)^{2}} \sigma_{q-2,j-1}$ <br>
- $\sigma_{i}$ is the conditional variance
- $\beta_{q}^{(i)}$ is the qth regressed coefficient of ith sensor
- $\sigma_{ij}$ is the variance at jth timestamp of ith sensor

In [247]:
def compute_variance(col_id,sens_params,prev_col_vars=[]):
    '''Compute the variance according to above formula. '''
    
    var = 0
    
    if col_id == 0 and prev_col_vars == []:
        var = sens_params.var_i
    elif prev_col_vars != []:        
        cond_var = sens_params.cond_var
        betas = sens_params.betas[1:]        
        part2 = np.dot(betas,prev_col_vars)
        var = cond_var + part2
        
    return var

In [248]:
def build_pred_mat_using_var(test_matrix,sens_params,budget):
    '''Build the predicted matrix using variance based approach'''
    total_cols = test_matrix.shape[1]
    total_rows = test_matrix.shape[0]
    
    variances_matrix = np.zeros((total_rows,total_cols))
    prev_col_variances=[]
    
    for col_id in range(0,total_cols):
        
        #Get the variances of the column col_id
        col_variances = []
        for row in range(0,total_rows):
            cur_sensor_params = sens_params[row]
            var_ij = compute_variance(col_id,cur_sensor_params,prev_col_variances)
            col_variances.append(var_ij)
        
        #Get the indices with top k variances     
  
        top_var_ind = helpers.top_k_indices(col_variances,budget)
    
        #For ones with top k variances, set variance to 0 as it won't be predicted
        for ind in top_var_ind:
            variances_matrix[ind,col_id] = 0
        
        pred_row_ids = set(range(0,total_rows)) - set(top_var_ind)
        
        #Predict the ones which do not fall among top k variances
        for row_id in pred_row_ids:
            #Update mu
            current_sensor = sens_params[row_id]            
            betas = current_sensor.betas
            if col_id == 0:                      
                test_matrix[row_id,col_id] = current_sensor.mu_i
            else:
                prev_col_mus = test_matrix[:,col_id-1:col_id]
                test_matrix[row_id,col_id] = betas[0]+ np.dot(betas[1:],prev_col_mus)
            
            #Update sigmas
            cur_sensor_params = sens_params[row_id]
            if col_id == 0:
                prev_col_vars_2 = None
                variances_matrix[row_id,col_id]  = cur_sensor_params.var_i
            else:
                #prev_col_vars_2 = variances_matrix[:,col_id-1:col_id]                       
                prev_col_vars_2 = []
                
                for ele in variances_matrix[:,col_id-1:col_id] :
                    prev_col_vars_2.append(ele[0])
        
                variances_matrix[row_id,col_id] = compute_variance(col_id,cur_sensor_params,prev_col_vars_2)           
        
        prev_col_variances= []
        for ele in variances_matrix[:,col_id:col_id+1]:
            prev_col_variances.append(ele[0])
            
    return test_matrix

##Temperature's MAE using Variance

In [269]:
for budget in [0,5,10,20,25]:
    pred_mat = train_and_predict('Dataset\intelTemperatureTrain.csv','Dataset\intelTemperatureTest.csv',budget,'V')
    
    'Write to disc [START]'
    file_name = 'v'+str(budget)+'.csv'
    row_ids = [x for x in range(0,50)] #50 sensors
    col_ids = list(np.arange(0.5,24.5,0.5))
    col_ids += col_ids
    col_ids = [x if x!= 24 else 0 for x in col_ids]
    df = pd.DataFrame(pred_mat, index=row_ids, columns=col_ids)
    df.to_csv(file_name, index=True, header=True, sep=',')
    'END'
    test_matrix = get_data_matrix('Dataset\intelTemperatureTest.csv')
    print helpers.mean_abs_error(test_matrix,pred_mat)

2.3580283488
0.776612864088
0.632531210788
0.406946347889
0.330723961793


##Humidity's MAE using Variance

In [268]:
for budget in [0,5,10,20,25]:
    pred_mat = train_and_predict('Dataset\intelHumidityTrain.csv','Dataset\intelHumidityTest.csv',budget,'V')
    
    'Write to disc [START]'
    file_name = 'v'+str(budget)+'.csv'
    row_ids = [x for x in range(0,50)] #50 sensors
    col_ids = list(np.arange(0.5,24.5,0.5))
    col_ids += col_ids
    col_ids = [x if x!= 24 else 0 for x in col_ids]
    df = pd.DataFrame(pred_mat, index=row_ids, columns=col_ids)
    df.to_csv(file_name, index=True, header=True, sep=',')
    'END'
    
    test_matrix = get_data_matrix('Dataset\intelHumidityTest.csv')
    print helpers.mean_abs_error(test_matrix,pred_mat)

5.20400735684
2.09154217564
1.4122045059
0.842476668095
0.633271897529
