# occupancy prediction

![img](dev/IMG_6435.jpg)

plan:
- split `real_data_array` into `timeunit` chunks
- compute transition matrix for each chunk
- simulate `timeunit` length of data using each transition matrix
- compare simulated data to each respective real data chunk
- make sure to normalize the *`timestep`* with `scipy.signal.decimate`
<br>
<br>

In [1]:
# enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to local modules are picked up without
# restarting the kernel
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import scipy as sp
from tqdm import tqdm_notebook, tnrange, trange, tqdm
from itertools import cycle
from collections import Counter
from pathlib import Path
import time

import altair as alt
from altair.expr import datum

from _modules.wifi_traffic_analyzer import WifiTrafficAnalyzer

## params

In [3]:
#real_data_tstep = 1/20e6

# chunk length, in samples, used to split the real data array; the print
# statements below label it as µs -- presumably one sample per µs, TODO confirm
timeunit = 10_000

## configs

In [4]:
# real capture files, keyed 'real_<n>' for captures 2 through 6
real_data_path_dict = {
    f'real_{capture_id}': Path(f'data/wifitrafficstats{capture_id}.csv')
    for capture_id in range(2, 7)
}

# precomputed transition-matrix files; only the first one is currently enabled
transition_matrices_path_dict = dict(
    tmat_1=Path(r'data/wifi_t_matrices.csv'),
    #tmat_2=Path(r'data/wifi_t_matrices2.csv'),
)

## occupancy predictor

### classing it up

In [5]:
class OccupancyPredictor:
    """Split real occupancy data into chunks and estimate per-chunk transition probabilities.

    The real data is loaded through a ``WifiTrafficAnalyzer`` in ``'real'`` mode.
    Each chunk of ``timeunit`` consecutive samples yields one set of ON/OFF
    transition probabilities.
    """

    def __init__(self, real_data_path_dict, timeunits, autoselect_file=True):
        """Create the analyzer and select/process a target file.

        Args:
            real_data_path_dict: mapping of file key -> path, passed to
                ``WifiTrafficAnalyzer`` as ``path_dict``.
            timeunits: a single chunk length or a list of chunk lengths;
                any non-list value is wrapped in a one-element list.
            autoselect_file: when True the first key of the analyzer's
                ``path_dict`` is used, otherwise the user is prompted.
        """
        print('initializing OP..')

        # attributes
        self.real_data_path_dict = real_data_path_dict

        # set timeunits
        if not isinstance(timeunits, list):
            self.timeunits = [timeunits]
            print(f'single timeunit received {self.timeunits}')
        else:
            self.timeunits = timeunits
            print(f'{len(self.timeunits)} timeunits received')

        # startup tasks
        self.WTA_real = WifiTrafficAnalyzer(mode='real', path_dict=self.real_data_path_dict)
        self.select_file_key(autoselect=autoselect_file)

    def select_file_key(self, autoselect):
        """Choose the target file key, store it on ``self.file_key`` and process it.

        Args:
            autoselect: if True take the first key of the analyzer's
                ``path_dict``; otherwise read a key from ``input()``.

        Raises:
            AssertionError: if the key typed by the user (stripped and
                lower-cased) is not one of the analyzer's ``path_dict`` keys.
        """
        if autoselect:
            self.file_key = list(self.WTA_real.path_dict.keys())[0]
            self.WTA_real.process_real_data(self.file_key)
            print(f'\tautoselected and processed \'{self.file_key}\'')
            return

        file_key = input('please select a target file..')
        normalized_key = file_key.strip().lower()

        # explicit check instead of an ``assert`` statement: asserts are
        # stripped under ``python -O``; the exception type is kept so existing
        # callers see the same error
        if normalized_key not in self.WTA_real.path_dict.keys():
            print(f'invalid target file: {file_key}, options are {self.WTA_real.path_dict.keys()}')
            raise AssertionError(f'invalid target file: {file_key}')

        self.file_key = normalized_key
        self.WTA_real.process_real_data(self.file_key)

        print(f'\tset and processed target file: {self.file_key}')

    def convert_and_reshape(self, timeunit):
        """Reshape the analyzer's real data array into rows of ``timeunit`` samples.

        Trailing samples that do not fill a complete chunk are dropped.

        Args:
            timeunit: number of samples per chunk (row).

        Returns:
            2-D array of shape ``(n_chunks, timeunit)``.
        """
        self.data_array = self.WTA_real.real_data_array
        print(f'generated real data array, size: {self.data_array.shape}')

        # floor division: rounding to nearest could round *up*, making the
        # slice longer than the data and the reshape fail
        n_chunks = len(self.data_array) // timeunit
        data_matrix = (
            self.data_array[:n_chunks * timeunit].reshape((n_chunks, timeunit))
        )
        print(f'reshaped into {data_matrix.shape[0]} : {timeunit}µs chunks')

        return data_matrix

    @staticmethod
    def _safe_ratio(count, total):
        """Return ``count / total``, or 0 when ``total`` is 0."""
        return count / total if total else 0

    def compute_transition_matrix(self, row, timeunit):
        """Estimate ON/OFF transition probabilities from one chunk.

        Consecutive sample pairs within the first ``timeunit`` samples of
        ``row`` are counted by type (1 = ON, 0 = OFF); other values are
        ignored. Each count is divided by the number of transitions leaving
        the same state.

        Args:
            row: sequence of 0/1 samples.
            timeunit: number of leading samples of ``row`` to use.

        Returns:
            Tuple ``(p_onon, p_onoff, p_offon, p_offoff)``. When a state has
            no outgoing transitions in the chunk, both of its probabilities
            are 0.
        """
        states = np.asarray(row)[:max(timeunit, 0)]
        current, following = states[:-1], states[1:]

        from_on, from_off = current == 1, current == 0
        to_on, to_off = following == 1, following == 0

        # plain ints so the ratios below are ordinary Python floats
        onon = int(np.count_nonzero(from_on & to_on))
        onoff = int(np.count_nonzero(from_on & to_off))
        offon = int(np.count_nonzero(from_off & to_on))
        offoff = int(np.count_nonzero(from_off & to_off))

        leaving_on = onon + onoff
        leaving_off = offoff + offon

        return (
            self._safe_ratio(onon, leaving_on),
            self._safe_ratio(onoff, leaving_on),
            self._safe_ratio(offon, leaving_off),
            self._safe_ratio(offoff, leaving_off),
        )

    def compute_tmats_and_load_df(self, data_matrix, timeunit):
        """Compute transition probabilities for every row and collect them in a DataFrame.

        Args:
            data_matrix: 2-D array, one chunk per row.
            timeunit: chunk length passed to ``compute_transition_matrix``
                and stored in the ``timeunit`` column.

        Returns:
            DataFrame with columns ``OnOn, OnOff, OffOff, OffOn, timeunit,
            source`` (one row per chunk); ``source`` is ``self.file_key``.
        """
        # compute all transition matrices
        tmat_array = [
            self.compute_transition_matrix(
                data_matrix[idx,:],
                timeunit
            )
            for idx in tnrange(data_matrix.shape[0])
        ]

        # load dataframe; the tuples are (OnOn, OnOff, OffOn, OffOff) and the
        # final selection only reorders the columns
        tmat_df = (
            pd
            .DataFrame(
                tmat_array,
                columns=['OnOn', 'OnOff', 'OffOn', 'OffOff']
            ).assign(
                timeunit = timeunit,
                source = self.file_key
            )
            [['OnOn','OnOff','OffOff','OffOn','timeunit','source']]
        )
        print(f'created transition matrix dataframe, {tmat_df.shape[0]} rows')

        return tmat_df

    def process_timeunit(self, timeunit):
        """Run chunking and transition estimation for one ``timeunit``.

        Returns:
            Tuple ``(data_matrix, tmat_df)``.
        """
        print(f'processing data for timeunit: {timeunit}µs')

        data_matrix = self.convert_and_reshape(timeunit)
        tmat_df = self.compute_tmats_and_load_df(data_matrix, timeunit)

        return data_matrix, tmat_df
        
        

In [6]:
# build the predictor for the single `timeunit`; with autoselect_file=True the
# first key of the path dict is selected and processed during construction
OP = OccupancyPredictor(real_data_path_dict, timeunit, autoselect_file=True)

initializing OP..
single timeunit received [10000]
initializing WTA..
initialization complete, mode: real
real data options: 
	key: real_2, path: data/wifitrafficstats2.csv
	key: real_3, path: data/wifitrafficstats3.csv
	key: real_4, path: data/wifitrafficstats4.csv
	key: real_5, path: data/wifitrafficstats5.csv
	key: real_6, path: data/wifitrafficstats6.csv


HBox(children=(IntProgress(value=0, max=5310), HTML(value='')))


	autoselected and processed 'real_2'


In [7]:
# chunk the real data and estimate one set of transition probabilities per chunk
data_matrix, tmat_df = OP.process_timeunit(OP.timeunits[0])

# sanity check: one data_matrix row (and one tmat_df row) per chunk
print(f'data matrix: {data_matrix.shape}')
display(tmat_df.head())

processing data for timeunit: 10000µs
generated real data array, size: (40431970,)
reshaped into 4043 : 10000µs chunks


HBox(children=(IntProgress(value=0, max=4043), HTML(value='')))


created transition matrix dataframe, 4043 rows
data matrix: (4043, 10000)


Unnamed: 0,OnOn,OnOff,OffOff,OffOn,timeunit,source
0,0.923077,0.076923,1.0,0.0,10000,real_2
1,0.0,0.0,1.0,0.0,10000,real_2
2,0.0,0.0,1.0,0.0,10000,real_2
3,0.998675,0.001325,0.999784,0.000216,10000,real_2
4,0.999353,0.000647,1.0,0.0,10000,real_2


In [8]:
# simulate from the per-chunk transition probabilities; n_samples is set to the
# chunk length so the result can be compared against data_matrix
# NOTE(review): presumably one simulated row per tmat_df row -- confirm against
# WifiTrafficAnalyzer.simulate_all_OP_transition_matrices
sim_data_matrix = OP.WTA_real.simulate_all_OP_transition_matrices(
    tmat_dataframe=tmat_df,
    n_samples=OP.timeunits[0], 
    m_trials=1
)

print(f'sim data matrix: {sim_data_matrix.shape}')

HBox(children=(IntProgress(value=0, max=4043), HTML(value='')))


total elapsed time: 1.77 minutes
sim data matrix: (4043, 10000)


### functions

In [None]:
def convert_and_reshape(WTA_real_instance, file_key, timeunit):
    """Process one real data file and reshape it into rows of ``timeunit`` samples.

    Trailing samples that do not fill a complete chunk are dropped.

    Args:
        WTA_real_instance: analyzer exposing ``process_real_data(file_key)``
            and, afterwards, a ``real_data_array`` attribute.
        file_key: key of the file to process.
        timeunit: number of samples per chunk (row).

    Returns:
        Tuple ``(data_matrix, file_key)`` where ``data_matrix`` has shape
        ``(n_chunks, timeunit)``.
    """
    WTA_real_instance.process_real_data(file_key)
    data_array = WTA_real_instance.real_data_array

    print(f'generated real data array, size: {data_array.shape}')

    # floor division: rounding to nearest could round *up*, making the slice
    # longer than the data and the reshape fail
    n_chunks = len(data_array) // timeunit
    data_matrix = data_array[:n_chunks * timeunit].reshape((n_chunks, timeunit))
    print(f'reshaped into {data_matrix.shape[0]} : {timeunit}µs chunks')

    return data_matrix, file_key



def _transition_ratio(count, total):
    """Return ``count / total``, or 0 when ``total`` is 0."""
    return count / total if total else 0


def compute_transition_matrix(row, timeunit):
    """Estimate ON/OFF transition probabilities from one chunk.

    Consecutive sample pairs within the first ``timeunit`` samples of ``row``
    are counted by type (1 = ON, 0 = OFF); other values are ignored. Each
    count is divided by the number of transitions leaving the same state.

    Args:
        row: sequence of 0/1 samples.
        timeunit: number of leading samples of ``row`` to use.

    Returns:
        Tuple ``(p_onon, p_onoff, p_offon, p_offoff)``. When a state has no
        outgoing transitions in the chunk, both of its probabilities are 0.
    """
    states = np.asarray(row)[:max(timeunit, 0)]
    current, following = states[:-1], states[1:]

    from_on, from_off = current == 1, current == 0
    to_on, to_off = following == 1, following == 0

    # plain ints so the ratios below are ordinary Python floats
    onon = int(np.count_nonzero(from_on & to_on))
    onoff = int(np.count_nonzero(from_on & to_off))
    offon = int(np.count_nonzero(from_off & to_on))
    offoff = int(np.count_nonzero(from_off & to_off))

    leaving_on = onon + onoff
    leaving_off = offoff + offon

    return (
        _transition_ratio(onon, leaving_on),
        _transition_ratio(onoff, leaving_on),
        _transition_ratio(offon, leaving_off),
        _transition_ratio(offoff, leaving_off),
    )



def compute_tmats_and_load_df(data_matrix, timeunit, file_key):
    """Build a DataFrame of per-chunk transition probabilities.

    Each row of ``data_matrix`` is passed to ``compute_transition_matrix``;
    the resulting tuples become the columns ``onon, onoff, offon, offoff``,
    and constant ``timeunit`` and ``source`` columns are appended.
    """
    n_chunks = data_matrix.shape[0]

    # one probability tuple per chunk, with a notebook progress bar
    probabilities = []
    for chunk_idx in tnrange(n_chunks):
        chunk = data_matrix[chunk_idx, :]
        probabilities.append(compute_transition_matrix(chunk, timeunit))

    frame = pd.DataFrame(
        probabilities,
        columns=['onon', 'onoff', 'offon', 'offoff'],
    )

    return frame.assign(timeunit=timeunit, source=file_key)

### execution

In [None]:
# standalone analyzer for the function-based workflow below
WTA_real = WifiTrafficAnalyzer(mode='real', path_dict=real_data_path_dict)

In [None]:
# process 'real_2' and split it into rows of `timeunit` samples
data_matrix, file_key = convert_and_reshape(WTA_real, 'real_2', timeunit)

In [None]:
# one row of transition probabilities per chunk (lower-case column names)
tmat_df = compute_tmats_and_load_df(data_matrix, timeunit, file_key)

In [None]:
# preview the first rows of the transition-probability table
tmat_df.head()

## sim data dev

## misfits

In [None]:
# shape of the subset of chunks whose onon and onoff are both 0, i.e. chunks
# where no transition out of the ON state was observed
tmat_df.loc[
    lambda df: (df.onon == 0) & (df.onoff == 0)
].shape

In [None]:
# total number of chunks (rows), for comparison with the filtered count
tmat_df.shape

In [None]:
# fraction of chunks out of the 4043 total; 1886 is presumably the row count
# returned by the onon == 0 and onoff == 0 filter (output not captured) --
# TODO confirm
1886 /4043