# occupancy prediction

![img](data/IMG_6435.jpg)

plan:
- split `real_data_array` into chunks of `timeunit` samples each
- compute a transition matrix for each chunk
- simulate `timeunit` samples of data from each chunk's transition matrix
- compare each simulated chunk to its corresponding real data chunk
- make sure to normalize the *`timestep`* with `scipy.signal.decimate`
<br>
<br>

In [5]:
import pandas as pd
import numpy as np
import scipy as sp
from tqdm import tqdm_notebook, tnrange, trange, tqdm
from itertools import cycle
from collections import Counter
from pathlib import Path
import time

import altair as alt
from altair.expr import datum

from _modules.wifi_traffic_analyzer import WifiTrafficAnalyzer

## params

In [35]:
# Time step of the real capture: 1/20e6 = 5e-8.
# NOTE(review): presumably seconds per sample at a 20 MHz sample rate — confirm.
# Not referenced by any other cell visible in this notebook section.
real_data_tstep = 1/20e6

# Chunk length (number of array elements per chunk) used when reshaping the real
# data and when counting transitions.
# NOTE(review): the reshape cells print this value with a "µs" suffix — confirm
# that one array element really corresponds to one microsecond.
timeunit = 10_000

## configs

In [7]:
# Real-capture CSV files, keyed by the name handed to
# `WifiTrafficAnalyzer.process_real_data`.
real_data_path_dict = dict(
    real_2=Path(r'data/wifitrafficstats2.csv'),
    real_3=Path(r'data/wifitrafficstats3.csv'),
    real_4=Path(r'data/wifitrafficstats4.csv'),
    real_5=Path(r'data/wifitrafficstats5.csv'),
    real_6=Path(r'data/wifitrafficstats6.csv'),
)

# Transition-matrix CSV files; only the first one is currently enabled.
transition_matrices_path_dict = dict(
    tmat_1=Path(r'data/wifi_t_matrices.csv'),
    # tmat_2=Path(r'data/wifi_t_matrices2.csv'),
)

## occupancy predictor

### convert to real data array

In [38]:
# Build the analyzer in 'real' mode, pointing it at the real-capture CSV paths
# configured above; individual files are processed later by key.
WTA_real = WifiTrafficAnalyzer(mode='real', path_dict=real_data_path_dict)

initializing..
initialization complete, mode: real
real data options: 
	key: real_2, path: data/wifitrafficstats2.csv
	key: real_3, path: data/wifitrafficstats3.csv
	key: real_4, path: data/wifitrafficstats4.csv
	key: real_5, path: data/wifitrafficstats5.csv
	key: real_6, path: data/wifitrafficstats6.csv


In [39]:
def convert_and_reshape(WTA_real_instance, file_key, timeunit):
    """Process one real capture and cut it into fixed-length chunks.

    Parameters
    ----------
    WTA_real_instance : WifiTrafficAnalyzer
        Analyzer exposing ``process_real_data(key)`` and, afterwards, the
        1-D ``real_data_array`` attribute.
    file_key : str
        Key of the capture to process (passed to ``process_real_data``).
    timeunit : int
        Number of array elements per chunk.

    Returns
    -------
    numpy.ndarray
        2-D array of shape ``(n_chunks, timeunit)`` holding only the whole
        chunks; any trailing partial chunk is dropped.
    """
    WTA_real_instance.process_real_data(file_key)
    data_array = WTA_real_instance.real_data_array

    print(f'generated real data array, size: {data_array.shape}')

    # Floor, not round: rounding up (e.g. 2.6 -> 3 chunks) makes the slice
    # return the whole array, whose length is not a multiple of `timeunit`,
    # and the reshape then raises ValueError.
    n_chunks = len(data_array) // timeunit
    data_matrix = data_array[:n_chunks * timeunit].reshape((n_chunks, timeunit))
    print(f'reshaped into {data_matrix.shape[0]} : {timeunit}µs chunks')

    return data_matrix

In [41]:
# Load capture 'real_2' and reshape it into rows of `timeunit` elements.
data_matrix = convert_and_reshape(WTA_real, 'real_2', timeunit)

HBox(children=(IntProgress(value=0, max=5310), HTML(value='')))


generated real data array, size: (40431970,)
reshaped into 4043 : 10000µs chunks


In [10]:
# Scratch version of the load step: process capture 'real_2' and keep the flat
# array in `data_array`. Performs the same two calls as the first half of
# `convert_and_reshape`.
WTA_real.process_real_data('real_2')
data_array = WTA_real.real_data_array

print(f'generated real data array, size: {data_array.shape}')

HBox(children=(IntProgress(value=0, max=5310), HTML(value='')))


generated real data array, size: (40431970,)


### reshaping array into `timeunit` chunks

In [36]:
# Keep only whole chunks. Floor division is required here: rounding the chunk
# count up would leave a length that is not a multiple of `timeunit`, and the
# reshape would raise ValueError.
data_matrix = data_array[:timeunit * (len(data_array) // timeunit)].reshape((-1, timeunit))
print(f'reshaped into {data_matrix.shape[0]} : {timeunit}µs chunks')

reshaped into 4043 : 10000µs chunks


### `compute_transition_matrix`

In [15]:
def compute_transition_matrix(row, timeunit):
    """Estimate ON/OFF transition probabilities for one chunk of samples.

    Consecutive pairs among the first ``timeunit`` entries of ``row`` are
    counted as ON->ON, ON->OFF, OFF->ON or OFF->OFF (ON is the value 1, OFF
    is the value 0; pairs involving any other value are ignored). Each count
    is then divided by the number of transitions leaving the same state.

    Parameters
    ----------
    row : array-like
        1-D sequence of 0/1 samples.
    timeunit : int
        Number of leading samples to consider. If ``row`` is shorter, all of
        it is used.

    Returns
    -------
    tuple
        ``(p_onon, p_onoff, p_offon, p_offoff)``. When no transition leaves a
        state, both probabilities for that state are returned as ``0``, so
        that pair does not sum to 1 — callers that simulate from the result
        need to handle such rows.
    """
    # Only the first `timeunit` samples take part, as in the original loop
    # over range(timeunit - 1); a non-positive `timeunit` yields no pairs.
    window = np.asarray(row)[:max(timeunit, 0)]
    current, following = window[:-1], window[1:]

    was_on, was_off = current == 1, current == 0
    now_on, now_off = following == 1, following == 0

    # Convert to plain Python ints so the ratios below are ordinary float
    # divisions rather than numpy scalar operations.
    onon = int(np.count_nonzero(was_on & now_on))
    onoff = int(np.count_nonzero(was_on & now_off))
    offon = int(np.count_nonzero(was_off & now_on))
    offoff = int(np.count_nonzero(was_off & now_off))

    def _ratio(count, total):
        # A state that is never left has no defined probabilities; report 0.
        return count / total if total else 0

    from_on = onon + onoff
    from_off = offon + offoff

    return (
        _ratio(onon, from_on),
        _ratio(onoff, from_on),
        _ratio(offon, from_off),
        _ratio(offoff, from_off),
    )

In [16]:
# Pick one chunk (row index 5) as a sample input for the transition counter.
row = data_matrix[5,:]

In [17]:
# Display the sample chunk.
row

array([0, 0, 0, ..., 0, 0, 0])

In [22]:
# Sanity check on the sample chunk; the result is (p_onon, p_onoff, p_offon, p_offoff).
compute_transition_matrix(row, timeunit)

(0.9995249406175772,
 0.00047505938242280285,
 0.0001266784899923993,
 0.9998733215100076)

### compute all transition matrices

In [32]:
def compute_tmats_load_df(data_matrix, timeunit):
    """Build a DataFrame of transition probabilities, one row per chunk.

    Every row of ``data_matrix`` is passed to ``compute_transition_matrix``
    (with a notebook progress bar); the resulting 4-tuples become the columns
    ``onon``, ``onoff``, ``offon`` and ``offoff``, and a constant ``timeunit``
    column records the chunk length that was used.
    """
    column_names = ['onon', 'onoff', 'offon', 'offoff']
    n_chunks = data_matrix.shape[0]

    # one probability 4-tuple per chunk
    per_chunk_probs = []
    for chunk_idx in tnrange(n_chunks):
        chunk = data_matrix[chunk_idx, :]
        per_chunk_probs.append(compute_transition_matrix(chunk, timeunit))

    # tabulate and tag with the chunk length
    probs_df = pd.DataFrame(per_chunk_probs, columns=column_names)
    tmat_df = probs_df.assign(timeunit=timeunit)

    return tmat_df

In [37]:
# Transition probabilities for every chunk of the loaded capture.
tmat_df = compute_tmats_load_df(data_matrix, timeunit)

HBox(children=(IntProgress(value=0, max=4043), HTML(value='')))




In [26]:
# Scratch version of the first half of `compute_tmats_load_df`: one
# (p_onon, p_onoff, p_offon, p_offoff) tuple per row of `data_matrix`,
# kept as a plain list in `tmat_array`.
tmat_array = [
    compute_transition_matrix(
        data_matrix[idx,:],
        timeunit
    )
    for idx in tnrange(data_matrix.shape[0])
]

HBox(children=(IntProgress(value=0, max=4043), HTML(value='')))




In [28]:
# One tuple per chunk, so this should equal data_matrix.shape[0].
len(tmat_array)

4043

In [None]:
# NOTE(review): displays the whole list (one tuple per chunk); this cell has no
# execution count. Consider `tmat_array[:5]` or removing the cell.
tmat_array

In [31]:
# Scratch version of the second half of `compute_tmats_load_df`: tabulate the
# tuples and add a constant `timeunit` column.
tmat_df = pd.DataFrame(tmat_array, columns=['onon', 'onoff', 'offon', 'offoff']).assign(timeunit = timeunit)

tmat_df.head()

Unnamed: 0,onon,onoff,offon,offoff,timeunit
0,0.923077,0.076923,0.0,1.0,10000
1,0.0,0.0,0.0,1.0,10000
2,0.0,0.0,0.0,1.0,10000
3,0.998675,0.001325,0.000216,0.999784,10000
4,0.999353,0.000647,0.0,1.0,10000


In [157]:
# Shape of the sub-frame of chunks whose ON-row probabilities are both zero.
# `compute_transition_matrix` only returns that pair when no transition left
# the ON state in the chunk.
tmat_df.loc[
    lambda frame: (frame.onon == 0) & (frame.onoff == 0)
].shape

(1886, 4)

In [154]:
# Total number of chunks, for comparison with the filtered count.
# NOTE(review): the recorded output has 4 columns, whereas the frame built with
# the extra `timeunit` column has 5 — this cell was likely run against an
# earlier `tmat_df`; re-check after Restart & Run All.
tmat_df.shape

(4043, 4)

In [155]:
# Fraction of chunks with no transition out of the ON state.
# TODO: the two literals are copied by hand from the recorded outputs of the
# preceding cells; compute them from `tmat_df` so the value survives a re-run.
1886 /4043

0.46648528320554045