# zerina project

## setup

In [2]:
import pandas as pd
import numpy as np
import scipy
from tqdm import tqdm_notebook, tnrange, trange, tqdm
from itertools import cycle
from collections import Counter

from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

## data processing

### transition matrices

In [None]:
tmat_file = 'data/wifi_t_matrices.csv'

# Load the transition-matrix table. Because `names` is given without
# `header`, pandas treats the first line of the file as data, not as a header.
tmat_df = pd.read_csv(
    tmat_file,
    names=['OnOn', 'OnOff', 'OffOn', 'OffOff', 'timestep'],
)

# Disabled complement transforms, kept for reference:
#   OffOff = 1 - OffOff
#   OffOn  = 1 - OffOn

# Reorder the columns so the two "Off" probabilities swap places relative to
# the order in which they are stored in the file.
tmat_df = tmat_df.loc[:, ['OnOn', 'OnOff', 'OffOff', 'OffOn', 'timestep']]

tmat_df.head()

### read csv and preprocess

In [None]:
tar_file = r'data/wifitrafficstats2.csv'
# Alternative capture files (uncomment one to switch input):
#tar_file = r'data/wifitrafficstats3.csv'
#tar_file = r'data/wifitrafficstats4.csv'
#tar_file = r'data/wifitrafficstats5.csv'
#tar_file = r'data/wifitrafficstats6.csv'

df = (
    pd.read_csv(tar_file)
    .rename(columns={'Time': 'time', 'Length': 'length', 'TX Rate': 'rate'})
    # Rescale the raw columns (presumably s -> us, Mbit/s -> bit/s and
    # bytes -> bits -- confirm against the capture format).
    .assign(
        time=lambda d: d['time'] * 1e6,
        rate=lambda d: d['rate'] * 1e6,
        length=lambda d: d['length'] * 8,
    )
    # Transmission duration of each packet and the instant it ends.
    .assign(on_time=lambda d: 1e6 * d['length'] / d['rate'])
    .assign(end_time=lambda d: d['time'] + d['on_time'])
    # Gap between the previous packet's end and this packet's start.
    # Non-positive gaps (overlaps) and the undefined gap of the first row
    # (NaN after the shift) both become 0.
    .assign(
        off_time=lambda d: (d['time'] - d['end_time'].shift(1)).pipe(
            lambda gap: gap.where(gap > 0, 0)
        )
    )
    .drop(columns=['No.', 'Delta Time'])
    .round(0)
)



df.head(10)

## functions

### generate data list of `1`'s and `0`'s

In [None]:
def generate_data_list(df):
    '''
    takes dataframe with 'on_time' and 'off_time' duration columns
    and returns a numpy integer array of 1's (on_time) and 0's (off_time)

    For every row after the first, the output contains int(on_time) ones
    followed by int(off_time) zeros, in row order. Durations are truncated
    toward zero; negative durations contribute no samples.

    Raises ValueError if a duration is NaN or infinite.

    NOTE(review): the first row of df is skipped, as in the original
    row-by-row implementation -- presumably because its off_time has no
    preceding packet; confirm.
    '''

    # (rows - 1, 2) array of [on_time, off_time] pairs.
    durations = df[['on_time', 'off_time']].iloc[1:].to_numpy(dtype=float)

    if not np.isfinite(durations).all():
        raise ValueError('on_time/off_time must be finite numbers')

    # Row-major flattening yields on_1, off_1, on_2, off_2, ...
    # astype truncates toward zero like int(); clip negatives to zero because
    # np.repeat rejects negative counts (range(negative) used to be empty).
    counts = np.maximum(durations.astype(np.int64), 0).ravel()

    # Alternating 1, 0, 1, 0, ... matching the flattened counts.
    bits = np.zeros(counts.size, dtype=int)
    bits[::2] = 1

    # One vectorised expansion instead of growing a Python list per sample.
    return np.repeat(bits, counts)

### fast get all state lengths

In [None]:
def get_state_length_list(data):
    '''
    takes a 1-D data list of bits ([1,1,0,1,0,0,...]) and returns a numpy
    array with the length of each run of consecutive 1's, in order of
    appearance ([2,1,...] for the example above)

    Runs of 0's are not reported. Empty input returns an empty array.

    NOTE(review): a run of 1's that reaches the very end of the data is not
    reported, because no closing boundary is placed after the last element
    ([1,1,0,1,1] -> [2]). This is kept unchanged; confirm it is intended.
    '''

    data = np.asarray(data)

    # data[0] below would raise IndexError on empty input.
    if data.size == 0:
        return np.empty(0, dtype=np.intp)

    # A flag for every position where a run starts:
    #   - position 0 only if the data starts with a nonzero bit,
    #   - position i if data[i] differs from data[i - 1],
    #   - never the position just past the end (see NOTE above).
    boundary_flags = np.concatenate((
        [data[0] != 0],
        data[:-1] != data[1:],
        [False],
    ))
    boundaries = np.where(boundary_flags)[0]

    # Consecutive boundary differences alternate on-run, off-run, on-run, ...
    # (the first flagged boundary always opens a run of 1's), so every other
    # difference is the length of a run of 1's.
    return np.diff(boundaries)[::2]

### compute stats

In [None]:
def compute_stats(sim_matrix):
    '''
    takes a 2-D simulation matrix (one trial per row, one bit per column)
    and returns a dict with

        'mean':          mean over trials of the fraction of 1's in a trial
        'std':           standard deviation over trials of that fraction
        'state_lengths': flat list with the run lengths reported by
                         get_state_length_list for every trial, in row order

    All returned values are builtin Python numbers so the result can be
    written with json.dump.
    '''
    # Each row sum counts the 1's among shape[1] samples, so the counts are
    # normalised by the number of columns. (The number of rows, shape[0], is
    # the trial count and was previously used here by mistake.)
    n_samples = sim_matrix.shape[1]
    on_counts = sim_matrix.sum(axis=1)

    # .tolist() converts numpy integers to builtin ints.
    state_lengths = [
        length
        for trial in sim_matrix
        for length in get_state_length_list(trial).tolist()
    ]

    return {
        'mean': float(np.mean(on_counts) / n_samples),
        'std': float(np.std(on_counts) / n_samples),
        'state_lengths': state_lengths,
    }

## data simulator

### setup

In [None]:
def generate_sim_data(n_samples, m_trials, transition_matrix):
    '''
    takes a transition matrix and generates m_trials simulated binary
    signals of n_samples bits each

    n_samples:          number of bits kept per trial (columns of the result)
    m_trials:           number of independent trials (rows of the result)
    transition_matrix:  iterable of probabilities that is cycled through
                        endlessly; the main loop of this file passes
                        [OnOn, OnOff, OffOff, OffOn]

    returns a float numpy array of shape (m_trials, n_samples) holding 0/1

    How a bit is produced: the last three bits are looked up in cycle_dict
    to get a step count n; the probability cycler is advanced n times and
    p becomes the last value pulled (p is left unchanged when n == 0); the
    new bit is one draw of np.random.binomial(1, p).

    Every trial is seeded with the bits [1,1,1] and p = 0.5, generates
    n_samples + 1 bits, and discards the three seed bits plus the first
    generated bit.

    NOTE(review): p is a module-level global; calling this function
    overwrites any existing global named p.
    NOTE(review): state_cycler is created once before the trial loop, so
    its position carries over from one trial to the next while the bits and
    p are reset -- confirm this is intended.
    '''

    # Number of steps to advance the probability cycler, keyed by the last
    # three bits: 0 when all three agree, 2 when they alternate
    # ((1,0,1) or (0,1,0)), 1 otherwise.
    cycle_dict = {
        (1,1,1): 0,
        (1,1,0): 1,
        (1,0,1): 2,
        (1,0,0): 1,
        (0,1,1): 1,
        (0,1,0): 2,
        (0,0,1): 1,
        (0,0,0): 0,
    }

    # Current probability of drawing a 1; shared with generate_bit through
    # the module namespace.
    global p
    p = 0.5
    sim_matrix = np.zeros((m_trials, n_samples))

    # Endless iterator over the entries of transition_matrix.
    state_cycler = cycle(transition_matrix)

    # Pull the next probability from the cycler.
    def cycle_states():
        return next(state_cycler)

    # Advance the cycler n times (updating the global p) and draw one bit.
    def generate_bit(n):
        global p

        if n == 0:
            # No change of state: reuse the current p.
            return np.random.binomial(1, p)

        else:
            for i in range(n):
                p = cycle_states()

            return np.random.binomial(1, p)

    for i in trange(m_trials, leave=False):
        # Seed bits and probability for this trial.
        sim_data = [1,1,1]
        p = 0.5

        for _ in range(n_samples + 1):
            new_bit = generate_bit(cycle_dict[tuple(sim_data[-3:])])
            sim_data.append(new_bit)

        # Drop the 3 seed bits and the first generated bit, leaving exactly
        # n_samples bits.
        sim_matrix[i,:] = np.array(sim_data[4:])

    return sim_matrix
    

### single execution

### main loop

In [None]:
%%time

# Simulate every transition matrix in tmat_df and collect the summary
# statistics of each simulation, keyed by the row's timestep.
master_dictionary = {}
sample_size = 10_000_000  # bits per simulated trial
trial_size = 10           # trials per transition matrix

for row in tqdm(tmat_df.itertuples(), total=len(tmat_df)):
    # Probabilities in the order the simulator cycles through them.
    transition_matrix = [
        getattr(row, column)
        for column in ('OnOn', 'OnOff', 'OffOff', 'OffOn')
    ]
    sim_matrix = generate_sim_data(sample_size, trial_size, transition_matrix)

    master_dictionary[row.timestep] = compute_stats(sim_matrix)

In [None]:
# Persist master_dictionary with IPython's storemagic so that a later session
# can restore it with `%store -r master_dictionary`.
%store master_dictionary

#### alternate save

In [None]:
import json
path = 'data/all_t_matrix_results.json'


def _to_builtin(obj):
    '''
    json.dump fallback: convert numpy scalars and arrays, which the json
    module cannot serialise, to the equivalent builtin Python objects
    '''
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(
        f'Object of type {type(obj).__name__} is not JSON serializable'
    )


# Best-effort save: a failure is reported (with its cause) instead of raised,
# so the notebook keeps running and the in-memory results are not lost.
try:
    with open(path, 'w', encoding='utf-8') as file:
        json.dump(master_dictionary, file, default=_to_builtin)
except (OSError, TypeError, ValueError) as err:
    # OSError: the file cannot be opened or written.
    # TypeError / ValueError: the data cannot be serialised.
    print(f'uh oh, something went wrong: {err!r}')
else:
    print(f'successfully saved file to:\n{path}')

## real data ref