# Proof of Concept - Gorilla Compression

In [1]:
# Gorilla Compression is a lossless time series compression algorithm
# Let us test if it is able to compress the data for us by checking difference between
# input file sizes and output file sizes
# Gorilla Compression works as follows

In [2]:
#(a) Calculate the delta of delta
#          D = (t_n − t_(n−1)) − (t_(n−1) − t_(n−2))
#(b) If D is zero, then store a single ‘0’ bit
#(c) If D is between [-63, 64], store ‘10’ followed by the value (7 bits)
#(d) If D is between [-255, 256], store ‘110’ followed by the value (9 bits)
#(e) if D is between [-2047, 2048], store ‘1110’ followed by the value (12 bits)
#(f) Otherwise store ‘1111’ followed by D using 32 bits

In [3]:
# Import all packages here
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gorillacompression as gc
import glob
import os

In [4]:
df_3vac1 = pd.read_csv('../fbc/3VAC1.csv')

In [5]:
def change_to_list(row):
    amplitudes_string = row['amplitudes'][1:-1]
    amplitude_list = amplitudes_string.split(',')

    map_object = map(int, amplitude_list)
    amplitude_list_int = list(map_object)

    return amplitude_list_int

df_3vac1['amplitudes'] = df_3vac1.apply(lambda x: change_to_list(x), axis=1)

In [6]:
df_compressed = df_3vac1[['device_id','amplitudes']].copy()
df_compressed.to_csv('temp1.csv')
df_compressed['compressed_waveform'] = df_3vac1['amplitudes'].apply(lambda x: gc.ValuesEncoder.encode_all(x))
df_compressed.drop(['amplitudes'], axis=1, inplace=True)
df_compressed.to_csv('temp2.csv')

In [7]:
# temp1.csv occupies ~ 64 MB on disk
# temp2.csv occupies - 47 MB on disk
# So the compression clearly works

# Code has been removed from here but I tested decoding on one waveform, and it accurately matched input before
# encoding. So this is clearly lossless

In [8]:
# Let us generate compressed data for all our 40 nodes

In [1]:
# Decompressing the data is only a matter of callinng gc.ValuesDecoder.decode_all(x) on all encoded values

# Final Compression 

In [9]:
cwd_path = os.getcwd()
csv_files = glob.glob(os.path.join(cwd_path,'../fbc', "*.csv"))

In [10]:
ctr = 1

for f in csv_files:

    # read the csv file
    df = pd.read_csv(f)
    df['amplitudes'] = df.apply(lambda x: change_to_list(x), axis=1)
    
    df_temp = df[['device_id']].copy()
    df_temp['compressed_waveform'] = df['amplitudes'].apply(lambda x: gc.ValuesEncoder.encode_all(x))
    
    result_path = cwd_path + '/Compression Results/'
    result_path += f.split('/')[-1].split('.')[0] + '_result.csv'
    
    print("Writing file #{} : {}_result.csv ...".format(ctr, f.split('/')[-1].split('.')[0]))
    
    ctr += 1
    
    df_temp.to_csv(result_path, index=False)

Writing file #1 : 3VAE2_result.csv ...
Writing file #2 : H070C_result.csv ...
Writing file #3 : 3VAE1_result.csv ...
Writing file #4 : H070A_result.csv ...
Writing file #5 : W056E_result.csv ...
Writing file #6 : H071A_result.csv ...
Writing file #7 : 3VAH2_result.csv ...
Writing file #8 : H030C_result.csv ...
Writing file #9 : MGABT_result.csv ...
Writing file #10 : H071B_result.csv ...
Writing file #11 : 3VAH1_result.csv ...
Writing file #12 : 3VAC1_result.csv ...
Writing file #13 : MM103_result.csv ...
Writing file #14 : 3WBM1_result.csv ...
Writing file #15 : 3VAC2_result.csv ...
Writing file #16 : MM104_result.csv ...
Writing file #17 : MM106_result.csv ...
Writing file #18 : ATLA1_result.csv ...
Writing file #19 : MM122_result.csv ...
Writing file #20 : H0182_result.csv ...
Writing file #21 : W091A_result.csv ...
Writing file #22 : MC050_result.csv ...
Writing file #23 : MM109_result.csv ...
Writing file #24 : MM119_result.csv ...
Writing file #25 : W012B_result.csv ...
Writing f