In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from audioDataLoader import audioDataloader
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import sys
import neuro
from py_apps import utils
from py_apps.utils.common_utils import read_network
from py_apps.utils.common_utils import load_json_arg
from py_apps.utils.neuro_help import *
import numpy as np

In [2]:
# Load the array saved in the old data format.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
file_path = "/data2/khood/PowerGridData_training.npy"
data = np.load(file_path)
# Preview the first entry of the loaded array.
data[0]

array([[60.        , 60.        , 60.        , ..., 60.00069713,
        60.00069605, 60.00069504],
       [60.        , 60.        , 60.        , ..., 60.00070597,
        60.00070482, 60.00070367],
       [60.        , 60.        , 60.        , ..., 60.00070733,
        60.00070613, 60.0007049 ],
       ...,
       [60.        , 60.        , 60.        , ..., 60.0007285 ,
        60.00072819, 60.00072784],
       [60.        , 60.        , 60.        , ..., 60.00073347,
        60.0007332 , 60.00073288],
       [60.        , 60.        , 60.        , ..., 60.00072845,
        60.00072809, 60.00072769]])

In [3]:
X = data
# One global minimum and maximum per index along axis 1, reduced over every other axis.
# NOTE(review): axis 1 is presumably the feature axis (A[i][j][k] layout) — confirm.
non_feature_axes = tuple(axis for axis in range(X.ndim) if axis != 1)
ea_json = {
    "dmin": np.min(X, axis=non_feature_axes).astype(float).tolist(),
    "dmax": np.max(X, axis=non_feature_axes).astype(float).tolist(),
}

In [4]:
# Column 800 of the first entry, taken across all of its rows.
# NOTE(review): presumably rows are features and columns are timesteps — confirm.
data[0][:, 800]

array([60.00048582, 60.00020592, 60.00015613, 60.00016548, 60.00019947,
       60.00019522, 60.00023037, 60.00024782, 60.00052549, 60.00015386,
       60.00016741, 60.00015825, 60.00014899, 60.00061492, 60.00005609,
       60.00002154, 60.00007367, 60.00010527, 59.99993716, 59.99990395,
       60.00045922, 59.99993634, 59.99993531, 60.0000094 , 60.00017263,
       60.00011805, 60.00010768, 60.00009439, 60.00008629, 60.0009906 ,
       60.00051745, 60.0005559 , 60.00057731, 60.00061758, 60.00063709,
       60.00061363, 60.00065301, 60.00055303, 60.00067229, 60.00062938,
       60.00089973, 60.00096554, 60.00066497, 60.0006655 , 60.00068863,
       60.00060792, 60.00053278, 60.0005675 , 60.00066436, 60.00081466,
       60.0007265 , 60.00092733, 60.00019853, 60.00061951, 60.0001098 ,
       59.99990389, 59.99985908, 59.99989436, 59.99987661, 60.00013398,
       60.00006871, 60.00051375, 60.00054503, 60.00061353, 60.0006794 ,
       60.00090236, 60.00096811, 60.00094049])

In [5]:
# Show the recorded minimum and maximum for index 0, one value per line.
print(ea_json["dmin"][0], ea_json["dmax"][0], sep="\n")

59.96341599826725
60.073241081442056



> ## Time Series Datasets
>
>classify_driver.py also supports timeseries data for classification. In this case, you will be using the app_type load and setting --timeseries true. <br>
>For datasets stored in this manner, if your data is stored in three-dimensional array A, then A[i] gives the i<sup>th</sup> data  <br>
>instance, and A[i][j] will give the j<sup>th</sup> feature of the i<sup>th</sup> data instance, and A[i][j][k] gives the k<sup>th</sup> timestep of the j<sup>th</sup> feature of the i<sup>th</sup> data  <br>
>instance. An example of a time series data set is given in the data directory, the Activity Recognition system based on  <br>
>Multisensor data fusion (AReM) Data Set from the UCI repository. In this example, we are also using a custom encoder (with --encoder) and setting a  <br>
>new simulation time (with --sim_time), along with using multiple processes to train in parallel (with --processes).  <br>
<br>

## Summary 
<ul>
    <li>i: Index of a data instance. A[0] gives the first, A[1] the second, and so on.</li>
    <li>j: Feature index within a data instance. A[i][j] gives the j<sup>th</sup> feature.</li>
    <li>k: Timestep of a feature within a data instance. A[i][j][k] gets the i<sup>th</sup> data instance's j<sup>th</sup> feature's value at the k<sup>th</sup> timestep.</li>
</ul>
In summary:

    A[i] represents the ith data instance.
    A[i][j] represents the jth feature of the ith data instance.
    A[i][j][k] represents the kth timestep of the jth feature of the ith data instance.


## Testing
You can test different encoding parameters with the <strong>spike_encoder_tool</strong> from the framework repo. The util programs are useful for testing and troubleshooting. 

<b>To build the util programs you will need to:</b>

<ol>
    <li>Check out the framework repo.</li>
    <li>Add the following flag to the makefile, directly under the "Build Flags" line: <code>CFLAGS ?= -pthread</code></li>
    <li>run <strong>make utils</strong> from the root of the framework repo</li>
</ol>
 


You can then run any of the util programs from the root of the framework repo with <code>./bin/&lt;UTIL PROGRAM NAME&gt; '&lt;PROMPT&gt;'</code><br>
For example:<br>
<pre>
UNIX> pwd
/Users/USERNAME/Documents/framework
UNIX> <b>./bin/spike_encoder_tool 'SE>'</b>
SE> ?                                                                   # This prints out all of the options.
For commands that take a json either put a filename on the same line,
or the json can be multiple lines, starting on the next line.

FJ json                - Read a spike encoder from json.
TJ                     - Create JSON from the spike encoder.
PAS                    - Call print_all_settings() for debugging.
SOI int [over]         - Call set_overall_interval(interval, override=true).
I                      - Call get_overall_interval().
MS                     - Call get_max_spikes().
N                      - Return the number of neurons for the spike encoder.
GS val min max         - Get spikes for the given val, whose range is [min,max].
GTS vals min max       - Get timeseries spikes for the given vals, whose range is [min,max].
GSS vals times min max - Get sparse spikes for the given vals and times, and val range is [min,max] 

?                      - Print commands.
Q                      - Quit.
</pre>

In [6]:
# Index CSV for each split of the 15-second dataset.
train_dataset = "dataset_15sec/train/train.csv"
valid_dataset = "dataset_15sec/valid/valid.csv"
test_dataset = "dataset_15sec/test/test.csv"

# Single normalisation step shared by every split.
# NOTE(review): mean/std are presumably dataset-wide statistics computed elsewhere — confirm.
data_transform = transforms.Compose(
    [transforms.Normalize(mean=[2.3009], std=[42.1936])]
)

# Build the three datasets in train -> valid -> test order with identical settings.
train_data, valid_data, test_data = (
    audioDataloader(index_file=index_csv, transforms=data_transform)
    for index_csv in (train_dataset, valid_dataset, test_dataset)
)

In [7]:
# Shape of element 0 of the first training item after squeezing out size-1 dimensions.
train_data[0][0].squeeze().shape

torch.Size([201, 7201])

In [8]:
# Zero-filled buffer of shape (2, 201, 7201); filled with real samples in the following cells.
# Note that this rebinds `data`, which earlier held the array loaded from disk.
data = torch.zeros(2, 201, 7201)
data

tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]])

In [9]:
# Copy element 0 of the first training item into slot 0 of `data`, then display that slot.
data[0] = train_data[0][0]
data[0]

tensor([[ 2.3665,  0.5570,  1.2286,  ..., -0.0420, -0.0333, -0.0505],
        [ 2.2995,  0.1478,  0.1014,  ..., -0.0409, -0.0432, -0.0512],
        [ 1.3559,  0.0761,  0.4851,  ..., -0.0503, -0.0539, -0.0542],
        ...,
        [-0.0543, -0.0545, -0.0545,  ..., -0.0545, -0.0545, -0.0545],
        [-0.0544, -0.0545, -0.0545,  ..., -0.0545, -0.0545, -0.0545],
        [-0.0544, -0.0545, -0.0545,  ..., -0.0545, -0.0545, -0.0545]])

In [10]:
# Copy element 0 of the second training item into slot 1 of `data`.
data[1] = train_data[1][0]

In [11]:
# Overall shape of the stacked buffer: (instances, features, timesteps).
data.shape

torch.Size([2, 201, 7201])

In [12]:
# i selects a data instance: data[i] is everything stored for instance i.
i = 0
data[i]

tensor([[ 2.3665,  0.5570,  1.2286,  ..., -0.0420, -0.0333, -0.0505],
        [ 2.2995,  0.1478,  0.1014,  ..., -0.0409, -0.0432, -0.0512],
        [ 1.3559,  0.0761,  0.4851,  ..., -0.0503, -0.0539, -0.0542],
        ...,
        [-0.0543, -0.0545, -0.0545,  ..., -0.0545, -0.0545, -0.0545],
        [-0.0544, -0.0545, -0.0545,  ..., -0.0545, -0.0545, -0.0545],
        [-0.0544, -0.0545, -0.0545,  ..., -0.0545, -0.0545, -0.0545]])

In [13]:
# Shape of a single data instance: (features, timesteps).
data[i].shape

torch.Size([201, 7201])

In [14]:
# j selects a feature within instance i: data[i][j] is that feature's full series.
j = 0
data[i][j]

tensor([ 2.3665,  0.5570,  1.2286,  ..., -0.0420, -0.0333, -0.0505])

In [15]:
# Length of one feature's series, i.e. the number of timesteps.
data[i][j].shape

torch.Size([7201])

In [16]:
# k selects a timestep: data[i][j][k] is a single scalar value.
k = 0
data[i][j][k]

tensor(2.3665)

    A[i] represents the ith data instance.
    A[i][j] represents the jth feature of the ith data instance.
    A[i][j][k] represents the kth timestep of the jth feature of the ith data instance.

In [17]:
# Report the size of each of the three leading axes: instances, features, timesteps.
instance_count, feature_count, timestep_count = data.shape[:3]
print(f"there are {instance_count} data instance")
print(f"there are {feature_count} features of each data instance")
print(f"there are {timestep_count} timestep in each feature of each data instance")


there are 2 data instance
there are 201 features of each data instance
there are 7201 timestep in each feature of each data instance


In [18]:
# Convert the stacked tensor to a NumPy array.
# The isinstance guard keeps this cell safe to re-run: once `data` is already an
# ndarray it has no .numpy() method, so an unguarded second run would raise AttributeError.
if isinstance(data, torch.Tensor):
    data = data.numpy()

In [19]:
# NOTE(review): this cell repeats the dataset setup from In [6] with the same paths and transform.
train_dataset = "dataset_15sec/train/train.csv"
valid_dataset = "dataset_15sec/valid/valid.csv"
test_dataset = "dataset_15sec/test/test.csv"

# Single normalisation step shared by every split.
data_transform = transforms.Compose(
    [transforms.Normalize(mean=[2.3009], std=[42.1936])]
)

# Build the three datasets in train -> valid -> test order with identical settings.
train_data, valid_data, test_data = (
    audioDataloader(index_file=index_csv, transforms=data_transform)
    for index_csv in (train_dataset, valid_dataset, test_dataset)
)

In [20]:
# Number of items in the training set; used as the first dimension of the exported array.
data_instance_dim = len(train_data)
data_instance_dim

40000

In [21]:
# Feature count, read from the first training item: length of element 0's first sub-entry.
features_dim = len(train_data[0][0][0])
features_dim

201

In [22]:
# Timestep count, read from the first training item: length of one feature's series.
timestep_dim = len(train_data[0][0][0][0])
timestep_dim

7201

In [23]:
def export_split_to_npy(dataset, out_path, features_dim, timestep_dim):
    """Write element 0 of every item in ``dataset`` to ``out_path`` as a single .npy array.

    The file holds a float32 array of shape (len(dataset), features_dim, timestep_dim).
    It is created as a memory-mapped .npy file and filled one item at a time, so a
    whole split never has to fit in RAM (the training split alone is
    40000 x 201 x 7201 float32 values, roughly 230 GB).

    Args:
        dataset: sized, indexable collection; ``dataset[idx][0]`` must be tensor-like.
        out_path: destination file name, e.g. ``'neuroTrain.npy'``.
        features_dim: number of features per item.
        timestep_dim: number of timesteps per feature.

    Returns:
        The array backing ``out_path`` (an ``np.memmap``, or a plain ndarray when
        ``dataset`` is empty).

    Raises:
        ValueError: if an item does not hold exactly features_dim * timestep_dim values.
    """
    n_items = len(dataset)
    if n_items == 0:
        # Nothing to stream; write an empty array without memory-mapping the file.
        empty = np.zeros((0, features_dim, timestep_dim), dtype=np.float32)
        np.save(out_path, empty)
        return empty

    out = np.lib.format.open_memmap(
        out_path,
        mode="w+",
        dtype=np.float32,
        shape=(n_items, features_dim, timestep_dim),
    )
    for idx in tqdm(range(n_items)):
        sample = torch.as_tensor(dataset[idx][0]).detach().cpu().numpy()
        # reshape drops any leading size-1 dimension and fails loudly on a size mismatch.
        out[idx] = sample.reshape(features_dim, timestep_dim)
    out.flush()
    return out


# Each split is sized from its own dataset, so re-running this cell is safe and
# `data_instance_dim` is no longer overwritten as a side effect.
print("making training numpy file")
neuroTrain = export_split_to_npy(train_data, 'neuroTrain.npy', features_dim, timestep_dim)

print("making validation numpy file")
neuroValid = export_split_to_npy(valid_data, 'neuroValid.npy', features_dim, timestep_dim)

print("Making testing numpy file")
neuroTest = export_split_to_npy(test_data, 'neuroTest.npy', features_dim, timestep_dim)

making training numpy file


  1%|▏         | 550/40000 [01:33<1:51:21,  5.90it/s]


KeyboardInterrupt: 