## Data Pre-Processing for Sigdata

In [1]:
import pdm_functions as fns
import torch 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import pdm_functions as fns
import process_fns as pfns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset,  Subset




In [2]:
f_normal = pd.read_csv('../dataset/5528_drop_imbalance_normal.csv')
f_error = pd.read_csv('../dataset/5528_drop_imbalance_error.csv')

In [3]:
f_normal['created_at'] = pd.to_datetime(f_normal['created_at'], unit='s')
f_normal = f_normal.sort_values(by='created_at')
 
f_error['created_at'] = pd.to_datetime(f_error['created_at'], unit='s')
f_error = f_error.sort_values(by='created_at')

f_normal = f_normal.drop(columns=['asset_id', 'created_at', 'created_at_datetime', 'looseness_health', 'time','misalignment_health', 'bearing_health', 'imbalance_health'])
f_error = f_error.drop(columns=['asset_id', 'created_at', 'created_at_datetime', 'looseness_health', 'time','misalignment_health', 'bearing_health', 'imbalance_health'])

In [4]:
f_normal_tensor = torch.tensor(f_normal.values, dtype=torch.float32)
f_error_tensor = torch.tensor(f_error.values, dtype=torch.float32)

f_normal_tensor.shape, f_error_tensor.shape

(torch.Size([2220, 38]), torch.Size([192, 38]))

In [5]:
f_normal_label = torch.ones((2220))
f_error_label = torch.zeros((192))

In [6]:
f_normal_label.shape, f_error_label.shape

(torch.Size([2220]), torch.Size([192]))

In [7]:
sig_dataset = torch.concat((f_normal_tensor, f_error_tensor), dim = 0)
sig_labelset = torch.concat((f_normal_label, f_error_label), dim= 0 )
sig_dataset.shape, sig_labelset.shape

(torch.Size([2412, 38]), torch.Size([2412]))

In [8]:
sig_dataset = sig_dataset.reshape((-1,1,12,38))
sig_labelset = sig_labelset.reshape((-1,12))[:, 0]

In [9]:
sig_dataset.shape, sig_labelset.shape

(torch.Size([201, 1, 12, 38]), torch.Size([201]))

In [10]:
sig_labelset[sig_labelset==1].shape, sig_labelset[sig_labelset!=1].shape

(torch.Size([185]), torch.Size([16]))

In [11]:
train_dataset, temp_dataset, train_labels, temp_labels = train_test_split(sig_dataset, sig_labelset, train_size=0.8, stratify=sig_labelset, random_state=1)

dev_dataset, test_dataset, dev_labels, test_labels = train_test_split(temp_dataset, temp_labels, train_size=.5, stratify=temp_labels, random_state=1)

print(train_dataset.shape, train_labels.shape)
abnormal_indices = [i for i, label in enumerate(train_labels) if label != 1] 
normal_indices = [i for i, label in enumerate(train_labels) if label == 1]
abnormal_data = Subset(train_dataset, abnormal_indices)
normal_data = Subset(train_dataset, normal_indices)



torch.Size([160, 1, 12, 38]) torch.Size([160])


In [12]:
data, label =  fns.multi_datasets_stacks_abnormal(train_dataset, multi_dim = 10, num_groups=12, ab_indices = abnormal_indices, no_indices = normal_indices)

torch.Size([277, 1, 12, 38])


In [13]:
data.shape, label.shape

(torch.Size([277, 1, 12, 38]), torch.Size([277]))

In [14]:
label[label!=1].shape, label[label==1].shape

(torch.Size([130]), torch.Size([147]))

In [15]:
train_data_aug, train_label_aug = fns.multi_datasets_stacks(data, label, multi_dim=10, num_groups=12)

100%|█████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 434.79it/s]


In [16]:
train_data_aug.shape, train_label_aug.shape

(torch.Size([2770, 1, 12, 38]), torch.Size([2770]))

In [17]:
train_label_aug[train_label_aug!=1].shape, train_label_aug[train_label_aug==1].shape

(torch.Size([1300]), torch.Size([1470]))

In [18]:
dev_labels[dev_labels!=1].shape, dev_labels[dev_labels==1].shape

(torch.Size([1]), torch.Size([19]))

In [19]:
dev_data_aug, dev_label_aug = fns.multi_datasets_stacks(dev_dataset, dev_labels, multi_dim=10, num_groups=12)

100%|█████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 556.47it/s]


In [20]:
torch.save({'X_train':train_data_aug, 'y_train': train_label_aug, 'X_dev' : dev_data_aug, 'y_dev':dev_label_aug, 'X_test':test_dataset, 'y_test':test_labels}, 'datasets/sig_datasets.pt')

## ==================================================================================

## Data Pre-Processing for Spectrum Data

In [2]:
data = pfns.to_stack('../dataset/5528_droped_data.csv', 'cls') 

In [3]:
X_train, y_train = data[0][0], data[0][1]
X_test, y_test = data[1][0], data[1][1]
X_dev, y_dev = data[2][0], data[2][1]

In [4]:
X_train.shape, y_train.shape, X_dev.shape, y_dev.shape, X_test.shape, y_test.shape

(torch.Size([1452, 3, 2048]),
 torch.Size([1452]),
 torch.Size([444, 3, 2048]),
 torch.Size([444]),
 torch.Size([444, 3, 2048]),
 torch.Size([444]))

In [5]:
spec_data = torch.concat((X_train, X_dev, X_test), dim=0)
spec_label = torch.concat((y_train, y_dev, y_test), dim=0)

In [6]:
spec_data.shape, spec_label.shape

(torch.Size([2340, 3, 2048]), torch.Size([2340]))

In [7]:
spec_data = spec_data.reshape((-1, 1, 36, 2048))
spec_data.shape

torch.Size([195, 1, 36, 2048])

In [8]:
spec_label = spec_label.reshape(-1, 12)[:,0]
spec_label.shape

torch.Size([195])

In [9]:
spec_label[spec_label == 1].shape, spec_label[spec_label != 1].shape

(torch.Size([179]), torch.Size([16]))

In [10]:
train_dataset, temp_dataset, train_labels, temp_labels = train_test_split(spec_data, spec_label, train_size=0.8, stratify=spec_label, random_state=77)

dev_dataset, test_dataset, dev_labels, test_labels = train_test_split(temp_dataset, temp_labels, train_size=.5, stratify=temp_labels, random_state=77)

print(train_dataset.shape, train_labels.shape)
abnormal_indices = [i for i, label in enumerate(train_labels) if label != 1] 
normal_indices = [i for i, label in enumerate(train_labels) if label == 1]
abnormal_data = Subset(train_dataset, abnormal_indices)
normal_data = Subset(train_dataset, normal_indices)


torch.Size([156, 1, 36, 2048]) torch.Size([156])


In [11]:
train_labels[train_labels == 1].shape, train_labels[train_labels != 1].shape

(torch.Size([143]), torch.Size([13]))

In [12]:
test_labels[test_labels == 1].shape, test_labels[test_labels != 1].shape

(torch.Size([18]), torch.Size([2]))

In [13]:
data, label =  fns.multi_datasets_stacks_abnormal(train_dataset, multi_dim = 9, num_groups=12, ab_indices = abnormal_indices, no_indices = normal_indices)

torch.Size([260, 1, 36, 2048])


In [14]:
X_dev = X_dev.reshape(-1,1,36,2048)
X_test = X_test.reshape(-1,1,36,2048)

In [15]:
y_dev = y_dev.reshape(-1, 12)[:,0]
y_test = y_test.reshape(-1, 12)[:,0]

In [16]:
data.shape, label.shape, X_dev.shape, y_dev.shape, X_test.shape, y_test.shape

(torch.Size([260, 1, 36, 2048]),
 torch.Size([260]),
 torch.Size([37, 1, 36, 2048]),
 torch.Size([37]),
 torch.Size([37, 1, 36, 2048]),
 torch.Size([37]))

In [17]:
label[label==1].shape, label[label!=1].shape

(torch.Size([143]), torch.Size([117]))

In [18]:
y_dev[y_dev==1].shape, y_dev[y_dev!=1].shape

(torch.Size([34]), torch.Size([3]))

In [19]:
train_data_aug, train_label_aug = fns.multi_datasets_stacks(data, label, multi_dim=10, num_groups=12)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 57.87it/s]


In [20]:
dev_data_aug, dev_label_aub = fns.multi_datasets_stacks(X_dev, y_dev, multi_dim=10, num_groups=12)

100%|█████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 243.10it/s]


In [21]:
train_data_aug.shape, train_label_aug.shape

(torch.Size([2600, 1, 36, 2048]), torch.Size([2600]))

In [22]:
dev_label_aug = dev_label_aub
dev_data_aug.shape, dev_label_aub.shape

(torch.Size([370, 1, 36, 2048]), torch.Size([370]))

In [23]:
dev_label_aug[dev_label_aug == 1].shape, dev_label_aug[dev_label_aug!=1].shape

(torch.Size([340]), torch.Size([30]))

In [24]:
SAVEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE

NameError: name 'SAVEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE' is not defined

In [27]:
train_data_aug.shape, train_label_aug.shape, dev_data_aug.shape, dev_label_aug.shape, test_dataset.shape, test_dataset.shape

(torch.Size([2600, 1, 36, 2048]),
 torch.Size([2600]),
 torch.Size([370, 1, 36, 2048]),
 torch.Size([370]),
 torch.Size([20, 1, 36, 2048]),
 torch.Size([20, 1, 36, 2048]))

In [25]:
torch.save({'X_train':train_data_aug, 'y_train': train_label_aug, 'X_dev' : dev_data_aug, 'y_dev':dev_label_aug, 'X_test':test_dataset, 'y_test':test_labels}, 'datasets/spec_datasets_4.pt')