In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

# from torcheval.metrics.functional import binary_accuracy


# Preprocessing/ setting up data sets

## import data from tsv and get the sequence length to use for padding

In [2]:
# import data.tsv file and create a dataframe

df = pd.read_csv('data.tsv', sep='\t')

# open file data_info.txt and read the first line

sequence_length = None
with open('data_info.txt', 'r') as f:
    line = f.readline()
    split_line_on_collen = line.split(':')
    sequence_length = int(split_line_on_collen[1])
    

## Split into train and test 

In [3]:

train_df = df.sample(frac=0.8, random_state=0)
test_df = df.drop(train_df.index)

## balance the positive and negative 

In [4]:
train_df_sit_ups = train_df[train_df['Y'] == 1]
train_df_non_sit_ups = train_df[train_df['Y'] == 0]


train_df_non_sit_ups_balanced = train_df_non_sit_ups.sample(n=len(train_df_sit_ups), random_state=0)


train_df = pd.concat([train_df_sit_ups, train_df_non_sit_ups_balanced])

test_df_sit_ups = test_df[test_df['Y'] == 1]
test_df_non_sit_ups = test_df[test_df['Y'] == 0]

test_df_non_sit_ups = test_df_non_sit_ups.sample(n=len(test_df_sit_ups), random_state=0)

test_df_balanced = pd.concat([test_df_sit_ups, test_df_non_sit_ups])
test_df = test_df_balanced

## separate x and y from test and training

In [5]:
train_x = train_df.drop(['Y'], axis=1).iloc[1:]
train_y = train_df['Y'].iloc[1:]

test_x = test_df.drop(['Y'], axis=1)


test_y = test_df['Y']

## Convert the cells in each collumn of the dataframe from string to a numpy array and convert the whole dataframe to numpy array

In [6]:
def convert_rows_to_nupy_array(df):
    numpy_array = []
    for index, row in df.iterrows():
        row_array = []
        for col in row.iteritems():
            col_float = []
            for item in col[1].split(','):
                col_float.append(float(item.replace('[', '').replace(']', '')))
            row_array.append(col_float)
        numpy_array.append(row_array)
    return np.array(numpy_array)

train_x_numpy = convert_rows_to_nupy_array(train_x)
test_x_numpy = convert_rows_to_nupy_array(test_x)

print(train_x_numpy.shape)
print(test_x_numpy[1][5])
            

(1087, 749, 3)
[ 0.51171875 -0.10406494 -0.10406494]


##### Remove Padding from numpy and create separate dataset

In [7]:
train_y_no_pad = train_y
test_y_no_pad = test_y

# remove the numpy array where you hava a [0,0,0] array

test_np_array = np.array([[1,2,1], [0,0,0], [1,2,1]])

# remove the [0,0,0] array from the numpy array test_np_array

test_np_array = test_np_array[test_np_array[:,0] != 0]



train_x_numpy_no_pad = []
# row_index = 0
for row in range(len(train_x_numpy)):
    temp = train_x_numpy[row][train_x_numpy[row][:,0] != 0]
    temp_list = []
    for col in temp:
        item_list = []
        for item in col:
            item_list.append(item)
        temp_list.append(item_list)
    train_x_numpy_no_pad.append(temp_list)
train_x_numpy_no_pad = np.array(train_x_numpy_no_pad)

test_x_numpy_no_pad = []
# row_index = 0
for row in range(len(test_x_numpy)):
    temp = test_x_numpy[row][test_x_numpy[row][:,0] != 0]
    temp_list = []
    for col in temp:
        item_list = []
        for item in col:
            item_list.append(item)
        temp_list.append(item_list)
    test_x_numpy_no_pad.append(temp_list)
test_x_numpy_no_pad = np.array(test_x_numpy_no_pad)
print(train_x_numpy_no_pad.shape)


(1087,)


  train_x_numpy_no_pad = np.array(train_x_numpy_no_pad)
  test_x_numpy_no_pad = np.array(test_x_numpy_no_pad)


## Train

## Dummy Classifier

In [8]:
# Dummy Classifer

from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score

dummy_clf = DummyClassifier(strategy="most_frequent")
print(train_x_numpy.shape, train_y.shape)
dummy_clf.fit(train_x_numpy, train_y)
y_pred = dummy_clf.predict(test_x_numpy)
print(accuracy_score(test_y, y_pred))

(1087, 749, 3) (1087,)
0.5


## Recurrent Neural Classifier

### Dataset loader

In [26]:

class MyDataset(torch.utils.data.Dataset):
    def __init__(self, input,target, seq_len):
        self.input = input
        self.target = target
        self.seq_len = seq_len
    def __getitem__(self, item):
        return self.input[item], self.target.iloc[item]

    def __len__(self):
        return  self.input.shape[0]

In [24]:
class MyVariableLengthDataset(torch.utils.data.Dataset):
    def __init__(self, input,target):
        self.input = input
        self.target = target
    def __getitem__(self, item):
        # print("input item", self.input[item])
        return self.input[item], self.target.iloc[item]

        # return input[item:item+self.seq_len], input[item+self.seq_len]
    def __len__(self):
        return self.input.shape[0]

In [35]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Hyper parameters

In [22]:
input_size = 3                                                                           
num_classes = 1
hidden_size = 3
num_layers = 3
learning_rate = 0.001
batch_size = 1
num_epochs = 10
clip = 5

### For testing reduce to certain size

In [None]:
# reduce size of train_x_numpy_no_pad to 1000
# train_x_numpy = train_x_numpy[:50]
# train_y = train_y[:50]

# test_x_numpy = test_x_numpy[:50]
# test_y = test_y[:50]


# print how many 1 and 0 in the train_y
print(train_y.value_counts())

## Neural Network Archetecture

In [95]:
class SitUpDetector(nn.Module):
    def __init__(self,input_size, num_classes, hidden_size, num_layers,drop_prob=0.5):
        super(SitUpDetector, self).__init__()
        self.hidden_size = hidden_size
        self.hidden_max_sequence = None
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,dropout=drop_prob, batch_first=False)
        self.dropout = nn.Dropout(drop_prob)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.softmax = nn.Softmax()
        

    def forward(self, x,hidden_state):
        batch_size = x.size(0)

        # pad_sequence(x, )

        x = x.unsqueeze(0)
        out, hidden= self.lstm(x, hidden_state)
        out = self.dropout(out)
        out = self.fc(out)
        # out = self.softmax(out)


        # get the last output in out
        out = out.squeeze(0)
        out = out.squeeze(1)
        
        out = out[-1]
        out = self.softmax(out)
        return out, hidden
    def init_hidden(self, batch_size):
        hidden = (torch.zeros(self.num_layers,batch_size,self.hidden_size).to(device), torch.zeros(self.num_layers,batch_size,self.hidden_size).to(device))
        self.hidden_max_sequence = hidden
        print("hidden state", self.hidden_max_sequence[0].shape)
        return hidden
    
    def get_hidden_state_for_sequence(self, sequence):
        return tuple([self.hidden_max_sequence[0].split(sequence, dim=1),self.hidden_max_sequence[1].split(sequence, dim=1)])

    def update_hidden_state(self, hidden_state, sequence):
        self.hidden_max_sequence = (torch.cat(hidden_state[0], dim=1), torch.cat(hidden_state[1], dim=1))
        print("hidden state", self.hidden_max_sequence[0].shape)
        

## Create Model, Optimizers, dataloader and other variables

In [96]:
model = SitUpDetector(input_size, num_classes, hidden_size, num_layers).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)



myDataset = MyDataset(train_x_numpy,train_y, sequence_length)

myVariableLengthDataset = MyVariableLengthDataset(train_x_numpy_no_pad,train_y)

train_loader = torch.utils.data.DataLoader(dataset=myVariableLengthDataset, batch_size=batch_size)

losses = []
loss = None
valid_loss_min = np.Inf
total_steps = len(train_loader)


### Train Network

In [97]:

model.train()
for epoch in range(num_epochs):
    h = model.init_hidden(sequence_length)
    for i, (features, target) in enumerate(train_loader):
        features = torch.tensor(features)
        target = torch.tensor(target)

        h = model.get_hidden_state_for_sequence(features.size(0))

        # h = tuple([each.data for each in h])

        features, target = features.to(device), target.to(device)
        model.zero_grad()
        outputs, hidden = model(features.float(),h)
        model.update_hidden_state(hidden, features.size(0))
        outputs = outputs.unsqueeze(0)
        loss = criterion(outputs, target)
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        loss.backward()

        optimizer.step()
        # if (i+1) % 50 == 0:
        #     val_h = model.init_hidden(batch_size)
        #     val_losses = []
        #     model.eval()
        #     for inp, lab in train_loader:
        #         inp = torch.tensor(inp)
        #         lab = torch.tensor(lab)
        #         val_h = model.init_hidden(inp.size(0))
        #         inp, lab = inp.float().to(device), lab.to(device)
        #         out, val_h = model(inp, val_h)
        #         out = out.unsqueeze(0)
        #         print("out",out)
        #         print("lab",lab)
        #         val_loss = criterion(out, lab)
        #         val_losses.append(val_loss.item())
                
        #     model.train()
        #     print("Epoch: {}/{}...".format(epoch+1, num_epochs),
        #           "Step: {}...".format(i),
        #           "Loss: {:.6f}...".format(loss.item()),
        #           "Val Loss: {:.6f}".format(np.mean(val_losses)))
        #     losses.append(loss.item())
        #     # print("why","{:.6f}".format(loss.item().detach().numpy()))

        print("loss",loss.item())
        losses.append(loss.item())
            # if np.mean(val_losses) <= valid_loss_min:
            #     torch.save(model.state_dict(), './state_dict.pt')
            #     print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,np.mean(val_losses)))
            #     valid_loss_min = np.mean(val_losses)

# graph the losses 

# get the lenght of the losses array

# create an array of the same length as the losses array with index values
for i in range(len(losses)):
    if type(losses[i]) == torch.Tensor:
        losses[i] = losses[i].detach().numpy()

y = np.array(losses)
print("plt y shpae loss", y.shape)
# print("y shpae")
# print(y)

x = np.arange(0,len(losses))
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.plot(x,y)
plt.savefig(f"loss-for-params-num-{num_layers}-learning_rate{learning_rate}-epochs-{num_epochs}.png")
plt.show()






# Confusion Matrix
# from sklearn.metrics import confusion_matrix
# import seaborn as sns
# import matplotlib.pyplot as plt

# cm = confusion_matrix(test_y, all_predictions)
# print(cm)
# sns.heatmap(cm, annot=True, fmt="d")

torch.save(model.state_dict(), 'sit_up_detector.pth')


hidden state torch.Size([3, 749, 3])


  target = torch.tensor(target)


AttributeError: 'tuple' object has no attribute 'size'

## Test Accuracy

In [None]:
# Check accuracy
test_dataset = MyDataset(train_x_numpy,train_y, sequence_length)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=1)
n_correct = 0
n_samples = 0
model.eval()

all_predictions = []


test_losses = []
num_correct = 0
h = model.init_hidden(batch_size)


model.eval()
for inputs, label in train_loader:
    h = tuple([each.data for each in h])
    inputs, label = inputs.to(device), label.to(device)
    output, h = model(inputs.float(), h)
    test_loss = criterion(output, label.float())
    test_losses.append(test_loss.item())
    pred = torch.round(output.squeeze())  # Rounds the output to 0/1
    correct_tensor = pred.eq(label.float().view_as(pred))
    correct = np.squeeze(correct_tensor.cpu().numpy())
    num_correct += np.sum(correct)

print("Test loss: {:.3f}".format(np.mean(test_losses)))
test_acc = num_correct/len(test_dataloader.dataset)
print("Test accuracy: {:.3f}%".format(test_acc*100))

# Train with different network with data spread out and with simpler rnn

In [None]:
# test_flatten = train_x_numpy[0].flatten()

# # flatten the rows in train_x_numpy to 1D array
# train_x_numpy_flatten_list = []
# for i in range(len(train_x_numpy)):
#     train_x_numpy_flatten_list.append(train_x_numpy[i].flatten().tolist())

# train_x_flatten_numpy = np.array(train_x_numpy_flatten_list)

In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
class MyFlattenDataset(torch.utils.data.Dataset):
    def __init__(self, input,target):
        self.input = input
        self.target = target
    def __getitem__(self, item):
        # print("input item", self.input[item])
        return self.input[item], self.target.iloc[item]

        # return input[item:item+self.seq_len], input[item+self.seq_len]
    def __len__(self):
        return self.input.shape[0]

In [10]:
class MyVariableLengthDataset(torch.utils.data.Dataset):
    def __init__(self, input,target):
        self.input = input
        self.target = target
    def __getitem__(self, item):
        # print("input item", self.input[item])
        return self.input[item], self.target.iloc[item]

        # return input[item:item+self.seq_len], input[item+self.seq_len]
    def __len__(self):
        return self.input.shape[0]

In [12]:
input_size = 3
num_classes = 2
hidden_size = 12
learning_rate = 0.001
num_epochs = 50


In [11]:
class SitUpDetectorSimpleRNN(nn.Module):
    def __init__(self,input_size, num_classes, hidden_size):
        super(SitUpDetectorSimpleRNN, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.in2hidden = nn.Linear(input_size + hidden_size, hidden_size)
        self.in2output = nn.Linear(input_size + hidden_size, num_classes)
    

    def forward(self, x, hidden_state):
        combined = torch.cat((x.unsqueeze(0), hidden_state),1)
        hidden = torch.sigmoid(self.in2hidden(combined))
        output = self.in2output(combined)
        return output, hidden

    def init_hidden(self):
        return nn.init.kaiming_uniform_(torch.empty(1, self.hidden_size))

In [13]:
#  create pytorch lstm variable recurrent classifier

# train SitUpDetectorVariableInput
model = SitUpDetectorSimpleRNN(input_size, num_classes, hidden_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

myDatasetVariableSequenceLengthDataset = MyVariableLengthDataset(train_x_numpy_no_pad,train_y)
train_loader = torch.utils.data.DataLoader(dataset=myDatasetVariableSequenceLengthDataset, batch_size=1, shuffle=False)

losses = []
loss = None
total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (features, target) in enumerate(train_loader):
        hidden_state = model.init_hidden()
        for xyz in features:
            xyz = torch.tensor(xyz)
            output, hidden_state = model(xyz.float(), hidden_state)
        print("output ", output)
        print("output shape", output.shape)
        print("target shape", target.shape)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()
    print(
            f"Epoch [{epoch + 1}/{num_epochs}], "
            f"Loss: {loss.item():.4f}"
            )
    losses.append(loss.item())


y = np.array(losses)
x = np.arange(0,len(losses))
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.plot(x,y)
plt.savefig(f"simple-rnn-loss-for-params-num-{num_layers}-learning_rate{learning_rate}-epochs-{num_epochs}.png")
plt.show()




output  tensor([[-0.1074,  0.0873]], grad_fn=<AddmmBackward0>)
output shape torch.Size([1, 2])
target shape torch.Size([1])
output  tensor([[0.0849, 0.1704]], grad_fn=<AddmmBackward0>)
output shape torch.Size([1, 2])
target shape torch.Size([1])
output  tensor([[-0.1293,  0.1040]], grad_fn=<AddmmBackward0>)
output shape torch.Size([1, 2])
target shape torch.Size([1])
output  tensor([[-0.0969,  0.1710]], grad_fn=<AddmmBackward0>)
output shape torch.Size([1, 2])
target shape torch.Size([1])
output  tensor([[-0.1368,  0.1107]], grad_fn=<AddmmBackward0>)
output shape torch.Size([1, 2])
target shape torch.Size([1])
output  tensor([[-0.1261,  0.1417]], grad_fn=<AddmmBackward0>)
output shape torch.Size([1, 2])
target shape torch.Size([1])
output  tensor([[-0.1611,  0.1517]], grad_fn=<AddmmBackward0>)
output shape torch.Size([1, 2])
target shape torch.Size([1])
output  tensor([[-0.1649,  0.1631]], grad_fn=<AddmmBackward0>)
output shape torch.Size([1, 2])
target shape torch.Size([1])
output  te

KeyboardInterrupt: 

In [None]:

myTestDatasetVariableSequenceLengthDataset = MyVariableLengthDataset(test_x_numpy_no_pad,test_y)
test_loader = torch.utils.data.DataLoader(dataset=myTestDatasetVariableSequenceLengthDataset, batch_size=1, shuffle=False)

num_correct = 0
num_samples = len(test_loader)

model.eval()

with torch.no_grad():
    for i, (features, target) in enumerate(test_loader):
        hidden_state = model.init_hidden()
        for xyz in features:
            xyz = torch.tensor(xyz)
            output, hidden_state = model(xyz.float(), hidden_state)
        _, pred = torch.max(output, dim=1)
        num_correct += bool(pred == target)

print(f"Accuracy: {num_correct / num_samples * 100:.4f}%")

# save the accuracy and model hyperparameters to a text file 

with open(f"simple-rnn-accuracy-for-params-num-{num_layers}-learning_rate{learning_rate}-epochs-{num_epochs}.txt", "w") as f:
    f.write(f"Accuracy: {num_correct / num_samples * 100:.4f}%")
    f.write(f"num_layers: {num_layers}")
    f.write(f"learning_rate: {learning_rate}")
    f.write(f"num_epochs: {num_epochs}")
    f.write(f"hidden_size: {hidden_size}")
    f.write(f"input_size: {input_size}")
    f.write(f"num_classes: {num_classes}")
    f.write(f"optimizer: {optimizer}")
    



## Hyper param tunning

In [None]:
# !pip install nbmultitask

In [16]:
# find the best hyperparameters for the model

# create a list of hyperparameters to test
import itertools


learning_rates = [0.001, 0.005,0.0005, 0.0001]
num_epochs = [200,400,800]
hidden_sizes = [2,3,6,12, 24, 36, 48]
num_classes = 2
input_size = 3
optimizers = [torch.optim.Adam]
# create a list of all possible combinations of hyperparameters
hyperparameter_combinations = list(itertools.product(learning_rates, num_epochs, hidden_sizes, optimizers))
# create a list to store the accuracy of each combination of hyperparameters
accuracy_list = []
# loop through each combination of hyperparameters








for combination in hyperparameter_combinations:

    # create pytorch lstm variable recurrent classifier
    model = SitUpDetectorSimpleRNN(input_size, num_classes, combination[2]).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = combination[3](model.parameters(), lr=combination[0])

    myDatasetVariableSequenceLengthDataset = MyVariableLengthDataset(train_x_numpy_no_pad,train_y)
    train_loader = torch.utils.data.DataLoader(dataset=myDatasetVariableSequenceLengthDataset, batch_size=1, shuffle=False)

    losses = []
    loss = None
    total_steps = len(train_loader)
    for epoch in range(combination[1]):
        for i, (features, target) in enumerate(train_loader):
            hidden_state = model.init_hidden()
            for xyz in features:
                xyz = torch.tensor(xyz)
                output, hidden_state = model(xyz.float(), hidden_state)
            loss = criterion(output, target)

            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
        print(
                f"Epoch [{epoch + 1}/{combination[1]}], "
                f"Loss: {loss.item():.4f}"
                )
        losses.append(loss.item())



    myTestDatasetVariableSequenceLengthDataset = MyVariableLengthDataset(test_x_numpy_no_pad,test_y)
    test_loader = torch.utils.data.DataLoader(dataset=myTestDatasetVariableSequenceLengthDataset, batch_size=1, shuffle=False)

    model.eval()
    myTestDatasetVariableSequenceLengthDataset = MyVariableLengthDataset(test_x_numpy_no_pad,test_y)
    test_loader = torch.utils.data.DataLoader(dataset=myTestDatasetVariableSequenceLengthDataset, batch_size=1, shuffle=False)

    num_correct = 0
    num_samples = len(test_loader)

    model.eval()

    with torch.no_grad():
        for i, (features, target) in enumerate(test_loader):
            hidden_state = model.init_hidden()
            for xyz in features:
                xyz = torch.tensor(xyz)
                output, hidden_state = model(xyz.float(), hidden_state)
            _, pred = torch.max(output, dim=1)
            num_correct += bool(pred == target)

    print(f"Accuracy: {num_correct / num_samples * 100:.4f}%")
    accuracy_list.append(num_correct / num_samples * 100)

# display the best hyperparameters
print(f"Best hyperparameters: {hyperparameter_combinations[accuracy_list.index(max(accuracy_list))]}")


Epoch [1/200], Loss: 0.3704
Epoch [2/200], Loss: 0.1424
Epoch [3/200], Loss: 0.0913
Epoch [4/200], Loss: 0.0744
Epoch [5/200], Loss: 0.0677
Epoch [6/200], Loss: 0.0645
Epoch [7/200], Loss: 0.0626
Epoch [8/200], Loss: 0.0613
Epoch [9/200], Loss: 0.0602
Epoch [10/200], Loss: 0.0593
Epoch [11/200], Loss: 0.0585
Epoch [12/200], Loss: 0.0577
Epoch [13/200], Loss: 0.0569
Epoch [14/200], Loss: 0.0562
Epoch [15/200], Loss: 0.0555
Epoch [16/200], Loss: 0.0549
Epoch [17/200], Loss: 0.0543
Epoch [18/200], Loss: 0.0537
Epoch [19/200], Loss: 0.0531
Epoch [20/200], Loss: 0.0525
Epoch [21/200], Loss: 0.0519
Epoch [22/200], Loss: 0.0514
Epoch [23/200], Loss: 0.0508
Epoch [24/200], Loss: 0.0503
Epoch [25/200], Loss: 0.0498
Epoch [26/200], Loss: 0.0494
Epoch [27/200], Loss: 0.0490
Epoch [28/200], Loss: 0.0486
Epoch [29/200], Loss: 0.0482
Epoch [30/200], Loss: 0.0479
Epoch [31/200], Loss: 0.0476
Epoch [32/200], Loss: 0.0473
Epoch [33/200], Loss: 0.0470
Epoch [34/200], Loss: 0.0467
Epoch [35/200], Loss: 0

KeyboardInterrupt: 

In [19]:
best_hyperparameters = hyperparameter_combinations[accuracy_list.index(max(accuracy_list))]
print(f"Best hyperparameters: {best_hyperparameters}")
print(f"Best accuracy: {max(accuracy_list)}")

Best hyperparameters: (0.001, 200, 12, <class 'torch.optim.adam.Adam'>)
Best accuracy: 79.23728813559322


# Multivariate classifier

### Imports

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

from sktime.classification.compose import ColumnEnsembleClassifier
from sktime.classification.dictionary_based import BOSSEnsemble
from sktime.classification.interval_based import TimeSeriesForestClassifier
# from sktime.classification.shapelet_based import MrSEQLClassifier
from sktime.datasets import load_basic_motions
from sktime.transformations.panel.compose import ColumnConcatenator


from sktime.datatypes._panel._convert import (
    from_3d_numpy_to_nested,
    from_multi_index_to_3d_numpy,
    from_nested_to_3d_numpy,
)

### Test with example

In [None]:
X, y = load_basic_motions(return_X_y=True)
print(X.shape)
print(y.shape)
print(y[0])
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
X_train.head()
np.unique(y_train)


steps = [
    ("concatenate", ColumnConcatenator()),
    ("classify", TimeSeriesForestClassifier(n_estimators=100)),
]
clf = Pipeline(steps)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
print("score", score)

clf = ColumnEnsembleClassifier(
    estimators=[
        ("TSF0", TimeSeriesForestClassifier(n_estimators=100), [0]),
        ("BOSSEnsemble3", BOSSEnsemble(max_ensemble_size=5), [3]),
    ]
)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

print("score ", score)

# clf = MrSEQLClassifier()
# clf.fit(X_train, y_train)
# clf.score(X_test, y_test)

## implement on my data

### convert current dataset to sktime format

In [None]:
# import data.tsv file and create a dataframe

df = pd.read_csv('data.tsv', sep='\t')

# open file data_info.txt and read the first line

sequence_length = None
with open('data_info.txt', 'r') as f:
    line = f.readline()
    split_line_on_collen = line.split(':')
    sequence_length = int(split_line_on_collen[1])

In [None]:

train_df = df.sample(frac=0.8, random_state=0)
test_df = df.drop(train_df.index)
train_x = train_df.drop(['Y'], axis=1).iloc[1:]
train_y = train_df['Y'].iloc[1:]
y = train_y.to_numpy()


In [None]:
def convert_rows_to_nupy_array(df):
    numpy_array = []
    for index, row in df.iterrows():
        row_array = []
        col_x = []
        col_y = []
        col_z = []
        for col in row.iteritems():
            col_float = []
            for item in col[1].split(','):
                col_float.append(float(item.replace('[', '').replace(']', '')))
            col_x.append(col_float[0])   
            col_y.append(col_float[1])
            col_z.append(col_float[2]) 
        row_array.append(col_x)
        row_array.append(col_y)
        row_array.append(col_z)
        numpy_array.append(row_array)
    return np.array(numpy_array)

train_x_numpy = convert_rows_to_nupy_array(train_x)

print(train_x_numpy.shape)

In [None]:

multivariate_dataframe_train_x = from_3d_numpy_to_nested(train_x_numpy_x_y_z_separate_arrays)
# multivariate_dataframe_test_x = from_3d_numpy_to_nested(multivariate_list_np_test)
print(multivariate_dataframe_train_x.shape)

In [None]:

X_train, X_test, y_train, y_test = train_test_split(multivariate_dataframe_train_x, y, random_state=42)


steps = [
    ("concatenate", ColumnConcatenator()),
    ("classify", TimeSeriesForestClassifier(n_estimators=100)),
]
clf = Pipeline(steps)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
score = clf.score(X_test, y_test)
print("score is", score)


clf = ColumnEnsembleClassifier(
    estimators=[
        ("TSF0", TimeSeriesForestClassifier(n_estimators=100), [0]),
        ("BOSSEnsemble3", BOSSEnsemble(max_ensemble_size=5,n_jobs=-1), [2]),
    ]
)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
print(score)

# Convulational Neural Network

In [95]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define the CRNN architecture
class CRNN(nn.Module):
    def __init__(self):
        super(CRNN, self).__init__()
        
        # Define the convolutional layers
        self.conv1 = nn.Conv1d(in_channels=3, out_channels=16, kernel_size=5)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=5)
        
        # Define the pooling layers
        self.pool = nn.MaxPool1d(2, 2)
        
        # Define the RNN
        self.rnn = nn.LSTM(input_size=32 * 8 * 1, hidden_size=128, num_layers=2, batch_first=True)
        
        # Define the fully connected layer
        self.fc = nn.Linear(128, 2)

    def forward(self, x):
        # Apply the convolutional and pooling layers
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        
        # Flatten the output of the pooling layers
        x = x.view(x.size(0), -1, 32 * 8 * 1)
        
        # Apply the RNN
        x, _ = self.rnn(x)
        
        # Take the output from the final time step of the RNN
        x = x[:, -1, :]
        
        # Apply the fully connected layer
        x = self.fc(x)
        
        return x

# Create an instance of the CRNN
model = CRNN()

# Print a summary of the model
print(model)

CRNN(
  (conv1): Conv1d(3, 16, kernel_size=(5,), stride=(1,))
  (conv2): Conv1d(16, 32, kernel_size=(5,), stride=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (rnn): LSTM(256, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
)


In [96]:
# Define the CRNN model
model = CRNN()

# Define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

# Train the model on the training set
for epoch in range(num_epochs):
    # Iterate over the training data
    for data, labels in train_loader:
        # Feed the data into the model
        output = model(data)
        
        # Compute the loss
        loss = criterion(output, labels)
        
        # Update the model's parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    # Evaluate the model on the validation set
    with torch.no_grad():
        correct = 0
        total = 0
        for data, labels in val_loader:
            # Feed the data into the model
            output = model(data)
            
            # Compute the accuracy
            _, predicted = torch.max(output.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
        # Print the validation accuracy
        print("Epoch {}: Validation Accuracy = {}".format(epoch, correct / total))


type input item <class 'list'>
type target item <class 'numpy.int64'>


TypeError: conv1d() received an invalid combination of arguments - got (list, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!list!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!list!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)


In [None]:
from sklearn.model_selection import GridSearchCV

# Define the hyperparameters to search over
hyperparameters = {'learning_rate': [1e-3, 1e-4],
                    'hidden_size': [128, 256, 512],
                    'num_layers': [1, 2, 3]}

# Use grid search to find the best hyperparameters
model = GridSearchCV(CRNN, hyperparameters, cv=3)
model.fit(X_train, y_train)

# Print the best set of hyperparameters
print(model.best_params_)

# possible why to infer on device

In [None]:
# Export the trained model
torch.jit.save(model, 'model.pt')

# Load the trained model on the Android device
model = torch.jit.load('model.pt')

# Use the model to make predictions on new data
output = model(x)

# Convert the model to ONNX
torch.onnx.export(model, x, 'model.onnx')
