In [16]:
import os
import numpy as np
from keras.models import load_model
from models import Recurrent2DConvNet, log10
from tqdm import tqdm
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import pandas as pd
import pickle

In [2]:
# Evaluation configuration.
# The dataset root can be overridden with the GUT_DATA_ROOT environment variable so the
# notebook is runnable on other machines; unset or empty falls back to the original path.
data_root = os.environ.get('GUT_DATA_ROOT') or '/home/seth/datasets/gut'
feats = 'stft'      # feature sub-directory joined onto data_root
total_dur = 3       # number of consecutive feature files that make up one model input
delta_time = 1.0    # multiplied by 100 to obtain feat_dim (rows per time step)

In [3]:
# Class names in sorted order; a class's position in this list is its integer label.
classes = sorted(['anxiety', 'baseline', 'concentration', 'digestion', 'disgust', 'frustration'])
n_classes = len(classes)
int2cls = dict(zip(range(len(classes)), classes))
cls2int = dict(zip(classes, range(len(classes))))

path = os.path.join(data_root, feats)
paths = []
labels = []

# Walk <path>/<subject>/<class>/ and record the first file of every complete,
# non-overlapping window of `total_dur` consecutive files, plus its class label.
for sub_dir in os.listdir(path):
    for class_dir in os.listdir(os.path.join(path, sub_dir)):
        cls_path = os.path.join(path, sub_dir, class_dir)
        # File names are expected to start with an integer index (e.g. "300.npy").
        files = sorted(os.listdir(cls_path), key=lambda x: int(x.split('.')[0]))
        # Integer division keeps complete windows only, so directories with fewer than
        # total_dur files contribute nothing. The earlier stop value
        # (len - len % total_dur - total_dur) was exclusive and therefore always
        # dropped the last complete window.
        n_windows = len(files) // total_dur
        for w in range(n_windows):
            paths.append(os.path.join(cls_path, files[w * total_dur]))
            labels.append(cls2int[class_dir])

In [4]:
# Load one feature file to read the array dimensions the model input is derived from.
sample = np.load(paths[0])

# feat_dim: rows per time step; time_dim: number of such steps spanned by total_dur files.
feat_dim = int(100 * delta_time)
time_dim = int(total_dur * sample.shape[0] / feat_dim)
input_shape = (time_dim, feat_dim) + (sample.shape[1], 1)
print('input shape: {}'.format(input_shape))

input shape: (3, 100, 128, 1)


In [5]:
# Restore the saved network; log10 is supplied as a custom object so the model can be
# deserialized, then print the layer summary.
model_path = os.path.join('models', 'RCNN.model')
model = load_model(model_path, custom_objects={'log10': log10})
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 3, 100, 128, 1)    0         
_________________________________________________________________
scale_input (TimeDistributed (None, 3, 100, 128, 1)    0         
_________________________________________________________________
conv_block_1 (Sequential)    (None, 3, 50, 64, 8)      1248      
_________________________________________________________________
conv_block_2 (Sequential)    (None, 3, 25, 32, 16)     5808      
_________________________________________________________________
conv_block_3 (Sequential)    (None, 3, 12, 16, 32)     23136     
_______________________________________

In [6]:
def isolate_sub(string):
    """Return the name of the directory two levels above the final path component.

    For the paths collected in this notebook (``<root>/<subject>/<class>/<file>``)
    that is the subject directory name.
    """
    class_dir = os.path.dirname(string)
    subject_dir = os.path.dirname(class_dir)
    return os.path.basename(subject_dir)

In [7]:
def isolate_cls(string):
    """Return the name of the directory that directly contains the final path component.

    For the paths collected in this notebook (``<root>/<subject>/<class>/<file>``)
    that is the class directory name.
    """
    parent_dir = os.path.dirname(string)
    return os.path.basename(parent_dir)

In [8]:
def predict_path(path):
    """Predict the class index for the window whose first feature file is ``path``.

    The integer before ``.npy`` in the file name is the start index. ``total_dur``
    files named ``start``, ``start + 100``, ... are loaded from the same directory and
    joined along axis 0. The joined array is cut into ``input_shape[0]`` pieces of
    ``input_shape[1]`` rows each, a new axis is added at position 3 and a leading axis
    at position 0, and the result is passed to the global ``model``.

    Returns the argmax of the model output.
    """
    directory, file_name = os.path.split(path)
    first_ix = int(file_name.split('.npy')[0])

    # Consecutive files differ by 100 in their numeric name.
    chunks = [
        np.load(os.path.join(directory, str(first_ix + (step * 100)) + '.npy'))
        for step in range(total_dur)
    ]
    joined = np.concatenate(chunks, axis=0)

    n_steps, step_len = input_shape[0], input_shape[1]
    pieces = [
        np.expand_dims(joined[lo:lo + step_len, :], axis=0)
        for lo in range(0, n_steps * step_len, step_len)
    ]
    batch = np.concatenate(pieces, axis=0)
    batch = np.expand_dims(batch, axis=3)
    batch = np.expand_dims(batch, axis=0)

    return np.argmax(model.predict(batch))

In [9]:
# Map every subject directory to its list of [window_path, class_index] pairs.
# One pass over `paths` replaces the earlier scan of all paths for each subject
# (subjects x paths comparisons); keys and per-subject ordering are unchanged.
sub2data = {sub: [] for sub in sorted(os.listdir(path))}

for _path in tqdm(paths):
    bucket = sub2data.get(isolate_sub(_path))
    # Paths whose subject is not a listed directory are ignored, as before.
    if bucket is not None:
        bucket.append([_path, cls2int[isolate_cls(_path)]])

100%|██████████| 65/65 [00:09<00:00,  6.79it/s]


### Predict accuracies for each subject

In [10]:
# Collect [subject, accuracy] rows; accuracy is rounded to 4 places and stored as a string.
results = []

for sub in tqdm(sorted(os.listdir(path))):
    samples = sub2data[sub]
    y_true = [true_label for sample_path, true_label in samples]
    y_pred = [predict_path(sample_path) for sample_path, true_label in samples]

    acc = str(round(accuracy_score(y_true, y_pred), 4))
    results.append([sub, acc])

100%|██████████| 65/65 [03:12<00:00,  2.48s/it]


In [14]:
# One row per entry of `results`, with the two values labelled subject and acc.
df = pd.DataFrame(data=results, columns=['subject', 'acc'])

In [15]:
# Write the table to the results directory without the row index column.
csv_path = os.path.join('results', 'subject_accuracies.csv')
df.to_csv(csv_path, index=False)

This will output a CSV file in the results directory with accuracies for each subject. Just viewing accuracy isn't particularly useful in this case. Something more meaningful would be the results from cross validation. CV will allow each subject to be used in a test set exactly once, so we get an idea of how well classification performs on unseen subjects.

If you run `python main.py` and swap in `train_cv(args)`, this will run CV for n_folds (usually 10) and save mean accuracy and standard deviation scores (train and test) for each fold. You have to move subjects you don't want trained on into another directory. I ran cross validation for both the ordered and unordered subjects. Their results are found in the pickle files. Both were trained for 10 epochs.

#### Ordered subjects

In [34]:
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# result files produced by this project.
scores_file = os.path.join('results', 'cv_scores_p.pkl')
with open(scores_file, 'rb') as fh:
    data = pickle.load(fh)

In [35]:
# Report mean +/- standard deviation of the stored train and test scores (3 decimals).
for split, template in (('train', 'train accuracy: {} +/- {}'),
                        ('test', ' test accuracy: {} +/- {}')):
    _mean = str(round(np.mean(data[split]), 3))
    _std = str(round(np.std(data[split]), 3))
    print(template.format(_mean, _std))

train accuracy: 0.833 +/- 0.019
 test accuracy: 0.362 +/- 0.049


#### Unordered subjects

In [36]:
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# result files produced by this project.
scores_file = os.path.join('results', 'cv_scores_r.pkl')
with open(scores_file, 'rb') as fh:
    data = pickle.load(fh)

In [37]:
# Report mean +/- standard deviation of the stored train and test scores (3 decimals).
for split, template in (('train', 'train accuracy: {} +/- {}'),
                        ('test', ' test accuracy: {} +/- {}')):
    _mean = str(round(np.mean(data[split]), 3))
    _std = str(round(np.std(data[split]), 3))
    print(template.format(_mean, _std))

train accuracy: 0.704 +/- 0.039
 test accuracy: 0.236 +/- 0.069


### Conclusion

There are two reasons I can think of why test accuracies might be lower than train accuracies.

1. Bowel sounds are unique and 50 subjects is not enough to estimate a population.
2. Systematic error is being introduced with regard to stethoscope placement on the belly. Sounds can be coming from different places in the intestine, so one side might capture entirely different data from the other. This might cause an inconsistent measure of bowel sounds such that each patient is very unique and the true source of data is hidden from us.

The good thing here is that test accuracy is poor for both ordered and unordered subjects. So it seems classification isn't entirely biased due to a decreasing voltage from the sensor over time like we thought. Also, the drop in accuracy between the two sets does suggest that an increase in data size might help. (meaning we are not capturing a population and gut sounds are unique).

Another thing to mention is the accuracy around 80% from the model results notebook. It seems there are real differences between mental states and gut sounds. As long as the stethoscope's position wasn't moved between activities, then it's possible a mind-gut connection does exist to some degree; however, this is not certain since generalization to new data seems to fail in the cross validation. It is also possible that events of interest which cause class separation are sparse. This might be detectable by using a larger total duration of time. This could lead to a higher degree of overfitting, but it is worth looking into. The main issue would be that the input size into the network will start to consume too much memory.