In [14]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Conv1D
from keras.layers import LSTM
from keras.layers import MaxPooling1D
from keras.layers import Dropout
from keras.layers import SpatialDropout1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from sklearn.model_selection import train_test_split
from tensorflow import set_random_seed
from numpy.random import seed
from sklearn.model_selection import KFold

In [15]:
X = np.load('compressed_final_data/copulation_segmented_train.npz')['data']
y = np.load('compressed_final_data/copulation_segmented_train_label.npz')['data']

In [16]:
print('Number of trajectories:', X.shape[0])
print('Number of trajectories with copulation:', sum(y)[0])

Number of trajectories: 13303
Number of trajectories with copulation: 7640


In [17]:
# Remove any trajectory with a nan for any fly in either x or y position
mask = np.any(np.isnan(X), axis=1)
mask = np.all(np.equal(mask, 0), axis=1)
X = X[mask]
y = y[mask]

In [18]:
print('Number of trajectories after processing:', X.shape[0])
print('Number of trajectories with copulation after processing:', sum(y)[0])

Number of trajectories after processing: 13303
Number of trajectories with copulation after processing: 7640


In [6]:
# # Split the data into a train and test set
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

In [7]:
# Split the train set into a train set and a validation set
# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1)

In [19]:
X_train_1, X_train_2, y_train_1, y_train_2 = train_test_split(X, y, test_size=0.5, shuffle=False)

In [20]:
# Training 5 models for labelling (trains on 50% of the data)
for i in range(10):
    seed(i)
    set_random_seed(i+5)
    if i in [0,1,2,3,4]:
        X_train, y_train = X_train_1, y_train_1
    else:
        X_train, y_train = X_train_2, y_train_2
    model = Sequential()
    model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(LSTM(10, input_shape=(100,4), return_sequences=True))
    model.add(SpatialDropout1D(0.2))
    model.add(LSTM(5, input_shape=(100,4)))
    model.add(Dense(2, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=10, batch_size=128)
    print(model.summary())
    name = "classifier_" + str(i) + ".h5"
    model.save(name)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_2 (Conv1D)            (None, 100, 32)           416       
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 50, 32)            0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 50, 32)            0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 50, 10)            1720      
_________________________________________________________________
spatial_dropout1d_2 (Spatial (None, 50, 10)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 5)                 320       
_______________________________

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_6 (Conv1D)            (None, 100, 32)           416       
_________________________________________________________________
max_pooling1d_6 (MaxPooling1 (None, 50, 32)            0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 50, 32)            0         
_________________________________________________________________
lstm_11 (LSTM)               (None, 50, 10)            1720      
_________________________________________________________________
spatial_dropout1d_6 (Spatial (None, 50, 10)            0         
_________________________________________________________________
lstm_12 (LSTM)               (None, 5)                 320       
_______________________________

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_10 (Conv1D)           (None, 100, 32)           416       
_________________________________________________________________
max_pooling1d_10 (MaxPooling (None, 50, 32)            0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 50, 32)            0         
_________________________________________________________________
lstm_19 (LSTM)               (None, 50, 10)            1720      
_________________________________________________________________
spatial_dropout1d_10 (Spatia (None, 50, 10)            0         
_________________________________________________________________
lstm_20 (LSTM)               (None, 5)                 320       
_______________________________

In [9]:
# Predict on the test set and round predictions
y_pred = np.rint(model.predict(X_test))

In [10]:
# Find the correct and incorrect predictions
correct = y_pred[y_test == y_pred]
incorrect = y_pred[y_test != y_pred]

In [11]:
# Compute the accuracy for the model
accuracy = len(correct) /2w (len(correct) + len(incorrect))
print('Accuracy: {0:.2f}%'.format(accuracy * 100))

Accuracy: 98.98%


In [19]:
# Compute the recall for the model
true_positives = correct[correct == 1]
false_positives = incorrect[incorrect == 1]
recall = len(true_positives) / (len(false_positives) + len(true_positives))
print('Recall: {0:.4f}%'.format(recall * 100))

Recall: 96.8172%


In [24]:
# Compute the precision for the model
false_negatives = incorrect[incorrect == 0]
precision = len(true_positives) / (len(false_negatives) + len(true_positives))
print('Precision: {0:.4f}%'.format(precision * 100))

Precision: 99.6829%


In [18]:
print('Number of trajectories with copulation:', sum(y_test)[0])
print('Number of trajectories classified as copulation:', int(sum(y_pred)[0]))

Number of trajectories with copulation: 946
Number of trajectories classified as copulation: 974
