In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
import os

In [17]:
pd.set_option('Display.max_rows',None) # print all the rows
pd.set_option('Display.max_columns',None) # print all the columns

In [18]:
def pred_data():
    path = "../dataset/insect/ant/predictions" #path of objective folder
    files = os.listdir(path) # get all the files in the folder

    if ('.DS_Store' in files):
        files.remove('.DS_Store')

    index = 0

    #print(files)
    y_pred = []
    for file in files: # iterator for traversing all the files
        if not os.path.isdir(file): # check if it is a file  
            f = open(path+"/"+file) # open the file
            data = pd.read_csv(f) 
            #print(data)
            if (index == 0):
                y_pred = data
            else:
                y_pred = pd.concat([y_pred, data], axis = 1)
            index += 1
    
    return y_pred, y_pred.columns.values.tolist()

In [19]:
def matrix_to_1d(data):
    n, p = data.shape
    return_list = data.iloc[:,0]
    for name, columns in data.iteritems():
        return_list = pd.concat([return_list, columns], axis = 0)
    return_list = return_list.values
    return return_list

In [20]:
def differentiation(data):
    return_matrix = []
    for name, columns in data.iteritems():
        columns = columns.values
        diff = np.diff(columns)
        return_matrix.append(diff)
    return_matrix = pd.DataFrame(return_matrix)
    return_matrix = return_matrix.T
    return return_matrix

In [21]:
def accuracy_change(data, pred):
    assert len(data) == len(pred)
    nb_prediction = 0 #number of times that prediction of an ant going to another chamber is correct
    tp_tn = 0 #total number of times an ant goes to another chamber
    for j in range (len(pred)):
        if (data[j] != 0) or (pred[j] != 0):
            nb_prediction += 1
            if (data[j] == pred[j]) :
                tp_tn += 1
    try: 
        return tp_tn/nb_prediction
    except Exception as reason:
        print(reason)

In [22]:
def recall(data, pred):
    assert len(data) == len(pred)
    n = 0 #number of times that prediction of an ant going to another chamber is correct
    d = 0 #total number of times an ant goes to another chamber
    for j in range (len(pred)):
        if (data[j] != 0):
            n = n + 1
            if (pred[j] == data[j]) :
                d = d + 1
    try: 
        return d/n
    except Exception as reason:
        print(reason)

In [23]:
def precision(data, pred):
    assert len(data) == len(pred)
    n = 0 #number of times that prediction of an ant going to another chamber is correct
    d = 0 #total number of times an ant goes to another chamber
    for j in range (len(pred)):
        if (pred[j] != 0):
            n = n + 1
            if (pred[j] == data[j]) :
                d = d + 1
    try: 
        return d/n
    except Exception as reason:
        print(reason)

In [24]:
def f1_score(precision, recall):
    try:
        return (2* precision * recall/(precision+recall))
    except Exception as reason:
        print(reason)

In [25]:
pred, columns_name = pred_data()
pred.to_csv('../dataset/insect/ant/prediction.csv')
pred_training = pred[int(len(pred)*0.5):]
pred_testing = pred[:int(len(pred)*0.5)]

In [26]:
y_pred = matrix_to_1d(pred_testing)

In [27]:
data = pd.read_csv('../dataset/insect/ant/time_series_chamber.csv')
data = data[columns_name]
true_training = data[int(len(data)*0.5):]
true_testing = data[:int(len(data)*0.5)]
print(len(columns_name))

200


In [28]:
y_true = matrix_to_1d(true_testing)

In [29]:
y_true_diff = differentiation(true_testing)
y_pred_diff = differentiation(pred_testing)
y_pred_diff.to_csv('../dataset/insect/ant/prediction_diff.csv')
y_true_diff.to_csv('../dataset/insect/ant/true_diff.csv')

y_true_diff = matrix_to_1d(y_true_diff)
y_pred_diff = matrix_to_1d(y_pred_diff)
y_pred_diff = y_pred_diff.astype(int)

In [30]:
# Accuracy = (TP+TN)/(TP+FP+TN+FN)
# Percentage of time when you correctly predict the chamber.
from sklearn.metrics import accuracy_score
print('Percentage of time when you correctly predict the chamber.')
print(accuracy_score(y_true, y_pred))
print('-----------')

# Percentage of time when you correctly predict going to another chamber.
acc_change = accuracy_change(y_true_diff, y_pred_diff)
print('Percentage of time when you correctly predict going to another chamber.')
print(acc_change)
print('-----------')

# recall=TP/(TP+FN)
recl = recall(y_true_diff, y_pred_diff)
print('recall:', recl)
print('-----------')

# precision=TP/(TP+FP)
prec = precision(y_true_diff, y_pred_diff)
print('precision:', prec)
print('-----------')

# f1 = 2*(precision*recall)/(precision+recall)
f1 = f1_score(prec, recl)
print('f1-score', f1)

Percentage of time when you correctly predict the chamber.
0.9999101713653953
-----------
Percentage of time when you correctly predict going to another chamber.
0.8820638820638821
-----------
recall: 0.9397905759162304
-----------
precision: 0.9348958333333334
-----------
f1-score 0.93733681462141
