In [6]:
%reset -f
from wavhandler import *
from utils import *

import pandas as pd
import numpy as np
import pickle
import multiprocessing
import matplotlib.pyplot as plt
import seaborn as sn
from sklearn.metrics import confusion_matrix
sn.set()

import logging

# Fix NumPy's global RNG so anything drawing from np.random is repeatable.
np.random.seed(0)

# Configure the root logger: only ERROR and above get through.
# (The root logger has no parent, so `propagate` is set purely for explicitness.)
logger = logging.getLogger()
logger.setLevel(logging.ERROR)
logger.propagate = False

# Loading the model and the indexes of the signals it was trained on

In [7]:
# SECURITY NOTE: pickle.load can execute arbitrary code while deserialising.
# Only load model files that come from a trusted source.
# The context manager guarantees the file handle is closed, even if unpickling fails.
with open("pima.pickle.dat", "rb") as model_file:
    loaded_model = pickle.load(model_file)

# Header-less CSV whose first column becomes the index; the first remaining
# column is later matched against `fname` to identify training signals.
model_idx = pd.read_csv('pima_idx_used.csv', index_col=0, header=None)

## Creating a dataframe of all signals that the model has never seen

In [8]:
df_mosquitos = pd.read_pickle('./data/mosquitos.pkl')

# File names recorded in model_idx are excluded; every other row is kept
# as candidate test data.
seen_fnames = model_idx.iloc[:, 0].values.tolist()
was_seen = df_mosquitos.fname.isin(seen_fnames)
df_test = df_mosquitos.loc[~was_seen]
df_test.head()

Unnamed: 0,fname,pow0,pow1,pow2,fr0,fr1,fr2,damping_0,damping_1,damping_2,names,pathlen,fnamelen,temp,humd,label1,label2
0,F161205_161248_000_G_050,0.037316,0.034266,0.000787,682.617188,1364.257812,1977.539062,0.032904,0.021475,0.007901,/home/yannis/data/insects/Potamitis/Wingbeats/...,11,5,,,Culex,C. pipiens
2,F161205_161254_002_G_050,0.08831,0.135539,0.003797,448.242188,890.625,1368.164062,0.052288,0.029057,0.009636,/home/yannis/data/insects/Potamitis/Wingbeats/...,11,5,,,Culex,C. pipiens
4,F161205_161313_004_G_050,0.138603,0.093639,0.021507,565.429688,1126.953125,1684.570312,0.037997,0.020364,0.013913,/home/yannis/data/insects/Potamitis/Wingbeats/...,11,5,,,Culex,C. pipiens
5,F161205_161316_005_G_050,0.13604,0.019107,0.003176,625.976562,1246.09375,1849.609375,0.048362,0.014498,0.017423,/home/yannis/data/insects/Potamitis/Wingbeats/...,11,5,,,Culex,C. pipiens
6,F161205_161329_006_G_050,0.044235,0.163418,0.015739,603.515625,1207.03125,1811.523438,0.038026,0.018608,0.013477,/home/yannis/data/insects/Potamitis/Wingbeats/...,11,5,,,Culex,C. pipiens


## Getting signals from this dataframe to create our test matrix

In [11]:
sz = 10000  # number of samples requested per class (taken from df_test only)


def _sample_class_psd(insect_class, label):
    """Build the labelled frame for one insect class.

    Calls ``make_df_parallel`` on ``df_test`` with ``setting='psd'`` and
    ``sample_size=sz``, transposes the result, and adds a constant ``label``
    column holding ``label``.
    """
    frame = make_df_parallel(df_test, setting='psd', insect_class=insect_class, sample_size=sz).T
    frame['label'] = label
    return frame


# Call order (Anopheles, Aedes, Culex) is kept as-is in case sampling
# depends on the global random state.
df_an = _sample_class_psd('Anopheles', 'an')
df_ae = _sample_class_psd('Aedes', 'ae')
df_cu = _sample_class_psd('Culex', 'cu')

### Concatenating the per-class dataframes and cleaning up the result

In [12]:
# Stack the three per-class frames row-wise (Aedes, Anopheles, Culex).
df_concat = pd.concat([df_ae, df_an, df_cu], axis=0, sort=False)
print(df_concat.shape)

# Remove columns that are entirely NaN, then any row still containing a NaN.
df_concat = (
    df_concat
    .dropna(how='all', axis=1)
    .dropna(how='any', axis=0)
)
print(df_concat.shape)  # shape still includes the 'label' column at this point

# Separate the target labels from the feature columns.
labelarray = df_concat['label'].values
df_concat = df_concat.drop(columns='label')
print(labelarray.shape)

(30000, 130)
(30000, 130)
(30000,)


# Making predictions on our test dataframe using the loaded model

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import xgboost
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle

# Shuffle the feature rows and their labels in unison, with a fixed seed.
X, y = shuffle(df_concat.values, labelarray, random_state=3)

# Predict with the previously loaded model and compare against the true labels.
loaded_model_ypred = loaded_model.predict(X)
ac = accuracy_score(y_true=y, y_pred=loaded_model_ypred)
print("ACCURACY SCORE: {0:.3f}".format(ac))

ACCURACY SCORE: 0.856
