In [37]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
import os
import csv
import struct
import numpy as np
from scipy.interpolate import interp1d
from typing import List, Tuple
from numpy.core import ndarray
from dataclasses import dataclass
from sklearn.model_selection import LeaveOneOut
import math
from sklearn.utils import shuffle
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.cluster import KMeans

In [None]:
def class_name_to_numeric(cn: str) -> int:
    if cn == "SARS-CoV":
        return 0
    elif cn == "SARS-CoV-2":
        return 1
    elif cn == "MERS-CoV":
        return 2
    elif cn == "HCoV-229E":
        return 3
    else:
        return 4

def class_name_to_numeric_p_n(cn: str) -> int:
    if cn == "Positive":
        return 0
    elif cn == "Negative":
        return 1
    else:
        return 4

def numeric_to_class_name(cn: int) -> str:
    if cn == 0:
        return "SARS-CoV"
    elif cn == 1:
        return "SARS-CoV-2"
    elif cn == 2:
        return "MERS-CoV"
    elif cn == 3:
        return "HCoV-229E"
    else:
        return "Boh"

In [None]:
@dataclass
class Event:
    """Improved feature of single event"""
    class_name: str
    amplitude: float
    d10: float
    d20: float
    d30: float
    d40: float
    d50: float
    d60: float
    d70: float
    d80: float
    d90: float
    c10: float
    c20: float
    c30: float
    c40: float
    c50: float
    c60: float
    c70: float
    c80: float
    c90: float

def event_as_list_no_class_name(e: Event):
    return [e.amplitude, 
            e.d10,e.d20, e.d30, e.d40, e.d50, e.d60, e.d70, e.d80, e.d90,
            e.c10,e.c20, e.c30, e.c40, e.c50, e.c60, e.c70, e.c80, e.c90]

In [64]:
# desktop_folder = os.path.join("C:\\", "Users", "Luca Rossi", "Desktop")
desktop_folder = os.path.join("/home", "luca", "Desktop")
results_folder = os.path.join(desktop_folder, "RESULTS")
virus_folders = ["SARS-CoV", "SARS-CoV-2", "MERS-CoV", "HCoV-229E"]
random_state = 69

def open_dat(filename):
    f = open(filename, "rb")
    f_cont = f.read()
    f.close()
    raw = struct.unpack("d" * (len(f_cont) // 8), f_cont)
    return np.array(raw)

def extract_lengths(filename):
    with open(filename) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        events_lengths = []
        for row in csv_reader:
            if len(row) > 1:
                if line_count > 1 and len(row) == 2:
                    events_lengths.append(int(row[1]) - int(row[0]))
            line_count+=1
        return events_lengths

def extract_raw_events(dir_name) -> List[ndarray]:
    events = []
    files = os.listdir(dir_name)
    if len(files) == 0:
        print(dir_name)
        return
    dat_file = [os.path.join(dir_name, f) for f in files if f.endswith(".dat")].pop()
    details_file = [os.path.join(dir_name, f) for f in files if f.endswith(".csv")].pop()
    # caricamento eventi dal singolo file    
    loaded_events = open_dat(dat_file)
    # caricamento dettagli file
    events_length = extract_lengths(details_file)  
    b = 0
    for ev_len in events_length:
        e = b + ev_len
        event = np.array(loaded_events[b:e])
        b = e
        events.append(event)
    return events

def duration_x_and_center(event: ndarray, baseline, amplitude, percentage):
    event = event - baseline
    event = np.concatenate(([0],event,[0]))
    event_x = np.nonzero(event < amplitude * (1 - percentage) )[0]
    dx = event_x.size
    if dx % 2 != 0:
        return dx, event_x[dx//2]
    m1 = dx // 2
    m0 = m1 - 1
    return dx, (event_x[m1] + event_x[m0]) / 2 
    
def calc_baseline(event: ndarray) -> float:
    ev_len = event.size
    x_baseline = np.concatenate((event[:round(ev_len/5*0.2)], event[round(ev_len - ev_len/5*0.2):]))
    return np.mean(x_baseline)

def extract_events(raw_events: List[ndarray], class_name) -> Tuple[str, List[Event]]:
    if raw_events is None:
        return []
    events = []
    for raw_event in raw_events:
        baseline = calc_baseline(raw_event)
#         remove last first and last 2/5 which are padding
        raw_event = raw_event[raw_event.size // 5 * 2:raw_event.size // 5 * 3]
        peak = raw_event.max()
        amplitude = peak-baseline
        d10, c10 = duration_x_and_center(raw_event, baseline, amplitude, 0.1)
        d20, c20 = duration_x_and_center(raw_event, baseline, amplitude, 0.2)
        d30, c30 = duration_x_and_center(raw_event, baseline, amplitude, 0.3)
        d40, c40 = duration_x_and_center(raw_event, baseline, amplitude, 0.4)
        d50, c50 = duration_x_and_center(raw_event, baseline, amplitude, 0.5)
        d60, c60 = duration_x_and_center(raw_event, baseline, amplitude, 0.6)
        d70, c70 = duration_x_and_center(raw_event, baseline, amplitude, 0.7)
        d80, c80 = duration_x_and_center(raw_event, baseline, amplitude, 0.8)
        d90, c90 = duration_x_and_center(raw_event, baseline, amplitude, 0.9)
        events.append(Event(class_name, amplitude,
                              d10, d20, d30, d40, d50, d60, d70, d80, d90,
                              c10, c20, c30, c40, c50, c60, c70, c80, c90))
    return events
        
def get_classes_and_paths(results_folder: List[str], virus_folders:List[str]):
    class_and_path_to_virus_dir = [ (v, os.path.join(results_folder, v)) for v in virus_folders]
    classes_and_paths = []
    for v, p in class_and_path_to_virus_dir:
        for new_p in [os.path.join(p, d) for d in os.listdir(p)]:
            classes_and_paths.append((v, new_p))            
    return classes_and_paths

def predict_entire_file(clf, events: List[Event]):
    results = {"SARS-CoV": 0, "SARS-CoV-2": 0, "MERS-CoV": 0, "HCoV-229E": 0}
    features = [event_as_list_no_class_name(e) for e in events]
    predictions = clf.predict(features)
    results["SARS-CoV"] = len([p for p in predictions if p == 0])
    results["SARS-CoV-2"] = len([p for p in predictions if p == 1])
    results["MERS-CoV"] = len([p for p in predictions if p == 2])
    results["HCoV-229E"] = len([p for p in predictions if p == 3])
    print(results)
    return max(results, key=results.get)

def get_test_and_train(c_p_e, idx, l):
    test = []
    c_p_e_without_test = [c_p_e[i] for i in range(len(c_p_e)) if i != idx]
    train = []
    for c in virus_folders:
        train_with_class_c = [(ct,_,e) for ct,_,e in c_p_e_without_test if ct == c]
        events = [item for sublist in [e for _, _, e in train_with_class_c] for item in sublist]
        events = shuffle(events, random_state=random_state)
        train+=events[:l]
        test.append((c,events[l:]))
    assert(l * 4 == len(train))
    train = shuffle(train,random_state=random_state)
    return test, train
    

def predict_entire_file_p_n(clf, events: List[Event]):
    results = {"Positive": 0, "Negative": 0}
    features = [event_as_list_no_class_name(e) for e in events]
    predictions = clf.predict(features)
    results["Positive"] = len([p for p in predictions if p == "Positive"])
    results["Negative"] = len([p for p in predictions if p == "Negative"])
    print(results)
    return max(results, key=results.get)

def predict_entire_file_p_n_svc(clf, scaler, events: List[Event]):
    results = {"Positive": 0, "Negative": 0}
    features = [event_as_list_no_class_name(e) for e in events]
    features = scaler.transform(features)
    predictions = clf.predict_proba(features)
    print(predictions)
    results["Positive"] = len([p for p in predictions if p == "Positive"])
    results["Negative"] = len([p for p in predictions if p == "Negative"])
    print(results)
    return max(results, key=results.get)

def predict_entire_file_p_n_kmeans(cls, scaler, events: List[Event]):
    results = {"Positive": 0, "Negative": 0}
    features = [event_as_list_no_class_name(e) for e in events]
    features = scaler.transform(features)
    predictions = cls.predict(features)
#     print(predictions)
    results["Positive"] = len([p for p in predictions if p == 1])
    results["Negative"] = len([p for p in predictions if p == 0])
    print(results)
    return max(results, key=results.get)

def predict_entire_file_p_n_svc_clustering_classification(clf, cls, scaler, events: List[Event]):
    results = {"Positive": 0, "Negative": 0}
    features = [event_as_list_no_class_name(e) for e in events]
    features = scaler.transform(features)
    labels = cls.predict(features)
    new_features = [np.concatenate((f,np.array([l])), axis=None) for f, l in zip(features, labels)]
    predictions = clf.predict(new_features)
    print(predictions)
    results["Positive"] = len([p for p in predictions if p == "Positive"])
    results["Negative"] = len([p for p in predictions if p == "Negative"])
    print(results)
    return max(results, key=results.get)


In [None]:
c_p = get_classes_and_paths(results_folder, virus_folders)
c_p_re = [ (c, p, extract_raw_events(p)) for c, p in c_p]
c_p_e = [(c, p, extract_events(re, c)) for c, p, re in c_p_re]

In [None]:
confusion_matrix_with_balanced_training_data_and_leftovers = [[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]]
# |test| = |train * 4|
confusion_matrix_balanced_training_data = [[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]]
clf = RandomForestClassifier(class_weight="balanced_subsample", random_state=random_state)
for idx in range(len(c_p_e)):
    print("CICLO ", idx)
    test_class, _, test_events = c_p_e[idx]
    test, train = get_test_and_train(c_p_e, idx, len(test_events))
    classes = [class_name_to_numeric(e.class_name) for e in train]
    features = [event_as_list_no_class_name(e) for e in train]
    clf.fit(features, classes)
    print(test_class)
    prediction = predict_entire_file(clf, test_events)
    confusion_matrix_balanced_training_data[class_name_to_numeric(test_class)][class_name_to_numeric(prediction)] += 1    
    print("others")
    for tc,te in test:
        print(tc)
        prediction = predict_entire_file(clf, te)
        confusion_matrix_with_balanced_training_data_and_leftovers[class_name_to_numeric(tc)][class_name_to_numeric(prediction)] += 1
    print("----------------------")

In [None]:
print(np.array(confusion_matrix_balanced_training_data))
print(np.array(confusion_matrix_with_balanced_training_data_and_leftovers))

# Saliva

In [None]:
training_folder = os.path.join(desktop_folder, "TRAINING")
test_folder = os.path.join(desktop_folder, "TEST")
positive_negative_folders =["Positive", "Negative"]

# Extract training data and train model

In [None]:
c_p_saliva_train = get_classes_and_paths(training_folder, positive_negative_folders)
c_p_re_saliva_train = [ (c, p, extract_raw_events(p)) for c, p in c_p_saliva_train]
c_p_e_saliva_train = [(c, p, extract_events(re, c)) for c, p, re in c_p_re_saliva_train]
print(len(c_p_e_saliva_train))
print([p for _, p, e in c_p_e_saliva_train if len(e)==0])
c_p_e_saliva_train = [(c, p, e) for c, p, e in c_p_e_saliva_train if len(e)>0]
print(len(c_p_e_saliva_train))


c_p_saliva_test = get_classes_and_paths(test_folder, positive_negative_folders)
c_p_re_saliva_test = [ (c, p, extract_raw_events(p)) for c, p in c_p_saliva_test]
c_p_e_saliva_test = [(c, p, extract_events(re, c)) for c, p, re in c_p_re_saliva_test]
print(len(c_p_e_saliva_test))
c_p_e_saliva_test = [(c, p, e) for c, p, e in c_p_e_saliva_test if len(e)>0]
print(len(c_p_e_saliva_test))


In [None]:
positive_events = [item for sublist in [e for ct,_,e in c_p_e_saliva_train if ct == "Positive"] for item in sublist]
negetive_events = [item for sublist in [e for ct,_,e in c_p_e_saliva_train if ct == "Negative"] for item in sublist]
print(len(positive_events))
print(len(negetive_events))

In [None]:
positive_events_shuffled = shuffle(positive_events, random_state=random_state)
# get only subset of positive events to make it more balanced
positive_events_shuffled = positive_events_shuffled[:len(negetive_events)]
print(len(positive_events_shuffled))
print(len(negetive_events))

In [None]:
train = positive_events_shuffled+negetive_events
train = shuffle(train, random_state=random_state)
train_labels = [e.class_name for e in train]
train_X = [event_as_list_no_class_name(e) for e in train]
train_X = np.array(train_X)

## Extract csv for weka

In [None]:
def save_to_csv(path_to_file: str, header, rows):
    path_to_file = path_to_file + ".csv" if not path_to_file.endswith(".csv") else path_to_file
    with open(path_to_file, 'w', newline="") as f:
        # create the csv writer
        writer = csv.writer(f)
        # write a row to the csv file
        writer.writerow(header)
        writer.writerows(rows)
        
def event_as_list_for_weka(e: Event):
    return event_as_list_no_class_name(e) + [e.class_name]

weka_saliva_train = [event_as_list_for_weka(e) for e in train]
header = ["amplitude", "d10", "d20", "d30", "d40", "d50", "d60", "d70","d80","d90","c10","c20","c30","c40","c50","c60","c70","c80","c90","class_name"]    
save_to_csv(os.path.join(desktop_folder, "weka_saliva"),header, weka_saliva_train)

## Train classifier

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from pprint import pprint
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]

# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

pprint(random_grid)

In [None]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = RandomForestClassifier(random_state = 69)
# Random search of parameters, using 5 fold cross validation, 
# search across 100 different combinations, and use all available cores
cv = train_X.size
rf_random = RandomizedSearchCV(estimator=rf, param_distributions=random_grid,
                              n_iter = 100, cv = 10, verbose=2, random_state=69, 
                              n_jobs=-1, return_train_score=True)

# Fit the random search model
rf_random.fit(train_X, train_labels)

In [None]:
rf_random.best_params_

In [None]:
saliva_classifier = RandomForestClassifier(random_state=random_state, n_estimators=800, min_samples_split=5, min_samples_leaf=1,max_depth=10,bootstrap=True, oob_score=True)
saliva_classifier.fit(train_X, train_labels)
print(saliva_classifier.oob_score_)

## Test classifier

In [None]:
saliva_confusion_matrix = [[0,0],[0,0]]
for tc,_,te in c_p_e_saliva_test:
    print(tc)
    prediction = predict_entire_file_p_n(saliva_classifier, te)
    saliva_confusion_matrix[class_name_to_numeric_p_n(tc)][class_name_to_numeric_p_n(prediction)] += 1

In [None]:
np.array(saliva_confusion_matrix)

# Testing out another method to ensemble results and using a different classifier

# Preprocessing data: Standardization

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(train_X)
std_train_X = scaler.transform(train_X)
# REMEMBER TO PREPROCESS TEST DATA TOO

## Extract normalized data for weka

In [None]:
weka_saliva_train = [ np.concatenate((e,np.array(l)), axis=None) for e,l in zip(std_train_X, train_labels)]
save_to_csv(os.path.join(desktop_folder, "weka_saliva_normalized"),header, weka_saliva_train)

## Grid search for svm

In [None]:
kernel = ["linear", "poly", "rbf", "sigmoid"]
degree = [1,3,5]

random_grid_svc = {'kernel': kernel,
                   'degree': degree}

In [None]:
cv = train_X.size
svc = SVC(probability=True, random_state=random_state)
svc_random = GridSearchCV(svc, random_grid_svc, n_jobs=-1)

# Fit the random search model
svc_random.fit(train_X, train_labels)

## Train SVM

In [None]:
from sklearn.svm import SVC
svc = SVC(probability=True, random_state=random_state)
svc.fit(std_train_X, train_labels)

## Test svm

In [None]:
saliva_confusion_matrix_svc = [[0,0],[0,0]]
for tc,_,te in c_p_e_saliva_test:
    print(tc)
    prediction = predict_entire_file_p_n_svc(svc, scaler, te)
    saliva_confusion_matrix_svc[class_name_to_numeric_p_n(tc)][class_name_to_numeric_p_n(prediction)] += 1

In [None]:
np.array(saliva_confusion_matrix_svc)

## Testing out clustering

In [40]:
kmeans = KMeans(n_clusters=2,n_init=50,random_state=random_state).fit(std_train_X)
for tl, l in zip(train_labels, kmeans.labels_):
    print(tl, l)

Negative 1
Positive 1
Negative 1
Negative 1
Negative 0
Positive 1
Negative 1
Negative 1
Negative 1
Negative 0
Positive 1
Negative 1
Positive 1
Negative 1
Positive 1
Positive 1
Negative 0
Positive 1
Positive 1
Positive 1
Negative 0
Negative 1
Positive 1
Negative 1
Positive 1
Positive 1
Positive 0
Positive 1
Negative 1
Negative 1
Negative 1
Positive 1
Positive 1
Positive 1
Negative 1
Positive 0
Positive 1
Negative 1
Positive 1
Negative 0
Negative 1
Negative 1
Negative 1
Positive 1
Negative 1
Negative 1
Positive 1
Positive 1
Negative 1
Positive 1
Positive 0
Positive 0
Positive 1
Positive 1
Negative 1
Positive 1
Negative 0
Negative 1
Positive 1
Negative 1
Negative 1
Positive 1
Negative 1
Positive 0
Positive 0
Negative 1
Positive 1
Negative 1
Negative 0
Negative 1
Positive 1
Negative 1
Negative 0
Positive 1
Positive 1
Positive 1
Negative 1
Negative 1
Positive 1
Positive 1
Negative 0
Negative 1
Positive 1
Positive 1
Positive 0
Positive 1
Positive 0
Positive 0
Positive 0
Negative 1
Negative 1

In [50]:
saliva_confusion_matrix_kmeans = [[0,0],[0,0]]
for tc,_,te in c_p_e_saliva_test:
    print(tc)
    prediction = predict_entire_file_p_n_kmeans(kmeans, scaler, te)
    saliva_confusion_matrix_kmeans[class_name_to_numeric_p_n(tc)][class_name_to_numeric_p_n(prediction)] += 1

Positive
{'Positive': 229, 'Negative': 59}
Positive
{'Positive': 1166, 'Negative': 210}
Positive
{'Positive': 74, 'Negative': 35}
Positive
{'Positive': 988, 'Negative': 223}
Positive
{'Positive': 135, 'Negative': 34}
Positive
{'Positive': 25, 'Negative': 6}
Positive
{'Positive': 289, 'Negative': 131}
Positive
{'Positive': 427, 'Negative': 121}
Positive
{'Positive': 67, 'Negative': 28}
Positive
{'Positive': 160, 'Negative': 57}
Positive
{'Positive': 8, 'Negative': 4}
Positive
{'Positive': 190, 'Negative': 102}
Positive
{'Positive': 155, 'Negative': 68}
Positive
{'Positive': 1, 'Negative': 0}
Positive
{'Positive': 3, 'Negative': 2}
Positive
{'Positive': 838, 'Negative': 71}
Positive
{'Positive': 14, 'Negative': 10}
Positive
{'Positive': 924, 'Negative': 253}
Positive
{'Positive': 165, 'Negative': 99}
Positive
{'Positive': 202, 'Negative': 108}
Positive
{'Positive': 146, 'Negative': 44}
Positive
{'Positive': 269, 'Negative': 32}
Positive
{'Positive': 126, 'Negative': 57}
Positive
{'Positi

In [51]:
np.array(saliva_confusion_matrix_kmeans)

array([[42,  0],
       [47,  0]])

## combine clustering with classification

In [57]:
new_features = [np.concatenate((t_ts,np.array([l])), axis=None) for t_ts, l in zip(std_train_X, kmeans.labels_)]

In [58]:
cc_rf = RandomForestClassifier(random_state=random_state).fit(new_features, train_labels)

In [65]:
saliva_confusion_matrix_clustering_classification = [[0,0],[0,0]]
for tc,_,te in c_p_e_saliva_test:
    print(tc)
    prediction = predict_entire_file_p_n_svc_clustering_classification(cc_rf, kmeans, scaler, te)
    saliva_confusion_matrix_clustering_classification[class_name_to_numeric_p_n(tc)][class_name_to_numeric_p_n(prediction)] += 1

Positive
['Negative' 'Negative' 'Positive' 'Positive' 'Negative' 'Negative'
 'Negative' 'Positive' 'Positive' 'Positive' 'Positive' 'Positive'
 'Negative' 'Positive' 'Negative' 'Negative' 'Positive' 'Negative'
 'Negative' 'Positive' 'Positive' 'Positive' 'Positive' 'Positive'
 'Positive' 'Negative' 'Positive' 'Negative' 'Positive' 'Negative'
 'Positive' 'Negative' 'Positive' 'Negative' 'Positive' 'Positive'
 'Negative' 'Negative' 'Negative' 'Negative' 'Negative' 'Positive'
 'Positive' 'Positive' 'Negative' 'Positive' 'Positive' 'Positive'
 'Positive' 'Negative' 'Positive' 'Positive' 'Positive' 'Positive'
 'Negative' 'Negative' 'Positive' 'Negative' 'Positive' 'Positive'
 'Negative' 'Positive' 'Positive' 'Negative' 'Negative' 'Negative'
 'Positive' 'Negative' 'Negative' 'Negative' 'Negative' 'Negative'
 'Positive' 'Positive' 'Negative' 'Positive' 'Positive' 'Negative'
 'Negative' 'Negative' 'Negative' 'Positive' 'Positive' 'Negative'
 'Negative' 'Positive' 'Negative' 'Negative' 'Negativ

['Negative' 'Positive' 'Negative' 'Negative' 'Negative' 'Negative'
 'Negative' 'Positive' 'Negative' 'Positive' 'Negative' 'Positive'
 'Positive' 'Negative' 'Negative' 'Positive' 'Negative' 'Negative'
 'Positive' 'Negative' 'Negative' 'Negative' 'Negative' 'Positive']
{'Positive': 8, 'Negative': 16}
Positive
['Positive' 'Positive' 'Negative' ... 'Positive' 'Positive' 'Negative']
{'Positive': 639, 'Negative': 538}
Positive
['Positive' 'Positive' 'Negative' 'Negative' 'Positive' 'Negative'
 'Negative' 'Negative' 'Positive' 'Negative' 'Positive' 'Positive'
 'Negative' 'Negative' 'Negative' 'Negative' 'Negative' 'Positive'
 'Negative' 'Negative' 'Positive' 'Negative' 'Negative' 'Negative'
 'Negative' 'Negative' 'Negative' 'Negative' 'Positive' 'Negative'
 'Positive' 'Negative' 'Negative' 'Negative' 'Negative' 'Negative'
 'Positive' 'Negative' 'Negative' 'Positive' 'Negative' 'Negative'
 'Negative' 'Negative' 'Negative' 'Negative' 'Negative' 'Positive'
 'Negative' 'Positive' 'Negative' 'Neg

['Positive' 'Negative' 'Negative' ... 'Positive' 'Positive' 'Positive']
{'Positive': 670, 'Negative': 605}
Positive
['Negative' 'Negative' 'Negative' 'Positive' 'Positive' 'Negative'
 'Positive' 'Positive' 'Negative' 'Positive' 'Negative' 'Negative'
 'Positive' 'Negative' 'Positive' 'Positive' 'Negative' 'Positive'
 'Negative' 'Positive' 'Positive' 'Positive' 'Positive' 'Positive'
 'Negative' 'Negative' 'Positive' 'Negative' 'Negative' 'Positive'
 'Positive' 'Negative' 'Positive' 'Negative' 'Negative' 'Positive'
 'Negative' 'Negative' 'Negative' 'Negative' 'Positive' 'Negative'
 'Negative' 'Positive' 'Positive' 'Negative' 'Positive' 'Positive'
 'Positive' 'Negative' 'Negative' 'Negative' 'Negative' 'Positive'
 'Positive' 'Negative' 'Negative' 'Positive' 'Positive' 'Negative'
 'Negative' 'Positive' 'Positive' 'Positive' 'Negative' 'Positive'
 'Positive' 'Negative' 'Negative' 'Positive' 'Positive' 'Positive'
 'Negative' 'Positive' 'Negative' 'Negative' 'Positive' 'Negative'
 'Negative' '

['Positive' 'Negative' 'Negative' 'Positive' 'Positive' 'Positive'
 'Positive' 'Negative' 'Negative' 'Positive' 'Negative' 'Negative'
 'Positive' 'Positive' 'Positive' 'Positive' 'Positive' 'Negative'
 'Negative' 'Negative' 'Negative' 'Negative' 'Negative' 'Positive'
 'Positive' 'Positive' 'Positive' 'Negative' 'Positive' 'Positive'
 'Negative' 'Negative' 'Negative' 'Negative' 'Negative' 'Positive'
 'Positive' 'Negative' 'Positive' 'Positive' 'Positive' 'Negative'
 'Negative' 'Negative' 'Positive' 'Positive' 'Positive' 'Negative'
 'Negative' 'Negative' 'Negative' 'Negative' 'Positive' 'Negative'
 'Positive' 'Negative' 'Negative' 'Positive' 'Negative' 'Negative'
 'Positive' 'Negative' 'Negative' 'Positive' 'Negative' 'Negative'
 'Positive' 'Positive' 'Negative' 'Positive' 'Positive' 'Negative'
 'Negative' 'Positive' 'Negative' 'Negative' 'Negative' 'Negative'
 'Positive' 'Positive' 'Positive' 'Negative' 'Negative' 'Positive'
 'Positive' 'Positive' 'Negative' 'Positive' 'Positive' 'Negat

['Negative' 'Positive' 'Negative' 'Positive' 'Negative' 'Negative'
 'Negative' 'Negative' 'Positive' 'Negative' 'Positive' 'Negative'
 'Positive' 'Negative' 'Negative' 'Positive' 'Positive' 'Positive'
 'Negative' 'Negative' 'Positive' 'Positive' 'Positive' 'Positive'
 'Positive' 'Negative' 'Negative' 'Negative' 'Negative' 'Negative'
 'Positive' 'Positive' 'Positive' 'Positive' 'Positive' 'Negative'
 'Negative' 'Positive' 'Positive' 'Negative' 'Negative' 'Positive'
 'Positive' 'Positive' 'Negative' 'Negative' 'Positive' 'Negative'
 'Negative' 'Positive' 'Positive' 'Negative' 'Negative' 'Negative'
 'Positive' 'Positive' 'Positive' 'Positive' 'Positive' 'Positive'
 'Negative' 'Positive' 'Negative' 'Negative' 'Negative' 'Negative'
 'Positive' 'Negative' 'Negative' 'Negative' 'Positive' 'Negative'
 'Negative' 'Positive' 'Positive' 'Negative' 'Positive' 'Negative'
 'Positive' 'Positive' 'Positive' 'Positive' 'Positive' 'Positive'
 'Negative' 'Negative' 'Negative' 'Negative' 'Positive' 'Posit

['Negative' 'Negative' 'Negative' 'Negative' 'Negative' 'Positive'
 'Negative' 'Negative' 'Negative' 'Negative' 'Positive' 'Negative'
 'Negative' 'Positive' 'Positive' 'Positive' 'Negative' 'Positive'
 'Positive' 'Positive' 'Positive' 'Positive' 'Positive' 'Negative'
 'Positive' 'Negative' 'Positive' 'Negative' 'Positive' 'Positive'
 'Negative' 'Negative' 'Positive' 'Positive' 'Positive' 'Negative'
 'Negative' 'Negative' 'Positive' 'Positive' 'Positive' 'Negative'
 'Negative' 'Positive' 'Negative' 'Positive' 'Positive' 'Positive'
 'Negative' 'Positive' 'Negative' 'Positive' 'Negative' 'Negative'
 'Positive' 'Negative' 'Positive' 'Positive' 'Negative' 'Negative'
 'Positive' 'Negative' 'Positive' 'Positive' 'Positive' 'Positive'
 'Positive' 'Negative' 'Positive' 'Positive' 'Negative' 'Positive'
 'Negative' 'Positive' 'Negative' 'Positive' 'Positive' 'Positive'
 'Negative' 'Negative' 'Positive' 'Positive' 'Negative' 'Negative'
 'Positive' 'Positive' 'Negative' 'Positive' 'Positive' 'Posit

['Positive' 'Negative' 'Negative' 'Negative' 'Negative' 'Positive'
 'Negative' 'Negative' 'Negative' 'Positive' 'Positive' 'Negative'
 'Negative' 'Positive' 'Negative' 'Positive' 'Negative' 'Negative'
 'Positive' 'Positive' 'Positive' 'Positive' 'Negative' 'Positive'
 'Negative' 'Positive' 'Positive' 'Positive' 'Negative' 'Positive'
 'Negative' 'Negative' 'Negative' 'Negative' 'Negative' 'Negative'
 'Positive' 'Positive' 'Positive' 'Positive' 'Negative' 'Negative'
 'Negative' 'Positive' 'Positive' 'Negative' 'Negative' 'Positive'
 'Negative' 'Positive' 'Positive' 'Negative' 'Positive' 'Positive'
 'Negative' 'Positive' 'Negative' 'Positive' 'Negative' 'Positive'
 'Positive' 'Negative' 'Positive' 'Negative' 'Positive' 'Positive'
 'Positive' 'Negative' 'Positive' 'Negative' 'Negative' 'Negative'
 'Negative' 'Positive' 'Negative' 'Negative' 'Negative' 'Negative'
 'Negative' 'Positive' 'Positive' 'Negative' 'Negative' 'Positive'
 'Negative' 'Positive' 'Negative' 'Positive' 'Negative' 'Posit

['Negative' 'Negative' 'Negative' 'Negative' 'Negative' 'Negative'
 'Positive' 'Negative' 'Positive' 'Positive' 'Negative' 'Negative'
 'Negative' 'Positive' 'Negative' 'Positive' 'Negative' 'Negative'
 'Positive' 'Negative' 'Positive' 'Negative' 'Positive' 'Negative'
 'Negative' 'Negative' 'Negative' 'Negative' 'Positive' 'Negative'
 'Negative' 'Negative' 'Negative' 'Negative' 'Negative' 'Negative'
 'Negative' 'Negative' 'Negative' 'Negative' 'Negative' 'Negative'
 'Positive' 'Negative' 'Negative' 'Positive' 'Positive' 'Negative'
 'Negative' 'Negative' 'Negative' 'Negative' 'Negative' 'Negative'
 'Positive' 'Negative' 'Negative' 'Negative' 'Negative' 'Positive'
 'Negative' 'Negative' 'Negative' 'Negative' 'Negative' 'Negative'
 'Negative' 'Negative' 'Negative' 'Positive' 'Negative' 'Negative'
 'Positive' 'Negative' 'Negative' 'Negative' 'Negative' 'Negative'
 'Positive' 'Negative' 'Positive' 'Negative' 'Negative' 'Positive'
 'Positive' 'Negative' 'Negative' 'Positive' 'Negative' 'Negat

['Positive' 'Positive' 'Positive' 'Negative' 'Positive' 'Negative'
 'Negative' 'Negative' 'Negative' 'Positive' 'Positive' 'Negative'
 'Positive' 'Negative' 'Negative' 'Negative' 'Negative' 'Negative'
 'Negative' 'Positive' 'Positive' 'Positive' 'Positive' 'Positive'
 'Positive' 'Negative' 'Positive' 'Negative' 'Positive' 'Negative'
 'Positive' 'Positive' 'Negative' 'Negative' 'Positive' 'Negative'
 'Negative' 'Positive' 'Negative' 'Positive' 'Negative' 'Positive'
 'Positive' 'Positive' 'Positive' 'Negative' 'Negative' 'Positive'
 'Negative' 'Positive' 'Negative' 'Negative' 'Negative' 'Positive'
 'Negative' 'Positive' 'Positive' 'Negative' 'Negative' 'Positive'
 'Negative' 'Positive' 'Positive' 'Positive' 'Positive' 'Negative'
 'Positive' 'Positive' 'Negative' 'Positive' 'Positive' 'Negative'
 'Positive' 'Negative' 'Positive' 'Negative' 'Negative' 'Negative'
 'Positive' 'Positive' 'Positive' 'Positive' 'Positive' 'Negative'
 'Negative' 'Negative' 'Positive' 'Positive' 'Negative' 'Posit

In [66]:
print(np.array(saliva_confusion_matrix_clustering_classification))

[[15 27]
 [17 30]]
