In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
import os
import csv
import struct
import numpy as np
from scipy.interpolate import interp1d
from typing import List, Tuple
from numpy.core import ndarray
from dataclasses import dataclass
from sklearn.model_selection import LeaveOneOut

In [None]:
def class_name_to_numeric(cn: str) -> int:
    if cn == "SARS-CoV":
        return 0
    elif cn == "SARS-CoV-2":
        return 1
    elif cn == "MERS-CoV":
        return 2
    elif cn == "HCoV-229E":
        return 3
    else:
        return 4

def class_name_to_numeric_p_n(cn: str) -> int:
    if cn == "Positive":
        return 0
    elif cn == "Negative":
        return 1
    else:
        return 4

def numeric_to_class_name(cn: int) -> str:
    if cn == 0:
        return "SARS-CoV"
    elif cn == 1:
        return "SARS-CoV-2"
    elif cn == 2:
        return "MERS-CoV"
    elif cn == 3:
        return "HCoV-229E"
    else:
        return "Boh"

In [None]:
@dataclass
class Event:
    """Feature of single event"""
    class_name: str
    amplitude: float 
    d50: float 
    normalized_event: ndarray

def event_as_list_no_class_name(e: Event):
    return [-e.amplitude*1e9, e.d50/100] + list(e.normalized_event)

In [40]:
desktop_folder = os.path.join("C:\\", "Users", "Luca Rossi", "Desktop")
results_folder = os.path.join(desktop_folder, "RESULTS")
proviaml_folder = os.path.join(desktop_folder, "PROVIAML")
virus_folders = ["SARS-CoV", "SARS-CoV-2", "MERS-CoV", "HCoV-229E"]
positive_negative_folders =["Positive", "Negative"]

def open_dat(filename):
    f = open(filename, "rb")
    f_cont = f.read()
    f.close()
    raw = struct.unpack("d" * (len(f_cont) // 8), f_cont)
    return np.array(raw)

def extract_lengths(filename):
    with open(filename) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        events_lengths = []
        for row in csv_reader:
            if len(row) > 1:
                if line_count > 1 and len(row) == 2:
                    events_lengths.append(int(row[1]) - int(row[0]))
            line_count+=1
        return events_lengths

def extract_raw_events(dir_name) -> List[ndarray]:
    events = []
    files = os.listdir(dir_name)
    if len(files) == 0:
        print(dir_name)
        return
    dat_file = [os.path.join(dir_name, f) for f in files if f.endswith(".dat")].pop()
    details_file = [os.path.join(dir_name, f) for f in files if f.endswith(".csv")].pop()
    # caricamento eventi dal singolo file    
    loaded_events = open_dat(dat_file)
    # caricamento dettagli file
    events_length = extract_lengths(details_file)  
    b = 0
    for ev_len in events_length:
        e = b + ev_len
        event = np.array(loaded_events[b:e])
        b = e
        events.append(event)
    return events

def duration_x(event: ndarray, baseline, amplitude, percentage):
    event_x = np.nonzero(event < baseline - amplitude * percentage )[0]
    # istante in cui l'evento supera la percentuale x dell'escursione
    begin_x = event_x[0]
    # istante in cui l'evento torna al di sotto della percentuale x dell'escursione
    end_x = event_x[-1]
    return end_x - begin_x

def calc_baseline(event: ndarray) -> float:
    ev_len = event.size
    x_baseline = np.concatenate((event[:round(ev_len/5*0.2)], event[round(ev_len - ev_len/5*0.2):]))
    return np.mean(x_baseline)

def normalize_waveform(event:ndarray, stereotype_length = 35) -> ndarray:
    ev_len = event.size
    amplitude = event.max()
    x = np.array([x for x in range(ev_len)])
    x_norm = np.linspace(0, ev_len-1, stereotype_length*3+4)
    f = interp1d(x, event/amplitude, kind='cubic')
    event_norm = f(x_norm)
    event_norm = event_norm[stereotype_length+2:2*stereotype_length+2]
    return event_norm

def extract_events(raw_events: List[ndarray], class_name) -> Tuple[str, List[Event]]:
    events = []
    if raw_events is None:
        return events
    for event in raw_events:
        peak = event.max()
        baseline = calc_baseline(event)
        amplitude = baseline - peak
        d50 = duration_x(event, baseline, amplitude, 0.5)
        normalized_event = normalize_waveform(event)
        if np.count_nonzero(normalized_event < - 0.1) or np.count_nonzero(normalized_event > 1):
            continue
        events.append(Event(class_name, amplitude, d50, normalized_event))
    return events

def get_classes_and_paths(results_folder: List[str], virus_folders:List[str]):
    class_and_path_to_virus_dir = [ (v, os.path.join(results_folder, v)) for v in virus_folders]
    classes_and_paths = []
    for v, p in class_and_path_to_virus_dir:
        for new_p in [os.path.join(p, d) for d in os.listdir(p)]:
            classes_and_paths.append((v, new_p))            
    return classes_and_paths

def shuffle(a, b):
    assert(len(a) == len(b))
    l = len(a)
    p = np.random.permutation(l)
    p = p.astype(int)
    return [a[i] for i in p], [b[i] for i in p]


def resample(events):
    classes = [class_name_to_numeric_p_n(e.class_name) for e in events]
    pos_classes = [c for c in classes if c == 0]
    neg_classes = [c for c in classes if c == 1]
    pos_events = [e for e in events if e.class_name == "Positive"]
    neg_events = [e for e in events if e.class_name == "Negative"]
    if len(pos_events) > len(neg_events):
        pos_events_shuffled, pos_classes_shuffled = shuffle(pos_events, pos_classes)
        events = pos_events_shuffled[:len(neg_events)] + neg_events
        classes = pos_classes_shuffled[:len(neg_events)] + neg_classes
    else:
        neg_events_shuffled,neg_classes_shuffled = shuffle(neg_events, neg_classes)
        events = pos_events + neg_events_shuffled[:len(pos_events)]
        classes = pos_classes + neg_classes_shuffled[:len(pos_classes)]
    return shuffle(events, classes)

def predict_entire_file(clf, events: List[Event]):
    results = {"SARS-CoV": 0, "SARS-CoV-2": 0, "MERS-CoV": 0, "HCoV-229E": 0}
    features = [event_as_list_no_class_name(e) for e in events]
    predictions = clf.predict(features)
    results["SARS-CoV"] = len([p for p in predictions if p == 0])
    results["SARS-CoV-2"] = len([p for p in predictions if p == 1])
    results["MERS-CoV"] = len([p for p in predictions if p == 2])
    results["HCoV-229E"] = len([p for p in predictions if p == 3])
    print(results)
    return max(results, key=results.get)

def predict_entire_file_p_n(clf, events: List[Event]):
    results = {"Positive": 0, "Negative": 0}
    features = [event_as_list_no_class_name(e) for e in events]
    predictions = clf.predict(features)
    results["Positive"] = len([p for p in predictions if p == 0])
    results["Negative"] = len([p for p in predictions if p == 1])
    print(results)
    return max(results, key=results.get)

def get_predictions_of_entire_file_p_n(clf, events: List[Event]):
    results = {"Positive": 0, "Negative": 0}
    features = [event_as_list_no_class_name(e) for e in events]
    predictions = clf.predict(features)
    results["Positive"] = len([p for p in predictions if p == 0])
    results["Negative"] = len([p for p in predictions if p == 1])
    return results

In [None]:
c_p = get_classes_and_paths(results_folder, virus_folders)
c_p_re = [ (c, p, extract_raw_events(p)) for c, p in c_p]
c_p_e = [(c, p, extract_events(re, c)) for c, p, re in c_p_re]

In [None]:
events_for_analysis = [item for sublist in [e for _, _, e in c_p_e] for item in sublist]
classes_for_analysis = [e.class_name for e in events_for_analysis]
for v in virus_folders:
    print(v, len([c for c in classes_for_analysis if c == v]))

In [None]:
confusion_matrix = [[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]]
dummy_confusion_matrix = [[0,0,0,0],[0,0,0,0],[0,0,0,0],[0,0,0,0]]
dummy = DummyClassifier()
# clf = RandomForestClassifier(class_weight="balanced_subsample")
clf = MLPClassifier(hidden_layer_sizes=(20,), activation='tanh',learning_rate_init=0.3, max_iter=500, random_state=0,
 momentum=0.2, early_stopping=True, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, batch_size = 100)
for idx in range(len(c_p_e)):
    test = c_p_e[idx]
    train = [c_p_e[i] for i in range(len(c_p_e)) if i != idx]
    events = [item for sublist in [e for _, _, e in train] for item in sublist]
    classes = [class_name_to_numeric(e.class_name) for e in events]
    features = [event_as_list_no_class_name(e) for e in events]
    clf.fit(features, classes)
    dummy.fit(features, classes)
    test_class, _, test_events = test
    print(test_class)
    prediction = predict_entire_file(clf, test_events)
    dummy_pred = predict_entire_file(dummy, test_events)
    confusion_matrix[class_name_to_numeric(test_class)][class_name_to_numeric(prediction)] += 1
    dummy_confusion_matrix[class_name_to_numeric(test_class)][class_name_to_numeric(dummy_pred)] += 1


In [None]:
print(np.array(confusion_matrix))
print(np.array(dummy_confusion_matrix))


In [41]:
c_p_saliva = get_classes_and_paths(proviaml_folder, positive_negative_folders)
c_p_re_saliva = [ (c, p, extract_raw_events(p)) for c, p in c_p_saliva]
c_p_e_saliva = [(c, p, extract_events(re, c)) for c, p, re in c_p_re_saliva]

C:\Users\Luca Rossi\Desktop\PROVIAML\Positive\AS-2-2-bias+01_BK-1103_045fil_TI
C:\Users\Luca Rossi\Desktop\PROVIAML\Positive\AS-2-2-bias+01_BK-1105_045fil_TI
C:\Users\Luca Rossi\Desktop\PROVIAML\Positive\AS-2-2-bias+01_BK-1126_045fil_TI
C:\Users\Luca Rossi\Desktop\PROVIAML\Positive\AS-2-2-bias+01_BK-950_045fil_TI_1st
C:\Users\Luca Rossi\Desktop\PROVIAML\Positive\AS-2-2-bias+01_BK-953_045fil_TI_1st
C:\Users\Luca Rossi\Desktop\PROVIAML\Positive\AS-2-2-bias+01_BK-986_045fil_TI_1st
C:\Users\Luca Rossi\Desktop\PROVIAML\Positive\AS-2-2-bias+01_BK-990_045fil_TI_1st
C:\Users\Luca Rossi\Desktop\PROVIAML\Positive\F1
C:\Users\Luca Rossi\Desktop\PROVIAML\Positive\F1 day7
C:\Users\Luca Rossi\Desktop\PROVIAML\Positive\F2
C:\Users\Luca Rossi\Desktop\PROVIAML\Positive\F5 day3
C:\Users\Luca Rossi\Desktop\PROVIAML\Positive\F8 day1
C:\Users\Luca Rossi\Desktop\PROVIAML\Negative\HD-112720-39
C:\Users\Luca Rossi\Desktop\PROVIAML\Negative\HD-112720-46
C:\Users\Luca Rossi\Desktop\PROVIAML\Negative\HD-112720-5

In [None]:
p_n_confusion_matrix = [[0,0],[0,0]]
p_n_dummy_confusion_matrix = [[0,0],[0,0]]
# dummy = DummyClassifier()
clf = RandomForestClassifier(class_weight="balanced_subsample", n_jobs=-1)
# clf = MLPClassifier(hidden_layer_sizes=(20,), activation='tanh',learning_rate_init=0.3, max_iter=500, random_state=0,
#  momentum=0.2, early_stopping=True, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, batch_size = 100)
for idx in range(len(c_p_e_saliva)):
    test_class, test_path, test_events = c_p_e_saliva[idx]
    if len(test_events) == 0:
        print("no test events for ", test_path)
        continue
    train = [c_p_e_saliva[i] for i in range(len(c_p_e_saliva)) if i != idx]
    events = [item for sublist in [e for _, _, e in train] for item in sublist]
    events, classes = resample(events)
    features = [event_as_list_no_class_name(e) for e in events]
    print(test_class)
    clf.fit(features, classes)
    # dummy.fit(features, classes)
    prediction = predict_entire_file_p_n(clf, test_events)
    # dummy_pred = predict_entire_file_p_n(dummy, test_events)
    p_n_confusion_matrix[class_name_to_numeric_p_n(test_class)][class_name_to_numeric_p_n(prediction)] += 1
    # p_n_dummy_confusion_matrix[class_name_to_numeric_p_n(test_class)][class_name_to_numeric_p_n(dummy_pred)] += 1

In [None]:
print(np.array(p_n_confusion_matrix))
print(np.array(p_n_dummy_confusion_matrix))

# Classificatore basato su file

In [29]:
@dataclass
class FeatureOfFile:
    """Feature of File"""
    class_name: str
    n_events: int
    n_events_classified_as_positive: int
    n_events_classified_as_negative: int
    hist_of_amplitudes: ndarray
    hist_of_d50s: ndarray

def feature_file_as_list_no_class_name(f: FeatureOfFile):
    return [f.n_events, f.n_events_classified_as_positive, f.n_events_classified_as_negative] + list(f.hist_of_amplitudes) + list(f.hist_of_d50s)
    # return [f.n_events, f.n_events_classified_as_negative] + list(f.hist_of_amplitudes) + list(f.hist_of_d50s)

def feature_of_file_from_events(class_name: str, evs: List[Event], clf ) -> FeatureOfFile:
    n_events = len(evs)
    amplitudes = [e.amplitude for e in evs]
    d50s =  [e.d50 for e in evs]
    hist_of_amplitudes, _ = np.histogram(amplitudes)
    hist_of_d50s, _ = np.histogram(d50s)
    dict_of_predictions = get_predictions_of_entire_file_p_n(clf, evs)
    n_events_classified_as_positive, n_events_classified_as_negative = dict_of_predictions["Positive"], dict_of_predictions["Negative"]
    return FeatureOfFile(class_name, n_events=n_events, n_events_classified_as_positive=n_events_classified_as_positive, 
    n_events_classified_as_negative=n_events_classified_as_negative, hist_of_amplitudes=hist_of_amplitudes, hist_of_d50s=hist_of_d50s)


In [30]:
p_n_confusion_matrix_file = [[0,0],[0,0]]
clf1 = RandomForestClassifier(class_weight="balanced_subsample", n_jobs=-1)
clf2 = RandomForestClassifier(class_weight="balanced_subsample", n_jobs=-1)
for idx in range(len(c_p_e_saliva)):
    test_class, test_path, test_events = c_p_e_saliva[idx]
    if len(test_events) == 0:
        print("no test events for ", test_path)
        continue
    train = [c_p_e_saliva[i] for i in range(len(c_p_e_saliva)) if i != idx]
    events = [item for sublist in [e for _, _, e in train] for item in sublist] 
    events, classes = resample(events)
    features = [event_as_list_no_class_name(e) for e in events]
    clf1.fit(features, classes)
    # fare ribilanciamento anche di qua?
    train =  [(c, p, e) for c, p, e in train if len(e) > 0]
    features_of_files = [feature_of_file_from_events(c, e, clf1) for c, _, e in train]
    classes = [class_name_to_numeric_p_n(f.class_name) for f in features_of_files]
    features = [feature_file_as_list_no_class_name(f) for f in features_of_files]
    features, classes = shuffle(features, classes)
    clf2.fit(features, classes)

    test_feature = feature_of_file_from_events(test_class, test_events, clf1)
    # print(test_feature.class_name)
    # Faccio prediction su un solo elemento
    prediction = clf2.predict([feature_file_as_list_no_class_name(test_feature)])
    p_n_confusion_matrix_file[class_name_to_numeric_p_n(test_class)][prediction[0]] += 1


no test events for  C:\Users\Luca Rossi\Desktop\TRAINING\Positive\AS-2-2-bias+01_BK-1126_045fil_TI
no test events for  C:\Users\Luca Rossi\Desktop\TRAINING\Positive\AS-2-2-bias+01_BK-953_045fil_TI_1st
no test events for  C:\Users\Luca Rossi\Desktop\TRAINING\Positive\F2
no test events for  C:\Users\Luca Rossi\Desktop\TRAINING\Positive\F8 day1
no test events for  C:\Users\Luca Rossi\Desktop\TRAINING\Negative\HD-112720-46
no test events for  C:\Users\Luca Rossi\Desktop\TRAINING\Negative\HD-112720-58
no test events for  C:\Users\Luca Rossi\Desktop\TRAINING\Negative\HD-120420-17
no test events for  C:\Users\Luca Rossi\Desktop\TRAINING\Negative\HD-120420-32
no test events for  C:\Users\Luca Rossi\Desktop\TRAINING\Negative\HD-120420-33
no test events for  C:\Users\Luca Rossi\Desktop\TRAINING\Negative\HD-120420-44
no test events for  C:\Users\Luca Rossi\Desktop\TRAINING\Negative\HD-120720-46
no test events for  C:\Users\Luca Rossi\Desktop\TRAINING\Negative\HD-120720-6


In [31]:
np.array(p_n_confusion_matrix_file)

array([[20, 16],
       [15, 17]])

# Classifico su file con un solo classificatore

In [35]:
@dataclass
class FeatureOfFileOnlyNoClassifier:
    """Feature of File"""
    class_name: str
    n_events: int
    hist_of_amplitudes: ndarray
    hist_of_d50s: ndarray

def feature_file_as_list_no_class_name(f: FeatureOfFile):
    return [f.n_events] + list(f.hist_of_amplitudes) + list(f.hist_of_d50s)

def feature_of_file_no_classifier_from_events(class_name: str, evs: List[Event]) -> FeatureOfFileOnlyNoClassifier:
    n_events = len(evs)
    amplitudes = [e.amplitude for e in evs]
    d50s =  [e.d50 for e in evs]
    hist_of_amplitudes, _ = np.histogram(amplitudes)
    hist_of_d50s, _ = np.histogram(d50s)
    return FeatureOfFileOnlyNoClassifier(class_name, n_events=n_events, hist_of_amplitudes=hist_of_amplitudes, hist_of_d50s=hist_of_d50s)

In [49]:
from sklearn.model_selection import train_test_split
# remove empty events
c_p_e_saliva_non_empty_events =  [(c, p, e) for c, p, e in c_p_e_saliva if len(e) > 0]
features = [feature_of_file_no_classifier_from_events(c, e) for c, _, e in c_p_e_saliva_non_empty_events]
labels  = [class_name_to_numeric_p_n(f.class_name) for f in features]
features = [feature_file_as_list_no_class_name(f) for f in features]
features, labels = shuffle(features, labels)
features = np.array(features)
labels = np.array(labels)
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.2, random_state = 69)

In [50]:
print('Training Features Shape:', train_features.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Features Shape:', test_features.shape)
print('Testing Labels Shape:', test_labels.shape)

Training Features Shape: (125, 21)
Training Labels Shape: (125,)
Testing Features Shape: (32, 21)
Testing Labels Shape: (32,)


In [83]:
from pprint import pprint
rf = RandomForestClassifier(random_state=69)

print("Parameters currently in use")
pprint(rf.get_params())

Parameters currently in use
{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 69,
 'verbose': 0,
 'warm_start': False}


# Random Search with Cross Validation


In [55]:
from sklearn.model_selection import RandomizedSearchCV

# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]

# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

pprint(random_grid)

{'bootstrap': [True, False],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]}


In [99]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = RandomForestClassifier(random_state = 69)
# Random search of parameters, using 5 fold cross validation, 
# search across 100 different combinations, and use all available cores
cv = train_features.size
rf_random = RandomizedSearchCV(estimator=rf, param_distributions=random_grid,
                              n_iter = 100, cv = 50, verbose=2, random_state=69, 
                              n_jobs=-1, return_train_score=True)

# Fit the random search model
rf_random.fit(train_features, train_labels)

Fitting 50 folds for each of 100 candidates, totalling 5000 fits


In [100]:
rf_random.best_params_


{'n_estimators': 400,
 'min_samples_split': 10,
 'min_samples_leaf': 4,
 'max_depth': 80,
 'bootstrap': True}

In [101]:
rf_random.cv_results_


{'mean_fit_time': array([2.43071364, 1.42183775, 0.40889946, 3.24036548, 1.24313799,
        2.26464596, 2.61342935, 2.73854373, 2.27596675, 0.86925869,
        1.16257847, 3.8101219 , 2.683216  , 3.86182372, 0.76968798,
        0.45071915, 3.59834338, 3.16625275, 3.08622915, 3.23641544,
        2.71287069, 1.1829026 , 1.82960937, 3.17616816, 1.37224281,
        2.77998313, 2.27173821, 0.46311399, 2.72912208, 2.71522281,
        3.86762333, 2.69461248, 3.16733106, 2.25547926, 3.12403574,
        1.17377826, 1.53777753, 1.54081321, 1.9232116 , 0.39757941,
        0.40179932, 4.52824403, 3.67708776, 3.9367661 , 3.07062124,
        1.34466861, 3.11099866, 3.42989345, 3.87379385, 4.00649009,
        1.36069435, 3.10078925, 0.39505276, 0.39413319, 0.45270755,
        1.33493143, 4.4411975 , 0.89354259, 1.79919223, 1.54123497,
        3.14089167, 3.05327201, 3.61829244, 0.77962802, 3.9412035 ,
        0.38402597, 2.24916703, 1.18876321, 2.69571568, 3.61052327,
        3.04308337, 3.58009702,

# Evaluation Function


In [102]:
def evaluate(model, test_features, test_labels):
    predictions = model.predict(test_features)
    errors = np.bitwise_xor(predictions, test_labels)
    print(errors)
    accuracy = test_labels.size - np.sum(errors) / errors.size 
    print('Model Performance')
    print('Accuracy = {:0.2f}%.'.format(accuracy))
    
    return accuracy

## Evaluate the Default Model


In [103]:
base_model = RandomForestClassifier(n_estimators = 10, random_state = 69)
base_model.fit(train_features, train_labels)
base_accuracy = evaluate(base_model, test_features, test_labels)

[0 1 0 1 0 1 0 1 0 1 1 1 0 0 0 1 1 0 0 0 0 1 0 0 1 0 1 1 0 0 0 1]
Model Performance
Accuracy = 31.56%.


## Evaluate the Best Random Search Model


In [92]:
best_random = rf_random.best_estimator_
random_accuracy = evaluate(best_random, test_features, test_labels)

[1 1 0 1 0 1 0 1 0 1 1 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0 1]
Model Performance
Accuracy = 31.56%.


In [None]:
print('Improvement of {:0.2f}%.'.format( 100 * (random_accuracy - base_accuracy) / base_accuracy))