In [6]:
import numpy as np
import pandas as pd
import os

In [7]:
from classification_helper import*

In [8]:
class LabelEncoder:
    """Turn segment IDs into one-hot labels for the train/validation/test splits.

    The master CSV (``path_csv``) supplies the ``Segment`` -> ``ID`` table, and
    the five split CSVs located via ``path_samples`` list which segments belong
    to each split. ``HotEncodeLabels`` ties the two together.

    NOTE(review): ``to_categorical`` is not defined in this file; it is expected
    to arrive through ``from classification_helper import*`` (presumably the
    Keras utility -- confirm).
    """

    def __init__(self, path_csv, path_samples):
        """Remember where the master table and the five split files live.

        Args:
            path_csv: path of the CSV holding the ``Segment`` and ``ID`` columns.
            path_samples: prefix put in front of the split file names. It is
                joined by plain string concatenation, so a directory must be
                given with its trailing separator (e.g. ``"splits/"``).
        """
        self.path_csv = path_csv
        self.n1_trn = path_samples + "n1_train.csv"
        self.n1_val = path_samples + "n1_validation.csv"
        self.n1_tst = path_samples + "n1_test.csv"
        self.n2_tst = path_samples + "n2_test.csv"
        self.n3_tst = path_samples + "n3_test.csv"

    def slice_data_frame(self, path):
        """Select Segment, and ID columns from a data frame"""
        df = pd.read_csv(path)
        return df[["Segment", "ID"]]

    def sample_to_ID(self, df):
        """Map each sample to its ID given a data frame of samples and IDs"""
        return {sample: ID for sample, ID in df.to_records(index=False)}

    def ID_to_samples(self, sample_2_ID_dict):
        """Group samples per ID"""
        ID_2_samples = {}
        for sample, ID in sample_2_ID_dict.items():
            ID_2_samples.setdefault(ID, []).append(sample)
        return ID_2_samples

    def _build_one_hot_lookup(self, ID_2_samples_dict):
        """Build the sample -> one-hot-row table shared by every split.

        Each ID receives the integer class equal to its position in
        ``ID_2_samples_dict`` (dict insertion order).

        Returns:
            ``(row_of_sample, one_hot)`` where ``one_hot[row_of_sample[s]]`` is
            the one-hot label of sample ``s``.
        """
        class_of_sample = {}
        for class_index, group in enumerate(ID_2_samples_dict.values()):
            for sample in group:
                class_of_sample[sample] = class_index
        if not class_of_sample:
            # Nothing to encode; skip to_categorical, which has no classes to infer.
            return {}, np.zeros((0, 0))
        one_hot = np.asarray(to_categorical(tuple(class_of_sample.values())))
        row_of_sample = {sample: row for row, sample in enumerate(class_of_sample)}
        return row_of_sample, one_hot

    def _encode_samples(self, lst_samples, row_of_sample, one_hot):
        """Look up the one-hot label of every sample in ``lst_samples``."""
        samples = list(lst_samples)
        missing = [sample for sample in samples if sample not in row_of_sample]
        if missing:
            # Same exception type as a failed dict lookup, but with enough
            # context to find the offending split/sample.
            raise KeyError(
                f"{len(missing)} sample(s) have no ID in the label table, "
                f"first one: {missing[0]!r}"
            )
        rows = np.array([row_of_sample[sample] for sample in samples], dtype=np.intp)
        # Fancy indexing keeps the (n_samples, n_classes) shape even when n_samples == 0.
        return np.array(samples), one_hot[rows]

    def hot_encod_labels(self, lst_samples, ID_2_samples_dict):
        """One-hot encode the IDs of ``lst_samples``.

        Args:
            lst_samples: iterable of sample names to encode, in output order.
            ID_2_samples_dict: mapping ID -> list of samples carrying that ID.

        Returns:
            ``(samples, labels)`` as NumPy arrays; ``labels[i]`` is the one-hot
            row for ``samples[i]``. An empty ``lst_samples`` yields empty arrays.

        Raises:
            KeyError: if a sample is absent from ``ID_2_samples_dict``.
        """
        row_of_sample, one_hot = self._build_one_hot_lookup(ID_2_samples_dict)
        return self._encode_samples(lst_samples, row_of_sample, one_hot)

    def get_samples(self, path):
        """Return the ``Segment`` column of the CSV at ``path`` as an array."""
        return pd.read_csv(path).Segment.values

    def HotEncodeLabels(self, seed=None):
        """Encode and shuffle every split.

        Args:
            seed: optional integer. When given, shuffling uses a private
                ``np.random.RandomState(seed)`` so the result is reproducible.
                When ``None`` (default) the global NumPy RNG is used, exactly
                as before, so an outer ``np.random.seed`` call still applies.

        Returns:
            A 10-tuple of arrays:
            ``(n1_train_x, n1_train_y, n1_val_x, n1_val_y, n1_test_x, n1_test_y,
            n2_test_x, n2_test_y, n3_test_x, n3_test_y)`` where each ``*_x`` holds
            sample names and each ``*_y`` the matching one-hot labels, both
            shuffled with the same permutation.
        """
        df = self.slice_data_frame(self.path_csv)
        ID_2_samples = self.ID_to_samples(self.sample_to_ID(df))

        # The label table is identical for every split, so build it once
        # instead of once per split.
        row_of_sample, one_hot = self._build_one_hot_lookup(ID_2_samples)

        split_paths = (self.n1_trn, self.n1_val, self.n1_tst, self.n2_tst, self.n3_tst)
        encoded = [
            self._encode_samples(self.get_samples(path), row_of_sample, one_hot)
            for path in split_paths
        ]

        rng = np.random if seed is None else np.random.RandomState(seed)
        shuffled = []
        # Draw the permutations in split order (train, val, test, n2, n3) so the
        # global-RNG path consumes random numbers in the historical sequence.
        for samples, labels in encoded:
            order = np.arange(len(samples))
            rng.shuffle(order)
            shuffled.extend((samples[order], labels[order]))
        return tuple(shuffled)
        
    

In [9]:
class Predictions:
    """Score a trained model on four data splits and save the results as CSV.

    Every output file name is built by prefixing ``p_preds`` (plain string
    concatenation) to a fixed tag.
    """

    def __init__(self, model,v1x, v1y, n1x, n1y, n2x, n2y, n3x, n3y,vgen, gen1, gen2, gen3, p_preds):
        """Store the model, the (samples, one-hot labels, generator) of each split, and the output prefix."""
        self.model = model
        self.v1x, self.v1y = v1x, v1y
        self.n1x, self.n1y = n1x, n1y
        self.n2x, self.n2y = n2x, n2y
        self.n3x, self.n3y = n3x, n3y
        self.vgen = vgen
        self.gen1, self.gen2, self.gen3 = gen1, gen2, gen3
        self.p_preds = p_preds

    def predict(self, model, samples, hotlabels, generator, night, verbose):
        """Predict one split, print and return its accuracy, and save per-sample results.

        Args:
            model: object exposing ``predict(generator, verbose=...)``.
            samples: sample identifiers written to the ``samples`` column.
            hotlabels: one-hot ground truth; the class is taken with ``argmax``.
            generator: input handed to ``model.predict``.
                NOTE(review): rows of the prediction are compared position by
                position with ``hotlabels`` -- confirm the generator keeps order.
            night: tag used in the printed line and as the CSV file stem.
            verbose: forwarded to ``model.predict``.

        Returns:
            Fraction of positions where predicted and observed class agree.
        """
        scores = model.predict(generator, verbose = verbose)
        predicted = np.argmax(scores, axis = 1)
        observed = np.argmax(hotlabels, axis = 1)

        n_hits = np.count_nonzero(observed == predicted)
        accuracy = n_hits/observed.shape[0]
        print(f"accuracy for {night} = ", accuracy)

        table = pd.DataFrame(
            {"samples": samples, "labels": observed, "predictions": predicted}
        )
        table.to_csv(self.p_preds + night + ".csv")
        return accuracy

    def execution(self):
        """Evaluate validation, night-1, night-2 and night-3 splits and write the accuracy summary."""
        runs = (
            (self.v1x, self.v1y, self.vgen, "one_n1_val"),
            (self.n1x, self.n1y, self.gen1, "one_n1_test"),
            (self.n2x, self.n2y, self.gen2, "one_n2_test"),
            (self.n3x, self.n3y, self.gen3, "one_n3_test"),
        )

        rounded = []
        for position, (samples, hotlabels, generator, tag) in enumerate(runs):
            if position:
                # Blank line between consecutive accuracy reports.
                print()
            score = self.predict(self.model, samples, hotlabels, generator, tag, 0)
            rounded.append(round(score, ndigits = 3))

        summary = pd.DataFrame.from_dict(
            {"accuracy": rounded},
            orient = "index",
            columns = ["night1v","night1", "night2", "night3"],
        )
        summary = summary.rename_axis("accuracy")
        summary.to_csv(self.p_preds + "one_clas_metrics.csv")
    
                

In [10]:
# End-to-end run: encode labels, build one generator per split, train, then
# score the trained model on every split and write the results to CSV.
# Generator, ModelArchitecture and Training are not defined in this file; they
# are expected to come from `from classification_helper import*`.
if __name__ == "__main__":
    
    # p1: master CSV passed to LabelEncoder as path_csv (Segment/ID table).
    # p2: prefix of the split CSVs (LabelEncoder concatenates file names onto it).
    # p3: prefix of every CSV written by Predictions.
    p1 = "../segment_index_extraction/segment_data.csv"
    p2 = "train_val_test_segment_data/"
    p3 = "classification_predictions/cnn/"
     
    # Settings forwarded to Generator / ModelArchitecture below.
    # NOTE(review): SHP looks like an input shape and PATH_ARRAY like the folder
    # of stored arrays -- confirm against classification_helper.
    PATH_ARRAY = "arrays/"
    PATH_W = "weights/n1_class.h5"
    SHP = (128,173,1)
    
    # LR, lr, PATH_W and EPOCHS are handed to trainer.train in that order.
    # NOTE(review): LR and lr differ only by case; presumably an initial and a
    # lower/minimum learning rate -- confirm against Training.train.
    LR = 1e-3
    lr = 1e-4
    EPOCHS = 30
    BATCH_SIZE = 128
    meanvar = True
    
    
    # HotEncodeLabels returns (samples, one-hot labels) pairs in the order:
    # night-1 train, night-1 validation, night-1 test, night-2 test, night-3 test.
    encoder = LabelEncoder(p1,p2)
    TIms,TLabs, VIms, VLabs, TeIms, TeLabs, N2Ims, N2Labs, N3Ims, N3Labs =  encoder.HotEncodeLabels()
   
    
    # One generator per split, all built with the same settings.
    Tgen = Generator(TIms, TLabs, SHP, BATCH_SIZE, PATH_ARRAY, meanvar)
    Vgen = Generator(VIms, VLabs, SHP, BATCH_SIZE, PATH_ARRAY, meanvar)
    Tegen = Generator(TeIms, TeLabs, SHP, BATCH_SIZE, PATH_ARRAY, meanvar)
    n2gen = Generator(N2Ims, N2Labs, SHP, BATCH_SIZE, PATH_ARRAY, meanvar)
    n3gen = Generator(N3Ims, N3Labs, SHP, BATCH_SIZE, PATH_ARRAY, meanvar)
    
   
    # The trainer is configured through attributes rather than constructor
    # arguments: architecture plus the training and validation generators.
    architecture = ModelArchitecture(SHP)
    trainer = Training()
    trainer.architecture = architecture
    
    trainer.Tgenerator = Tgen
    trainer.Vgenerator = Vgen

    
    
    model = trainer.train(LR, lr, PATH_W, EPOCHS)
    
    # Evaluate on validation, night-1 test, night-2 test and night-3 test;
    # the training split itself is not scored.
    predictor = Predictions(model,VIms,VLabs,TeIms,TeLabs,N2Ims,N2Labs,N3Ims,N3Labs,Vgen,Tegen,n2gen,n3gen, p3)
    
    
    predictor.execution()
    




Training model has started


++++++++++++++++++++++++++++++++++++++++++++++++++
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 26: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30

++++++++++++++++++++++++++++++++++++++++++++++++++

accuracy for one_n1_val =  0.9965437788018433

accuracy for one_n1_test =  0.9950362733867889

accuracy for one_n2_test =  0.3016759776536313

accuracy for one_n3_test =  0.3711864406779661
