In [1]:
import numpy as np
import scipy as sp
from matplotlib.pyplot import cm
# my libraries import 
import preprocessing as prep
import plot as plt
import mvg
import utility as util
import logistic_regression as lr
import dcf 
import gmm 
import svm
import pickle

from copy import deepcopy

In [2]:
# ---------------------------------------------------------------------------
# Experiment configuration, dataset loading and preprocessing.
# Everything a reader might want to tune for a run lives in this cell.
# ---------------------------------------------------------------------------
N_FEATURES = 12

# preprocessing
LDA = False
PCA = False
m = 8  # forwarded to prep.PCA(DTR, m) when PCA is enabled

# enable models (MVG, LOGISTIC_REGRESSION, SVM and GMM gate the matching sections below)
MVG = False
LOGISTIC_REGRESSION = False
SVM = True
GMM = False
DIM_REDUCTION = False
FUSION = False

# dataset path
fileTR = './data/Train.txt'
fileTE = './data/Test.txt'

# named features
features = ["Feature(" + str(x) + ")" for x in range(N_FEATURES)]
# each application is unpacked downstream as (pi, Cfn, Cfp)
applications = [(0.5, 1, 1), (0.1, 1, 1), (0.9, 1, 1)]

# load dataset
DTR, LTR = prep.load_dataset(fileTR)
DTE, LTE = prep.load_dataset(fileTE)

# Supported values: 'z-norm', 'gau', or a falsy value (None / "") for no preprocessing.
preprocessing = 'z-norm'
# Label baked into the names of the result files written by the later cells.
preprocessing_ = preprocessing if preprocessing else "raw"
pca_ = ""

# NOTE: DTR / DTE are overwritten in place below, so every later cell sees the
# transformed data; the untouched features are only available by reloading.
if preprocessing == 'z-norm':
    print("PREPROCESSING:\n\tZ-NORM: [enabled]")
    DTR, DTE = prep.preprocess_Z_score(DTR, DTE)
elif preprocessing == 'gau':
    print("PREPROCESSING:\n\tGAUSS: [enabled]")
    DTR, DTE = prep.preprocess_gaussianization(DTR, DTE)
elif preprocessing:
    # An unknown label would otherwise leave the data untouched while still being
    # stamped on every saved result file, silently mislabelling the experiment.
    raise ValueError(
        f"unknown preprocessing {preprocessing!r}: expected 'z-norm', 'gau' or a falsy value"
    )
if PCA:
    print("---- PCA with m=", m," -----")
    # The projection is computed from the training data only and applied to both splits.
    P = prep.PCA(DTR, m)
    DTR = np.dot(P.T, DTR)
    DTE = np.dot(P.T, DTE)
    pca_ = f"PCA-{m}-"

PREPROCESSING:
	Z-NORM: [enabled]


### MVG

In [3]:
save_results = True


def _run_mvg(banner, params, file_suffix):
    """Train one MVG variant once and report its minDCF for every application.

    Nothing passed to mvg.MVG depends on the application, so the model is trained
    (and its LLRs saved) a single time instead of once per application.
    NOTE(review): assumes mvg.MVG is deterministic and prints nothing - confirm.

    Parameters
    ----------
    banner : str
        Header line printed before the evaluation.
    params : dict
        Covariance options forwarded to mvg.MVG ("diag" / "tied").
    file_suffix : str
        Last part of the .npy file name used when save_results is enabled.

    Returns
    -------
    (llrs, min_dcfs)
        The value returned by mvg.MVG and the list of minDCFs, one per entry
        of `applications`, in the same order.
    """
    print(banner)
    llrs = mvg.MVG(DTR, DTE, LTR, params)
    if save_results:
        np.save(f"./results/experimental/mvg/{pca_}{preprocessing_}-{file_suffix}", np.array(llrs))
    min_dcfs = []
    for pi, Cfn, Cfp in applications:
        print("Application with (pi:", pi,", Cfn",Cfn,", Cfp",Cfp,")")
        minDCF = dcf.compute_min_DCF(llrs, LTE, pi, Cfn, Cfp)
        print(f"\t\tMinDCF: {minDCF}")
        min_dcfs.append(minDCF)
    return llrs, min_dcfs


if MVG:
    # The result names (llrs_* / *_minDCFs) are kept so later cells can still read them.
    llrs_full, full_minDCFs = _run_mvg(
        f"----- MVG full covariance [{pca_}{preprocessing_}-features]-----",
        {"diag": False, "tied": False},
        "llrs_full_cov",
    )
    llrs_diag, diag_minDCFs = _run_mvg(
        f"\n----- MVG diagonal covariance [{pca_}{preprocessing_}-features]-----",
        {"diag": True, "tied": False},
        "llrs_diag_cov",
    )
    llrs_tied_full, tied_minDCFs = _run_mvg(
        f"\n----- MVG tied full covariance [{pca_}{preprocessing_}-features] -----",
        {"diag": False, "tied": True},
        "llrs_tied_full_cov",
    )
    llrs_tied_diag, tied_diag_minDCFs = _run_mvg(
        f"\n----- MVG tied digonal covariance [{pca_}{preprocessing_}-features]-----",
        {"diag": True, "tied": True},
        "llrs_diag_tied_cov",
    )

### Logistic Regression

In [4]:

lambda_ = 10**-6
minDCF_plot = False

save_results = True

linear = True
quadratic = True
weighted = True


def _logreg_scores_by_application(train_fn):
    """Score the evaluation set with `train_fn` for every application and pi_T.

    The training inputs (DTR, DTE, LTR, priors, lambda_, weighted) depend only on
    pi_T, never on the application, so each pi_T is trained once and its scores
    are reused for the remaining applications.
    NOTE(review): assumes `train_fn` is deterministic - confirm.

    Parameters
    ----------
    train_fn : callable
        lr.logreg or lr.quadratic_logreg; called as train_fn(DTR, DTE, LTR, params).

    Returns
    -------
    dict
        {application: {pi_T: scores}}, the same layout that was pickled before.
    """
    scores_by_piT = dict()
    scores_by_application = dict()
    for application in applications:
        pi, Cfn, Cfp = application
        print("Application with ( pi:", pi,", Cfn:",Cfn,", Cfp:",Cfp,")")
        piT_scores = dict()
        for pi_T in [0.5, 0.1, 0.9]:
            print("\tevaluating with pi_T:", pi_T)
            if pi_T not in scores_by_piT:
                params = {
                    'priors': [pi_T, 1-pi_T],
                    'lambda_' : lambda_,
                    'weighted' : weighted
                }
                scores_by_piT[pi_T] = train_fn(DTR, DTE, LTR, params)
            scores = scores_by_piT[pi_T]
            minDCF = dcf.compute_min_DCF(scores, LTE, pi, Cfn, Cfp)
            print(f"\t\tMinDCF: {minDCF}")
            piT_scores[pi_T] = scores
        # deepcopy keeps every application's entry independent of the shared cache
        scores_by_application[application] = deepcopy(piT_scores)
    return scores_by_application


if LOGISTIC_REGRESSION:
    weighted_ = "weighted" if weighted else "not-weighted"
    if linear:
        print(f"\n\n----- Linear logistic regression [{weighted_}-{preprocessing_}-features] -----")
        if minDCF_plot:
            lambdas, minDCFs = plt.compare_min_DCF_logreg(DTR, DTE, LTR, LTE, applications, quadratic=False, preprocessing=preprocessing, weighted=weighted)
            if save_results:
                np.save(f"./results/experimental/logreg/plots/{weighted_}-{preprocessing_}-plot_linear_lambdas", np.array(lambdas))
                util.pickle_dump(f"./results/experimental/logreg/plots/{weighted_}-{preprocessing_}-plot_linear_min_dcfs", minDCFs)
        linear_scores = _logreg_scores_by_application(lr.logreg)
        if save_results:
            util.pickle_dump(f"./results/experimental/logreg/{weighted_}-{preprocessing_}-scores_linear", linear_scores)
    if quadratic:
        print(f"------ Quadratic Logistic Regression [{weighted_}-{preprocessing_}-features] ------")
        if minDCF_plot:
            lambdas, minDCFs = plt.compare_min_DCF_logreg(DTR, DTE, LTR, LTE, applications, quadratic=True, preprocessing=preprocessing, weighted=weighted)
            if save_results:
                np.save(f"./results/experimental/logreg/plots/{weighted_}-{preprocessing_}-plot_quadratic_lambdas", np.array(lambdas))
                util.pickle_dump(f"./results/experimental/logreg/plots/{weighted_}-{preprocessing_}-plot_quadratic_min_dcfs", minDCFs)
        quadratic_scores = _logreg_scores_by_application(lr.quadratic_logreg)
        if save_results:
            util.pickle_dump(f"./results/experimental/logreg/{weighted_}-{preprocessing_}-scores_quadratic", quadratic_scores)

In [5]:
# Sanity check: show the preprocessing label ("raw" when none is set) that is
# embedded in the names of the result files written by the model cells.
print(preprocessing_)

z-norm


### SVM

In [None]:
# Which SVM variants to evaluate, and whether to also produce the minDCF plot for each.
linear = False
plot_linear_minDCF = False

exp = False
plot_exp_minDCF = False

quadratic = True
plot_quadratic_minDCF = True

balanced = False


save_results = True
balanced_ = "balanced" if balanced else "not-balanced"


def _svm_scores_by_application(train_fn, **kernel_params):
    """Score the evaluation set with one SVM variant for every application and pi_T.

    The arguments given to `train_fn` depend only on pi_T, never on the
    application, so each pi_T is trained once and its scores are reused for the
    other applications instead of retraining the same model.
    NOTE(review): assumes `train_fn` is deterministic - confirm.

    Parameters
    ----------
    train_fn : callable
        svm.train_SVM_linear or svm.train_non_linear_SVM; called as
        train_fn(DTR, DTE, LTR, params).
    **kernel_params
        Variant-specific keyword arguments forwarded to util.build_params
        (e.g. C, kernel, gamma, d, c); priors and balanced are added here.

    Returns
    -------
    dict
        {application: {pi_T: 1-D score array}}, the same layout that was pickled before.
    """
    scores_by_piT = dict()
    scores_by_application = dict()
    for application in applications:
        pi, Cfn, Cfp = application
        print("application:", application)
        piT_scores = dict()
        for pi_T in [0.5, 0.1, 0.9]:
            print("\tevaluating with pi_T:", pi_T)
            if pi_T not in scores_by_piT:
                class_priors = [pi_T, 1-pi_T]
                params = util.build_params(**kernel_params, priors=class_priors, balanced=balanced)
                scores_by_piT[pi_T] = np.array(train_fn(DTR, DTE, LTR, params)).ravel()
            scores = scores_by_piT[pi_T]
            piT_scores[pi_T] = scores
            minDCF = dcf.compute_min_DCF(scores, LTE, pi, Cfn, Cfp)
            print(f"\t\tMinDCF: {minDCF}")
        # deepcopy keeps every application's entry independent of the shared cache
        scores_by_application[application] = deepcopy(piT_scores)
    return scores_by_application


if SVM:
    if linear:
        print(f"\n\n----- linear SVM on {preprocessing_} features [{balanced_}] -----")
        if plot_linear_minDCF:
            Cs, minDCFs_dict = plt.compare_min_DCF_svm(DTR, DTE, LTR, LTE, 'linear', applications, balanced, preprocessing)
            if save_results:
                np.save(f"./results/experimental/svm/plots/plot_linear_Cs", np.array(Cs))
                util.pickle_dump(f"./results/experimental/svm/plots/{balanced_}-{preprocessing_}-plot_linear_minDCFs", minDCFs_dict)
        linear_scores = _svm_scores_by_application(svm.train_SVM_linear, C=1)
        if save_results:
            util.pickle_dump(f"./results/experimental/svm/{balanced_}-{preprocessing_}-linear-svm", linear_scores)
    if exp:
        print(f"\n\n----- RBF SVM on {preprocessing_} features [{balanced_}] -----")
        gammas = [0.1, 0.01, 0.001]
        if plot_exp_minDCF:
            # NOTE(review): the other kernels pass `applications` in this position;
            # confirm compare_min_DCF_svm really expects the gamma grid for 'rbf'.
            Cs, minDCFs_dict = plt.compare_min_DCF_svm(DTR, DTE, LTR, LTE, 'rbf', gammas, balanced, preprocessing)
            if save_results:
                np.save(f"./results/experimental/svm/plots/plot_RBF_Cs", np.array(Cs))
                util.pickle_dump(f"./results/experimental/svm/plots/{balanced_}-{preprocessing_}-plot_RBF_minDCFs", minDCFs_dict)
        # The evaluation below uses a fixed gamma=0.1, not the `gammas` grid above.
        exp_scores = _svm_scores_by_application(svm.train_non_linear_SVM, kernel='rbf', C=1, gamma=0.1)
        if save_results:
            util.pickle_dump(f"./results/experimental/svm/{balanced_}-{preprocessing_}-RBF-svm", exp_scores)
    if quadratic:
        print(f"\n\n----- Quadratic SVM on {preprocessing_} features [{balanced_}] -----")
        if plot_quadratic_minDCF:
            Cs, minDCFs_dict = plt.compare_min_DCF_svm(DTR, DTE, LTR, LTE, 'poly', applications, balanced, preprocessing)
            if save_results:
                np.save(f"./results/experimental/svm/plots/plot_quadratic_Cs", np.array(Cs))
                util.pickle_dump(f"./results/experimental/svm/plots/{balanced_}-{preprocessing_}-plot_quadratic_minDCFs", minDCFs_dict)
        quadratic_scores = _svm_scores_by_application(svm.train_non_linear_SVM, kernel='poly', C=1, d=2, c=1)
        if save_results:
            util.pickle_dump(f"./results/experimental/svm/{balanced_}-{preprocessing_}-quadratic-svm", quadratic_scores)



----- Quadratic SVM on z-norm features [not-balanced] -----


 50%|█████     | 3/6 [04:08<04:39, 93.17s/it]

### GMM (Gaussian Mixture Model)

In [None]:
# ---- GMM experiment setup ------------------------------------------------
# Root folder for everything the GMM cells write.
ROOT = "./results/gmm"

# Covariance variants; each flag pair is later unpacked as
# (params["full_cov"], params["tied"]).
COV_TYPES = ["full-not-tied", "diag-not-tied", "full-tied", "diag-tied"]
cov_bool = [(True, False), (False, False), (True, True), (False, True)]
cov_dict = {cov_name: flags for cov_name, flags in zip(COV_TYPES, cov_bool)}

# Application whose minDCFs are plotted, and the component count used in file names / titles.
APPLICATION = (0.5, 1, 1)
COMPONENTS = 32

# Hyper-parameters forwarded to util.build_params.
# Original note on G: "number of components (2^G - 1)". With G = 6 this only
# matches COMPONENTS = 32 if read as 2^(G-1) - TODO confirm against gmm.GMM.
G = 6
alpha = 0.1
stopping_criterion = 1e-6
psi = 0.01
priors = [0.5, 0.5]
params = util.build_params(
    priors=priors,
    alpha=alpha,
    stopping_criterion=stopping_criterion,
    G=G,
    psi=psi,
)

# Z-normalised copies of the current DTR / DTE; the deep copies keep the originals untouched.
# NOTE(review): DTR / DTE may already have been preprocessed by the setup cell - confirm
# they are the intended input here.
_dtr_copy = deepcopy(DTR)
_dte_copy = deepcopy(DTE)
DTR_Z, DTE_Z = prep.preprocess_Z_score(_dtr_copy, _dte_copy)


In [None]:
plot = True
save_results = True

# Kept at module level so the names exist even when GMM is disabled.
minDCFs_z_norm = dict()
minDCFs_raw = dict()

if GMM:
    print("----- GMM Classifier -----")
    if preprocessing or PCA:
        # The setup cell overwrites DTR / DTE in place, so with any preprocessing
        # enabled the "raw" branch below is not evaluated on untouched features and
        # its dump files would be mislabelled. Surface that instead of staying silent.
        print(f"WARNING: DTR/DTE already went through [{pca_}{preprocessing_}] in the setup cell; "
              "results labelled 'raw' below are NOT computed on untouched features.")
    for cov_type in COV_TYPES:
        print(f"\t[{cov_type}] covariance")

        params["full_cov"], params["tied"] = cov_dict[cov_type]
        llrs_z_norm = gmm.GMM(DTR_Z, DTE_Z, LTR, params)
        llrs_raw = gmm.GMM(DTR, DTE, LTR, params)

        # Fresh dicts per covariance type so no entry can carry over from the previous one.
        minDCFs_raw = dict()
        minDCFs_z_norm = dict()
        for application in applications:
            minDCFs_raw[application] = dcf.GMM_minDCF(llrs_raw, LTE, G, 0, application)
            minDCFs_z_norm[application] = dcf.GMM_minDCF(llrs_z_norm, LTE, G, 0, application)
        if save_results:
            # raw results dump
            np.save(f"{ROOT}/experimental/{gmm.build_filename(False, False, COMPONENTS, cov_type)}", llrs_raw)
            util.pickle_dump(f"{ROOT}/experimental/{gmm.build_filename(True, False, COMPONENTS, cov_type)}", minDCFs_raw)
            # z-norm results dump
            np.save(f"{ROOT}/experimental/{gmm.build_filename(False, True, COMPONENTS, cov_type)}", llrs_z_norm)
            util.pickle_dump(f"{ROOT}/experimental/{gmm.build_filename(True, True, COMPONENTS, cov_type)}", minDCFs_z_norm)

        if plot:
            # Previously stored results, plotted as "[Val]" next to the evaluation values.
            train_minDCFs_raw = gmm.load_results(True, False, COMPONENTS, cov_type, APPLICATION, True)
            train_minDCFs_z_norm = gmm.load_results(True, True, COMPONENTS, cov_type, APPLICATION, True)

            eval_minDCFs_raw = minDCFs_raw[APPLICATION]
            eval_minDCFs_z_norm = minDCFs_z_norm[APPLICATION]

            to_plot = [eval_minDCFs_raw, train_minDCFs_raw, eval_minDCFs_z_norm, train_minDCFs_z_norm]
            labels = ["raw [Eval]", "raw [Val]", "z-norm [Eval]", "z-norm [Val]"]
            plt.plot_minDCF_GMM_hist(to_plot, G, labels, f"gmm-{COMPONENTS}-{cov_type}", True, f" GMM {COMPONENTS}C {cov_type}")

    
